#!/usr/bin/env python3
"""
Benchmarking script for DWQ model validation
"""
import time

import psutil
from mlx_lm import load, generate
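
# Throughput helper: an addition, not part of the original script. It
# estimates tokens/sec by re-encoding the generated text, which assumes the
# tokenizer exposes the usual Hugging Face encode() method (true for the
# TokenizerWrapper that mlx_lm.load returns).
def tokens_per_second(tokenizer, text, elapsed):
    if elapsed <= 0:
        return 0.0
    return len(tokenizer.encode(text)) / elapsed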

def benchmark_model(model_path):
    # Time the model load; the RSS delta serves as a rough memory proxy
    # (psutil was imported but unused in the original, so this is an
    # assumption about the intended measurement).
    rss_before = psutil.Process().memory_info().rss
    start = time.time()
    model, tokenizer = load(model_path)
    load_time = time.time() - start
    load_mem_mb = (psutil.Process().memory_info().rss - rss_before) / (1024 * 1024)
    # Test categories
    tests = {
        "coding": "Write a Python function to sort a list:",
        "qa": "What is quantum computing?",
        "reasoning": "If A>B and B>C, what's the relationship between A and C?",
    }
    results = {"load_time": load_time, "load_mem_mb": load_mem_mb}
    for category, prompt in tests.items():
        start = time.time()
        response = generate(model, tokenizer, prompt=prompt, max_tokens=50)
        elapsed = time.time() - start
        results[f"{category}_time"] = elapsed
        results[f"{category}_tok_per_s"] = tokens_per_second(tokenizer, response, elapsed)
        # Append an ellipsis only when the sample is actually truncated.
        results[f"{category}_sample"] = response[:100] + ("..." if len(response) > 100 else "")
    return results

if __name__ == "__main__":
    results = benchmark_model("./")
    print("Benchmark Results:")
    for key, value in results.items():
        print(f"{key}: {value}")