QwenLong-L1-32B-4bit-DWQ / benchmark_script.py
Narutoouz's picture
Upload QwenLong-L1-32B-4bit-DWQ DWQ 4-bit quantized model with comprehensive documentation
478d3b0 verified
#!/usr/bin/env python3
"""
Benchmarking script for DWQ model validation
"""
import time
import psutil
from mlx_lm import load, generate
def benchmark_model(model_path: str, *, max_tokens: int = 50) -> dict:
    """Load a model and time one short generation per prompt category.

    Args:
        model_path: Location passed unchanged to ``load``.
        max_tokens: Generation cap forwarded to ``generate`` for every
            prompt. Defaults to 50, the value this script previously
            hard-coded.

    Returns:
        A dict containing ``"load_time"`` (seconds spent in ``load``) and,
        for each category ``c`` of ``coding``, ``qa`` and ``reasoning``,
        ``"<c>_time"`` (seconds spent in ``generate``) and ``"<c>_sample"``
        (the first 100 characters of the response, followed by ``"..."``
        only when the response was actually longer than that).
    """
    sample_limit = 100

    # perf_counter() is monotonic and high resolution; time.time() follows the
    # wall clock, which can be adjusted while a measurement is in progress.
    start = time.perf_counter()
    model, tokenizer = load(model_path)
    results = {"load_time": time.perf_counter() - start}

    # One fixed prompt per test category.
    tests = {
        "coding": "Write a Python function to sort a list:",
        "qa": "What is quantum computing?",
        "reasoning": "If A>B and B>C, what's the relationship between A and C?",
    }

    for category, prompt in tests.items():
        start = time.perf_counter()
        response = generate(
            model, tokenizer, prompt=prompt, max_tokens=max_tokens
        )
        results[f"{category}_time"] = time.perf_counter() - start

        # Only mark the sample as elided when text was really dropped.
        sample = response[:sample_limit]
        if len(response) > sample_limit:
            sample += "..."
        results[f"{category}_sample"] = sample

    return results
if __name__ == "__main__":
    # Benchmark the model found at "./" and print one "name: value" line
    # per collected metric, under a fixed header.
    report = benchmark_model("./")
    print("Benchmark Results:")
    for metric_name, metric_value in report.items():
        line = f"{metric_name}: {metric_value}"
        print(line)