#!/usr/bin/env python3
"""
Benchmarking script for DWQ model validation
"""
import time

import psutil
from mlx_lm import load, generate
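
# Throughput helper: an addition, not part of the original script. It
# estimates tokens/sec by re-encoding the generated text, which assumes the
# tokenizer exposes the usual Hugging Face encode() method (true for the
# TokenizerWrapper that mlx_lm.load returns).
def tokens_per_second(tokenizer, text, elapsed):
    if elapsed <= 0:
        return 0.0
    return len(tokenizer.encode(text)) / elapsed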

def benchmark_model(model_path):
    # Time the model load; the RSS delta serves as a rough memory proxy
    # (psutil was imported but unused in the original, so this is an
    # assumption about the intended measurement).
    rss_before = psutil.Process().memory_info().rss
    start = time.time()
    model, tokenizer = load(model_path)
    load_time = time.time() - start
    load_mem_mb = (psutil.Process().memory_info().rss - rss_before) / (1024 * 1024)
    # Test categories
    tests = {
        "coding": "Write a Python function to sort a list:",
        "qa": "What is quantum computing?",
        "reasoning": "If A>B and B>C, what's the relationship between A and C?",
    }
    results = {"load_time": load_time, "load_mem_mb": load_mem_mb}
    for category, prompt in tests.items():
        start = time.time()
        response = generate(model, tokenizer, prompt=prompt, max_tokens=50)
        elapsed = time.time() - start
        results[f"{category}_time"] = elapsed
        results[f"{category}_tok_per_s"] = tokens_per_second(tokenizer, response, elapsed)
        # Append an ellipsis only when the sample is actually truncated.
        results[f"{category}_sample"] = response[:100] + ("..." if len(response) > 100 else "")
    return results

if __name__ == "__main__":
    results = benchmark_model("./")
    print("Benchmark Results:")
    for key, value in results.items():
        print(f"{key}: {value}")