#!/usr/bin/env python3
"""Benchmarking script for DWQ model validation."""

import time

import psutil
from mlx_lm import load, generate


def benchmark_model(model_path):
    """Benchmark a model's load time, memory footprint, and per-category latency.

    Loads the model at *model_path*, then runs one short generation per test
    category (coding / qa / reasoning), timing each.

    Args:
        model_path: Path (or HF repo id) passed straight to ``mlx_lm.load``.

    Returns:
        dict with keys:
            ``load_time``          -- seconds to load model + tokenizer
            ``load_memory_mb``     -- process RSS in MiB right after load
            ``{category}_time``    -- seconds for one 50-token generation
            ``{category}_sample``  -- first 100 chars of the response
                                      ("..." appended only if truncated)
    """
    process = psutil.Process()

    # perf_counter() is monotonic and high-resolution -- the right clock for
    # measuring intervals (time.time() can jump with system clock changes).
    start = time.perf_counter()
    model, tokenizer = load(model_path)
    results = {"load_time": time.perf_counter() - start}

    # Resident set size after load; this is what the (previously unused)
    # psutil import is for.
    results["load_memory_mb"] = process.memory_info().rss / (1024 * 1024)

    # One representative prompt per capability category.
    tests = {
        "coding": "Write a Python function to sort a list:",
        "qa": "What is quantum computing?",
        "reasoning": "If A>B and B>C, what's the relationship between A and C?",
    }

    for category, prompt in tests.items():
        start = time.perf_counter()
        response = generate(model, tokenizer, prompt=prompt, max_tokens=50)
        results[f"{category}_time"] = time.perf_counter() - start

        # Only append an ellipsis when the sample was actually truncated.
        sample = response[:100]
        if len(response) > 100:
            sample += "..."
        results[f"{category}_sample"] = sample

    return results


if __name__ == "__main__":
    results = benchmark_model("./")
    print("Benchmark Results:")
    for key, value in results.items():
        print(f"{key}: {value}")