#!/usr/bin/env python3
"""Benchmarking script for DWQ model validation."""

import time

import psutil
from mlx_lm import load, generate


def benchmark_model(model_path):
    """Benchmark a model's load time, memory footprint, and per-category latency.

    Loads the model at *model_path*, then runs one short generation per test
    category (coding / qa / reasoning), timing each.

    Args:
        model_path: Path (or HF repo id) passed straight to ``mlx_lm.load``.

    Returns:
        dict with keys:
            ``load_time``          -- seconds to load model + tokenizer
            ``load_memory_mb``     -- process RSS in MiB right after load
            ``{category}_time``    -- seconds for one 50-token generation
            ``{category}_sample``  -- first 100 chars of the response
                                      ("..." appended only if truncated)
    """
    process = psutil.Process()

    # perf_counter() is monotonic and high-resolution -- the right clock for
    # measuring intervals (time.time() can jump with system clock changes).
    start = time.perf_counter()
    model, tokenizer = load(model_path)
    results = {"load_time": time.perf_counter() - start}

    # Resident set size after load; this is what the (previously unused)
    # psutil import is for.
    results["load_memory_mb"] = process.memory_info().rss / (1024 * 1024)

    # One representative prompt per capability category.
    tests = {
        "coding": "Write a Python function to sort a list:",
        "qa": "What is quantum computing?",
        "reasoning": "If A>B and B>C, what's the relationship between A and C?",
    }

    for category, prompt in tests.items():
        start = time.perf_counter()
        response = generate(model, tokenizer, prompt=prompt, max_tokens=50)
        results[f"{category}_time"] = time.perf_counter() - start

        # Only append an ellipsis when the sample was actually truncated.
        sample = response[:100]
        if len(response) > 100:
            sample += "..."
        results[f"{category}_sample"] = sample

    return results


if __name__ == "__main__":
    results = benchmark_model("./")
    print("Benchmark Results:")
    for key, value in results.items():
        print(f"{key}: {value}")