from huggingface_hub import hf_hub_download
from llama_cpp import Llama


def load_local_model(
    repo_id: str = "tensorblock/phi-1_5-GGUF",
    filename: str = "phi-1_5-Q4_K_M.gguf",
    *,
    n_ctx: int = 4096,
    n_threads: int = 6,
    n_gpu_layers: int = 0,
    verbose: bool = False,
) -> Llama:
    """Download (or reuse a cached copy of) a GGUF model and load it with llama.cpp.

    All arguments default to the original hard-coded values, so existing
    zero-argument callers get identical behavior.

    Args:
        repo_id: Hugging Face Hub repository that contains the GGUF file.
        filename: Name of the GGUF file inside the repository.
        n_ctx: Context window size in tokens.
        n_threads: Number of CPU threads llama.cpp uses for inference.
        n_gpu_layers: Layers to offload to the GPU (0 = CPU-only).
        verbose: Whether llama.cpp prints loading/runtime diagnostics.

    Returns:
        A ready-to-use ``llama_cpp.Llama`` instance.
    """
    # hf_hub_download caches the file locally and returns its path, so
    # repeated calls do not re-download the model.
    model_path = hf_hub_download(repo_id=repo_id, filename=filename)
    return Llama(
        model_path=model_path,
        n_ctx=n_ctx,
        n_threads=n_threads,
        n_gpu_layers=n_gpu_layers,
        verbose=verbose,
    )