from huggingface_hub import hf_hub_download
from llama_cpp import Llama


def load_local_model(
    repo_id: str = "tensorblock/phi-1_5-GGUF",
    filename: str = "phi-1_5-Q4_K_M.gguf",
    *,
    n_ctx: int = 4096,
    n_threads: int = 6,
    n_gpu_layers: int = 0,
    verbose: bool = False,
) -> Llama:
    """Download (or reuse a cached copy of) a GGUF model and load it with llama.cpp.

    All arguments default to the original hard-coded values, so existing
    zero-argument callers get identical behavior.

    Args:
        repo_id: Hugging Face Hub repository that contains the GGUF file.
        filename: Name of the GGUF file inside the repository.
        n_ctx: Context window size in tokens.
        n_threads: Number of CPU threads llama.cpp uses for inference.
        n_gpu_layers: Layers to offload to the GPU (0 = CPU-only).
        verbose: Whether llama.cpp prints loading/runtime diagnostics.

    Returns:
        A ready-to-use ``llama_cpp.Llama`` instance.
    """
    # hf_hub_download caches the file locally and returns its path, so
    # repeated calls do not re-download the model.
    model_path = hf_hub_download(repo_id=repo_id, filename=filename)
    return Llama(
        model_path=model_path,
        n_ctx=n_ctx,
        n_threads=n_threads,
        n_gpu_layers=n_gpu_layers,
        verbose=verbose,
    )