from typing import Optional

import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer


def load_local_model(model_path: str, device: int = -1, token: Optional[str] = None):
    """
    Load a Hugging Face causal language model (CPU by default) with an optional
    token for private repos.

    Args:
        model_path (str): Hugging Face repo ID or local path.
        device (int): -1 for CPU, >= 0 for a CUDA device index.
        token (str, optional): Hugging Face token for private models.

    Returns:
        tuple: (model, tokenizer)
    """
    try:
        # `token` replaces the deprecated `use_auth_token` argument.
        tokenizer = AutoTokenizer.from_pretrained(model_path, token=token)
    except Exception as e:
        raise RuntimeError(f"Failed to load tokenizer: {e}") from e

    try:
        config = AutoConfig.from_pretrained(model_path, token=token)
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            config=config,
            token=token,
        )
        # Device mapping: use the requested GPU only if CUDA is available,
        # otherwise fall back to CPU.
        if device >= 0 and torch.cuda.is_available():
            model.to(f"cuda:{device}")
        else:
            model.to("cpu")
    except Exception as e:
        raise RuntimeError(f"Failed to load model: {e}") from e

    return model, tokenizer
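

# Minimal usage sketch (not part of the original function): the repo ID "gpt2"
# is only an illustrative public model; substitute your own model path or a
# private repo ID plus a token as needed.
if __name__ == "__main__":
    model, tokenizer = load_local_model("gpt2", device=-1)
    inputs = tokenizer("Hello, world", return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=20)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))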