import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the GGUF straight from the Hub at runtime (skips the 1 GB upload limit)
MODEL_REPO = "mradermacher/DeepHat-V1-7B-GGUF"
MODEL_FILE = "DeepHat-V1-7B.Q4_K_M.gguf"  # ~4.8 GB, downloaded once and cached
LOCAL_PATH = "./models/"  # Local folder inside the Space


# Load the model (runs at init)
def load_model():
    os.makedirs(LOCAL_PATH, exist_ok=True)
    # hf_hub_download fetches a single file and returns its full local path
    # (snapshot_download has no `filename` parameter; it mirrors a whole repo)
    model_path = hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE,
        local_dir=LOCAL_PATH,  # recent huggingface_hub writes real files here, no broken symlinks
    )
    llm = Llama(
        model_path=model_path,
        n_ctx=2048,
        n_threads=4,
        verbose=False,
    )
    return llm


# Load at init (takes ~5-10 min on the first build, cached afterwards)
print("Downloading DeepHat... (can be slow on CPU)")
llm = load_model()


def generate_response(prompt, max_tokens=500):
    # Hand-built ChatML prompt, the format this model expects
    full_prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
    output = llm(
        full_prompt,
        max_tokens=max_tokens,
        temperature=0.7,
        top_p=0.9,
        stop=["<|im_end|>"],
    )
    return output["choices"][0]["text"].strip()


# Gradio interface
with gr.Blocks(title="DeepHat Uncensored Chat") as demo:
    gr.Markdown("# DeepHat - Uncensored AI for Cybersecurity & Ethical Hacking")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Ask about WiFi hacking, pentesting, or censorship...")
    clear = gr.Button("Clear")

    def respond(message, chat_history):
        bot_message = generate_response(message)
        chat_history.append((message, bot_message))
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch()
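
# --- Optional variant (a sketch, not part of the original app) ---
# requirements.txt for the Space, inferred from the imports above:
#   gradio
#   llama-cpp-python
#   huggingface_hub
#
# llama-cpp-python can also apply the model's bundled chat template itself via
# Llama.create_chat_completion(), which avoids hand-building the ChatML string
# in generate_response(). This assumes the GGUF carries ChatML metadata; if it
# does not, pass chat_format="chatml" to the Llama() constructor. In practice
# this function would replace generate_response() above; it is appended here
# only for illustration.
def generate_response_chat(prompt, max_tokens=500):
    result = llm.create_chat_completion(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=0.7,
        top_p=0.9,
    )
    return result["choices"][0]["message"]["content"].strip()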