import os

import requests
import gradio as gr

# Read the Hugging Face API token from the environment. On a Space, add it
# under Settings -> Variables as a Secret named HF_API_TOKEN.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
if HF_API_TOKEN is None:
    raise RuntimeError(
        "环境变量 HF_API_TOKEN 未设置,请在 Space 的 Settings -> Variables 中添加一个名为 HF_API_TOKEN 的 Secret。"
    )

# Model ID to query; swap in any other model that supports the Inference API,
# e.g. "meta-llama/Llama-3.2-1B-Instruct" or "Qwen/Qwen2.5-1.5B-Instruct".
MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"
API_URL = f"https://api-inference.huggingface.co/models/{MODEL_ID}"
HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}


def query_hf_api(prompt: str, max_new_tokens: int = 256, temperature: float = 0.7) -> str:
    """Send *prompt* to the HF Inference API and return the generated text.

    Args:
        prompt: Full plain-text prompt (dialog history included).
        max_new_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature (sampling is always enabled).

    Returns:
        The model's completion, stripped of surrounding whitespace; if the
        response shape is unexpected, the raw payload as a string (for
        debugging).

    Raises:
        requests.HTTPError: If the API responds with a non-2xx status.
        requests.RequestException: On network failure or timeout.
    """
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature": temperature,
            "do_sample": True,
            # Bug fix: text-generation models return prompt + completion in
            # "generated_text" by default, so every reply would echo the whole
            # dialog history back. Ask for the completion only.
            "return_full_text": False,
        },
    }
    response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=120)
    response.raise_for_status()
    data = response.json()
    # Common text-generation response shape: [{"generated_text": "..."}]
    if isinstance(data, list) and data:
        return data[0].get("generated_text", "").strip()
    # Fallback: stringify whatever came back so it is visible in the chat.
    return str(data)


def chat_fn(history, message, max_new_tokens, temperature):
    """Gradio callback: append one (user, bot) turn to the chat history.

    Args:
        history: List of (user_msg, bot_msg) tuples from the Chatbot component.
        message: The user's new message.
        max_new_tokens: Slider value (may arrive as float; cast to int).
        temperature: Slider value.

    Returns:
        (updated_history, "") — the empty string clears the input textbox.
    """
    # Robustness: don't spend an API call on a blank submission.
    if not message or not message.strip():
        return history, ""

    # Flatten the dialog history into one plain-text prompt.
    dialog = ""
    if history:
        for user_msg, bot_msg in history:
            dialog += f"用户: {user_msg}\n助手: {bot_msg}\n"
    dialog += f"用户: {message}\n助手:"

    try:
        output = query_hf_api(
            dialog,
            max_new_tokens=int(max_new_tokens),
            temperature=float(temperature),
        )
    except Exception as e:
        # Surface the error inside the chat instead of crashing the UI.
        output = f"[调用模型出错] {type(e).__name__}: {e}"

    history = history + [(message, output)]
    return history, ""


with gr.Blocks() as demo:
    gr.Markdown(
        f"# 云端模型聊天 Demo\n使用模型:`{MODEL_ID}`(通过 Hugging Face Inference API)"
    )
    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(label="对话", height=500)
            msg = gr.Textbox(
                label="你的问题",
                placeholder="输入你想问的问题,回车或点击发送",
                lines=2,
            )
            send_btn = gr.Button("发送")
            clear_btn = gr.Button("清空对话")
        with gr.Column(scale=1):
            gr.Markdown("### 参数设置")
            max_new_tokens = gr.Slider(16, 512, value=256, step=16, label="max_new_tokens")
            temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="temperature")

    # Button click and textbox Enter share the same handler and wiring.
    send_btn.click(
        chat_fn,
        inputs=[chatbot, msg, max_new_tokens, temperature],
        outputs=[chatbot, msg],
    )
    msg.submit(
        chat_fn,
        inputs=[chatbot, msg, max_new_tokens, temperature],
        outputs=[chatbot, msg],
    )
    clear_btn.click(lambda: ([], ""), None, [chatbot, msg])

if __name__ == "__main__":
    # Do not pass extra arguments to launch(); HF Spaces manages host/port.
    demo.launch()