# Hugging Face Space app: Qwen3-VL GGUF multimodal chat demo.
import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# NOTE(review): llama_cpp.llama_chat_format defines no `LlavaChatHandler`
# class (the actual names are Llava15ChatHandler, Qwen25VLChatHandler, ...),
# so the original import raised at startup.  Prefer the Qwen-specific
# handler when this llama-cpp-python version ships it; otherwise fall back
# to the generic LLaVA 1.5 handler.  TODO: confirm the fallback's chat
# template is acceptable for Qwen3-VL output quality.
try:
    from llama_cpp.llama_chat_format import Qwen25VLChatHandler as _VLChatHandler
except ImportError:
    from llama_cpp.llama_chat_format import Llava15ChatHandler as _VLChatHandler

# --- Model configuration ---
# Official Qwen3-VL GGUF repository on the Hugging Face Hub.
REPO_ID = "Qwen/Qwen3-VL-8B-Instruct-GGUF"
MODEL_FILE = "Qwen3-VL-8B-Instruct-Q4_K_M.gguf"
MMPROJ_FILE = "mmproj-Qwen3-VL-8B-Instruct-Q8_0.gguf"

print("正在下载/加载模型权重,请稍候...")
# Language-model weights (cached by huggingface_hub between restarts).
model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILE)
# Vision projector (mmproj) the chat handler uses to embed images.
mmproj_path = hf_hub_download(repo_id=REPO_ID, filename=MMPROJ_FILE)

# --- Initialize the Llama instance ---
# Qwen-VL models need a dedicated chat handler to accept image inputs.
# The handler's keyword is `clip_model_path` (the original `mmproj_path=`
# keyword would be rejected by the constructor).
chat_handler = _VLChatHandler(clip_model_path=mmproj_path)
llm = Llama(
    model_path=model_path,
    chat_handler=chat_handler,
    n_ctx=2048,    # context window; raise to 4096 if RAM allows
    n_threads=4,   # free Spaces typically provide 4 vCPUs
    # `logits_all=True` removed: it keeps logits for every prompt token,
    # inflating memory use, and is not needed for chat completion.
    n_gpu_layers=-1 if os.environ.get("SPACES_ZERO_GPU") else 0,  # offload all layers when a GPU is present
)
| def process_input(message, history): | |
| """ | |
| 处理 Gradio 的多模态输入 | |
| message: {"text": "xxx", "files": ["path/to/img"]} | |
| """ | |
| txt = message["text"] | |
| files = message["files"] | |
| prompt_messages = [] | |
| # 构建消息格式 | |
| user_content = [{"type": "text", "text": txt}] | |
| if files: | |
| # 处理图片输入 | |
| image_url = files[0] # 取第一张图 | |
| user_content.append({"type": "image_url", "image_url": {"url": image_url}}) | |
| # 构造 llama-cpp 的 chat 格式 | |
| response = llm.create_chat_completion( | |
| messages=[{"role": "user", "content": user_content}], | |
| max_tokens=512, | |
| stream=False # 简单起见使用非流式,如需流式可改为 True 并 yield | |
| ) | |
| return response["choices"][0]["message"]["content"] | |
# --- Build the Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown("# 🚀 Qwen3-VL 本地 GGUF 推理")
    gr.Markdown("运行在 Space 本地资源上,支持图片分析。")
    gr.ChatInterface(
        fn=process_input,
        multimodal=True,  # enable file/image uploads in the chat box
        type="messages",
    )

# Launch only when executed as a script (Spaces runs this file directly).
if __name__ == "__main__":
    demo.launch()