# chat2 / app.py
# onlyrickroll's picture
# Upload app.py
# 7b2e7fd verified
import gradio as gr
import os

from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# NOTE(review): llama_cpp.llama_chat_format has no class named
# "LlavaChatHandler" -- the original import raised ImportError on startup.
# Qwen25VLChatHandler is the handler llama-cpp-python ships for the
# Qwen-VL family; confirm it matches Qwen3-VL's chat template.
from llama_cpp.llama_chat_format import Qwen25VLChatHandler

# --- Model configuration ---
# The official Qwen3-VL GGUF repository is recommended.
REPO_ID = "Qwen/Qwen3-VL-8B-Instruct-GGUF"
MODEL_FILE = "Qwen3-VL-8B-Instruct-Q4_K_M.gguf"
MMPROJ_FILE = "mmproj-Qwen3-VL-8B-Instruct-Q8_0.gguf"

print("正在下载/加载模型权重,请稍候...")
# Download the language-model weights (cached by huggingface_hub).
model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILE)
# Download the vision projector (mmproj) that pairs with the LM.
mmproj_path = hf_hub_download(repo_id=REPO_ID, filename=MMPROJ_FILE)

# --- Initialize the Llama instance ---
# Key point: Qwen-VL models need a dedicated chat_handler to accept images.
chat_handler = Qwen25VLChatHandler(mmproj_path=mmproj_path)
llm = Llama(
    model_path=model_path,
    chat_handler=chat_handler,
    n_ctx=2048,       # context window; 4096 is fine if memory allows
    n_threads=4,      # free Spaces usually provide 4 vCPUs
    logits_all=True,  # multimodal chat handlers require full logits
    # Offload every layer when a GPU is present, else run CPU-only.
    n_gpu_layers=-1 if os.environ.get("SPACES_ZERO_GPU") else 0,
)
def process_input(message, history):
    """Handle one multimodal Gradio turn and return the model's reply.

    Parameters
    ----------
    message : dict
        Gradio multimodal payload: ``{"text": str, "files": [local paths]}``.
    history : list
        Prior chat turns (unused; each request is sent single-turn).

    Returns
    -------
    str
        The assistant message text from the completion.
    """
    import base64
    import mimetypes

    text = message["text"]
    files = message["files"]

    user_content = [{"type": "text", "text": text}]
    if files:
        # Only the first uploaded image is used. Gradio provides a local
        # temp-file path, but the llama-cpp chat handler fetches image_url
        # values, so a bare filesystem path would not load -- embed the
        # image bytes as a base64 data URI instead.
        image_path = files[0]
        mime = mimetypes.guess_type(image_path)[0] or "image/png"
        with open(image_path, "rb") as fh:
            encoded = base64.b64encode(fh.read()).decode("ascii")
        user_content.append(
            {
                "type": "image_url",
                "image_url": {"url": f"data:{mime};base64,{encoded}"},
            }
        )

    # Build the llama-cpp chat request.
    response = llm.create_chat_completion(
        messages=[{"role": "user", "content": user_content}],
        max_tokens=512,
        stream=False,  # non-streaming for simplicity; switch to True + yield for streaming
    )
    return response["choices"][0]["message"]["content"]
# --- Build the Gradio interface ---
with gr.Blocks() as demo:
    gr.Markdown("# 🚀 Qwen3-VL 本地 GGUF 推理")
    gr.Markdown("运行在 Space 本地资源上,支持图片分析。")
    # Multimodal chat widget wired to the local GGUF inference function.
    gr.ChatInterface(
        fn=process_input,
        type="messages",
        multimodal=True,  # enables image/file uploads in the textbox
    )

if __name__ == "__main__":
    demo.launch()