debug zerogpu timeout error
app.py CHANGED
@@ -51,6 +51,8 @@ def _load_model_processor(args):
     # ZeroGPU environment: the model is loaded on CPU in eager mode
     # and is moved to the GPU automatically inside the @spaces.GPU decorator
     print(f"[INFO] Loading model (eager mode in the ZeroGPU environment)")
+    print(f"[INFO] CUDA available at load time: {torch.cuda.is_available()}")
+
     model = HunYuanVLForConditionalGeneration.from_pretrained(
         args.checkpoint_path,
         attn_implementation="eager",  # ZeroGPU must use eager, since the model starts on CPU
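Note: the eager requirement above comes from where the weights start out. A minimal sketch (not part of this commit) that makes the backend choice explicit rather than hard-coded, assuming only that torch is importable:

import torch

# Sketch: pick the attention backend from what is visible at load time.
# On ZeroGPU the process typically starts without a GPU, so this resolves
# to "eager"; with a resident GPU it could use "sdpa" instead.
attn_impl = "eager" if not torch.cuda.is_available() else "sdpa"
print(f"[INFO] attn_implementation={attn_impl}")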
@@ -59,7 +61,8 @@ def _load_model_processor(args):
         token=os.environ.get('HF_TOKEN')
     )
     processor = AutoProcessor.from_pretrained(args.checkpoint_path, use_fast=False, trust_remote_code=True)
-
+
+    print(f"[INFO] Model loaded, current device: {next(model.parameters()).device}")
     return model, processor
 
 
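The new [INFO] line only reports the device. A hypothetical helper (not in the Space) that bundles the device and dtype checks this commit scatters across its logs:

def describe_weights(model):
    # Summarize where the weights live; `model` is any torch nn.Module.
    p = next(model.parameters())
    return f"device={p.device}, dtype={p.dtype}"

# Usage: print(f"[INFO] Model loaded: {describe_weights(model)}")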
@@ -91,20 +94,38 @@ def _gc():
 
 def _launch_demo(args, model, processor):
     # Key fix: remove the model and processor parameters and access them via the closure
-
+    # Increase duration to 120 seconds to avoid timeouts during peak hours
+    @spaces.GPU(duration=120)
     def call_local_model(messages):
         import time
         start_time = time.time()
         print(f"[DEBUG] ========== Starting inference ==========")
+        print(f"[DEBUG] CUDA available: {torch.cuda.is_available()}")
+        if torch.cuda.is_available():
+            print(f"[DEBUG] CUDA device count: {torch.cuda.device_count()}")
+            print(f"[DEBUG] Current CUDA device: {torch.cuda.current_device()}")
+            print(f"[DEBUG] Device name: {torch.cuda.get_device_name(0)}")
+            print(f"[DEBUG] GPU Memory allocated: {torch.cuda.memory_allocated(0) / 1024**3:.2f} GB")
+            print(f"[DEBUG] GPU Memory reserved: {torch.cuda.memory_reserved(0) / 1024**3:.2f} GB")
 
         # Key: check and make sure the model is on the GPU
         model_device = next(model.parameters()).device
         print(f"[DEBUG] Model device: {model_device}")
+        print(f"[DEBUG] Model dtype: {next(model.parameters()).dtype}")
 
         if str(model_device) == 'cpu':
             print(f"[ERROR] Model is on CPU! Trying to move it to the GPU...")
-
-
+            if torch.cuda.is_available():
+                move_start = time.time()
+                model.cuda()
+                move_time = time.time() - move_start
+                print(f"[DEBUG] Model device after cuda(): {next(model.parameters()).device}")
+                print(f"[DEBUG] Moving the model to the GPU took: {move_time:.2f}s")
+            else:
+                print(f"[CRITICAL] CUDA is unavailable! Running on CPU, which will be very slow!")
+                print(f"[CRITICAL] This may be due to ZeroGPU resource pressure or a timeout")
+        else:
+            print(f"[INFO] Model is already on the GPU: {model_device}")
 
         messages = [messages]
 
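Taken together, the commit follows the standard ZeroGPU pattern: load on CPU at import time, then request a GPU per call. A minimal sketch of that pattern, assuming a generic `model` and `processor` already loaded on CPU (the exact processor call for HunYuanVL may differ):

import spaces
import torch

@spaces.GPU(duration=120)  # duration extends the per-call GPU budget past the default
def generate(prompt: str) -> str:
    # Inside the decorated call a GPU should be attached, so CUDA is visible here.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    inputs = processor(text=prompt, return_tensors="pt").to(device)
    output_ids = model.generate(**inputs, max_new_tokens=256)
    return processor.decode(output_ids[0], skip_special_tokens=True)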