Spaces: Running on Zero

test: memory leak patch

Files changed:
- app.py +4 -2
- optimization.py +15 -1
app.py CHANGED

@@ -340,8 +340,10 @@ def load_and_fuse_lora(lora_name):
 
     load_and_fuse_additional_lora(lora_name)
 
-    # Ahead-of-time compilation
-    optimize_pipeline_(pipe, image=[
+    # Ahead-of-time compilation with tensor inputs (memory efficient)
+    optimize_pipeline_(pipe, image=[torch.randn(1, 3, 1024, 1024, dtype=dtype).to(device), torch.randn(1, 3, 1024, 1024, dtype=dtype).to(device)], prompt="prompt")
+
+    print("Model compilation complete.")
 
 @spaces.GPU(duration=60)
 def infer(
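The substantive change in app.py is that the ahead-of-time compilation warm-up now feeds `optimize_pipeline_` synthetic tensors instead of real decoded images, so the capture step never pins actual image buffers in GPU memory. A minimal sketch of the idea, assuming `device`, `dtype`, and `pipe` are defined earlier in app.py (the helper name below is hypothetical, not part of the diff):

import torch

def make_warmup_images(device: torch.device, dtype: torch.dtype, n: int = 2) -> list[torch.Tensor]:
    """Build synthetic 1x3x1024x1024 stand-ins for real edit images.

    The shape mirrors the literal tensors in the diff above; random data
    is enough because compilation only cares about shapes and dtypes.
    """
    return [torch.randn(1, 3, 1024, 1024, dtype=dtype).to(device) for _ in range(n)]

# Hypothetical usage, mirroring the patched call in app.py:
# optimize_pipeline_(pipe, image=make_warmup_images(device, dtype), prompt="prompt")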
optimization.py CHANGED

@@ -52,6 +52,10 @@ def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):
 
     with spaces.aoti_capture(pipeline.transformer) as call:
         pipeline(*args, **kwargs)
+
+    # Clean up memory after pipeline execution but before compilation
+    import gc
+    torch.cuda.empty_cache()
 
     dynamic_shapes = tree_map(lambda t: None, call.kwargs)
     dynamic_shapes |= TRANSFORMER_DYNAMIC_SHAPES

@@ -65,6 +69,16 @@ def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):
             dynamic_shapes=dynamic_shapes,
         )
 
-        return spaces.aoti_compile(exported, INDUCTOR_CONFIGS)
+        # Clean up memory before compilation
+        torch.cuda.empty_cache()
+        gc.collect()
+
+        compiled_result = spaces.aoti_compile(exported, INDUCTOR_CONFIGS)
+
+        # Clean up compilation memory
+        torch.cuda.empty_cache()
+        gc.collect()
+
+        return compiled_result
 
     spaces.aoti_apply(compile_transformer(), pipeline.transformer)
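The optimization.py change brackets the expensive export/compile step with explicit allocator and garbage-collector flushes. A standalone sketch of that pattern, where `run_with_memory_cleanup` and `compile_fn` are hypothetical stand-ins for wrapping a memory-hungry call such as spaces.aoti_compile:

import gc

import torch

def run_with_memory_cleanup(compile_fn, *args, **kwargs):
    """Call a memory-hungry function with explicit cleanup before and after.

    torch.cuda.empty_cache() returns unoccupied cached allocator blocks to
    the driver, and gc.collect() frees unreachable Python objects that may
    still hold GPU tensors; doing both shrinks the footprint the compile
    step sees.
    """
    torch.cuda.empty_cache()
    gc.collect()

    result = compile_fn(*args, **kwargs)

    # Compilation can leave large temporaries behind; clean up again so
    # the inference path starts from a smaller baseline.
    torch.cuda.empty_cache()
    gc.collect()
    return result

Note that torch.cuda.empty_cache() only releases memory the caching allocator holds in reserve; tensors still referenced by live Python objects are untouched, which is why the patch pairs it with gc.collect().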