LPX55 committed on
Commit
3544bd9
·
1 Parent(s): 931e0eb

test: memory leak patch

Browse files
Files changed (2) hide show
  1. app.py +4 -2
  2. optimization.py +15 -1
app.py CHANGED
@@ -340,8 +340,10 @@ def load_and_fuse_lora(lora_name):
340
 
341
  load_and_fuse_additional_lora(lora_name)
342
 
343
- # Ahead-of-time compilation
344
- optimize_pipeline_(pipe, image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))], prompt="prompt")
 
 
345
 
346
  @spaces.GPU(duration=60)
347
  def infer(
 
340
 
341
  load_and_fuse_additional_lora(lora_name)
342
 
343
+ # Ahead-of-time compilation with tensor inputs (memory efficient)
344
+ optimize_pipeline_(pipe, image=[torch.randn(1, 3, 1024, 1024, dtype=dtype).to(device), torch.randn(1, 3, 1024, 1024, dtype=dtype).to(device)], prompt="prompt")
345
+
346
+ print("Model compilation complete.")
347
 
348
  @spaces.GPU(duration=60)
349
  def infer(
optimization.py CHANGED
@@ -52,6 +52,10 @@ def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kw
52
 
53
  with spaces.aoti_capture(pipeline.transformer) as call:
54
  pipeline(*args, **kwargs)
 
 
 
 
55
 
56
  dynamic_shapes = tree_map(lambda t: None, call.kwargs)
57
  dynamic_shapes |= TRANSFORMER_DYNAMIC_SHAPES
@@ -65,6 +69,16 @@ def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kw
65
  dynamic_shapes=dynamic_shapes,
66
  )
67
 
68
- return spaces.aoti_compile(exported, INDUCTOR_CONFIGS)
 
 
 
 
 
 
 
 
 
 
69
 
70
  spaces.aoti_apply(compile_transformer(), pipeline.transformer)
 
52
 
53
  with spaces.aoti_capture(pipeline.transformer) as call:
54
  pipeline(*args, **kwargs)
55
+
56
+ # Clean up memory after pipeline execution but before compilation
57
+ import gc
58
+ torch.cuda.empty_cache()
59
 
60
  dynamic_shapes = tree_map(lambda t: None, call.kwargs)
61
  dynamic_shapes |= TRANSFORMER_DYNAMIC_SHAPES
 
69
  dynamic_shapes=dynamic_shapes,
70
  )
71
 
72
+ # Clean up memory before compilation
73
+ torch.cuda.empty_cache()
74
+ gc.collect()
75
+
76
+ compiled_result = spaces.aoti_compile(exported, INDUCTOR_CONFIGS)
77
+
78
+ # Clean up compilation memory
79
+ torch.cuda.empty_cache()
80
+ gc.collect()
81
+
82
+ return compiled_result
83
 
84
  spaces.aoti_apply(compile_transformer(), pipeline.transformer)