Spaces: Running on Zero

test: memory leak patch

Files changed:
- app.py +4 -2
- optimization.py +15 -1
app.py CHANGED

@@ -340,8 +340,10 @@ def load_and_fuse_lora(lora_name):
 
     load_and_fuse_additional_lora(lora_name)
 
-    # Ahead-of-time compilation
-    optimize_pipeline_(pipe, image=[
+    # Ahead-of-time compilation with tensor inputs (memory efficient)
+    optimize_pipeline_(pipe, image=[torch.randn(1, 3, 1024, 1024, dtype=dtype).to(device), torch.randn(1, 3, 1024, 1024, dtype=dtype).to(device)], prompt="prompt")
+
+    print("Model compilation complete.")
 
 @spaces.GPU(duration=60)
 def infer(
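The substantive change in app.py is that the ahead-of-time compilation warm-up now feeds `optimize_pipeline_` synthetic tensors instead of real decoded images, so the capture step never pins actual image buffers in GPU memory. A minimal sketch of the idea, assuming `device`, `dtype`, and `pipe` are defined earlier in app.py (the helper name below is hypothetical, not part of the diff):

import torch

def make_warmup_images(device: torch.device, dtype: torch.dtype, n: int = 2) -> list[torch.Tensor]:
    """Build synthetic 1x3x1024x1024 stand-ins for real edit images.

    The shape mirrors the literal tensors in the diff above; random data
    is enough because compilation only cares about shapes and dtypes.
    """
    return [torch.randn(1, 3, 1024, 1024, dtype=dtype).to(device) for _ in range(n)]

# Hypothetical usage, mirroring the patched call in app.py:
# optimize_pipeline_(pipe, image=make_warmup_images(device, dtype), prompt="prompt")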
optimization.py CHANGED

@@ -52,6 +52,10 @@ def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):
 
     with spaces.aoti_capture(pipeline.transformer) as call:
         pipeline(*args, **kwargs)
+
+    # Clean up memory after pipeline execution but before compilation
+    import gc
+    torch.cuda.empty_cache()
 
     dynamic_shapes = tree_map(lambda t: None, call.kwargs)
     dynamic_shapes |= TRANSFORMER_DYNAMIC_SHAPES

@@ -65,6 +69,16 @@ def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):
             dynamic_shapes=dynamic_shapes,
         )
 
-        return spaces.aoti_compile(exported, INDUCTOR_CONFIGS)
+        # Clean up memory before compilation
+        torch.cuda.empty_cache()
+        gc.collect()
+
+        compiled_result = spaces.aoti_compile(exported, INDUCTOR_CONFIGS)
+
+        # Clean up compilation memory
+        torch.cuda.empty_cache()
+        gc.collect()
+
+        return compiled_result
 
     spaces.aoti_apply(compile_transformer(), pipeline.transformer)
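The optimization.py change brackets the expensive export/compile step with explicit allocator and garbage-collector flushes. A standalone sketch of that pattern, where `run_with_memory_cleanup` and `compile_fn` are hypothetical stand-ins for wrapping a memory-hungry call such as spaces.aoti_compile:

import gc

import torch

def run_with_memory_cleanup(compile_fn, *args, **kwargs):
    """Call a memory-hungry function with explicit cleanup before and after.

    torch.cuda.empty_cache() returns unoccupied cached allocator blocks to
    the driver, and gc.collect() frees unreachable Python objects that may
    still hold GPU tensors; doing both shrinks the footprint the compile
    step sees.
    """
    torch.cuda.empty_cache()
    gc.collect()

    result = compile_fn(*args, **kwargs)

    # Compilation can leave large temporaries behind; clean up again so
    # the inference path starts from a smaller baseline.
    torch.cuda.empty_cache()
    gc.collect()
    return result

Note that torch.cuda.empty_cache() only releases memory the caching allocator holds in reserve; tensors still referenced by live Python objects are untouched, which is why the patch pairs it with gc.collect().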