import os
import time
from pathlib import Path

import torch
from diffusers import FlowMatchEulerDiscreteScheduler, GGUFQuantizationConfig
from diffusers.utils import load_image
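
# Importing `patch` applies the local diffusers patches as a side effect; the name itself is unused.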
from diffusers_local import patch  # noqa: F401
from diffusers_local.pipeline_z_image_control_unified import ZImageControlUnifiedPipeline
from diffusers_local.z_image_control_transformer_2d import ZImageControlTransformer2DModel
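
# Tune the CUDA allocator: expandable segments and earlier garbage collection
# reduce fragmentation when weights are streamed on and off the GPU.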
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True,garbage_collection_threshold:0.7,max_split_size_mb:1024"


def main():
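    """Image-to-image generation with the Z-Image control pipeline, optionally using a GGUF-quantized transformer."""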
    BASE_MODEL_ID = "."
    # Choose one GGUF checkpoint; the q8_0 assignment overrides the q4_k_m one above it.
    # GGUF_MODEL_FILE = "./transformer/z_image_turbo_control_unified_v2.1_q4_k_m.gguf"
    GGUF_MODEL_FILE = "./transformer/z_image_turbo_control_unified_v2.1_q8_0.gguf"

    use_gguf = True

    prompt = "an Asian man with a bottle"
    negative_prompt = "Low quality, blurry, ugly, deformed fingers, extra fingers, bad hand, bad anatomy, noise, overexposed, underexposed"

    target_height = 1024
    target_width = 768
    num_inference_steps = 9
    guidance_scale = 0  # CFG disabled, as is typical for few-step turbo models.
    strength = 0.75  # Img2img denoising strength: 0 keeps the input, 1 redraws it fully.
    seed = 43
    shift = 3.0
    input_image = load_image("assets/bottle.jpg")
    generator = torch.Generator("cuda").manual_seed(seed)

    print("Loading Pipeline...")
    # `shift` biases the flow-matching timestep schedule toward higher noise levels.
    scheduler = FlowMatchEulerDiscreteScheduler(num_train_timesteps=1000, shift=shift)

    if use_gguf:
        # Quantized path: load GGUF weights, reading the model config from the
        # same directory and dequantizing to bfloat16 for compute.
        transformer = ZImageControlTransformer2DModel.from_single_file(
            GGUF_MODEL_FILE,
            torch_dtype=torch.bfloat16,
            config=str(Path(GGUF_MODEL_FILE).parent),
            quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
            use_controlnet=False,
        )
    else:
        # Full-precision path: load the transformer weights from the base checkpoint.
        transformer = ZImageControlTransformer2DModel.from_pretrained(
            BASE_MODEL_ID,
            subfolder="transformer",
            torch_dtype=torch.bfloat16,
            use_controlnet=False,
        )
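
    # Reuse the local checkpoint for the remaining components and attach the custom scheduler.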
    pipe = ZImageControlUnifiedPipeline.from_pretrained(BASE_MODEL_ID, torch_dtype=torch.bfloat16, transformer=transformer)
    pipe.scheduler = scheduler
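
    # Block-level group offloading streams transformer blocks between CPU and GPU,
    # overlapping transfers with compute via a CUDA stream, to cap peak VRAM usage.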
    pipe.enable_group_offload(
        onload_device="cuda", offload_device="cpu", offload_type="block_level", num_blocks_per_group=1, low_cpu_mem_usage=True, use_stream=True
    )
    # Decode latents in tiles to bound VAE memory use at this resolution.
    pipe.vae.use_tiling = True

    print("\nRunning Inference...")
    start_inference_time = time.time()
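
    # Standard diffusers img2img semantics (assumed for this custom pipeline): the input
    # image is noised to `strength`, so only ~strength * num_inference_steps steps run.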
    generated_image = pipe(
        prompt=prompt,
        image=input_image,
        strength=strength,
        negative_prompt=negative_prompt,
        height=target_height,
        width=target_width,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        generator=generator,
    ).images[0]

    end_inference_time = time.time()
    print(f"\nGeneration finished in {end_inference_time - start_inference_time:.2f} seconds.")

    os.makedirs("outputs", exist_ok=True)
    output_filename = "outputs/z_image_controlnet_result_i2i.png"
    generated_image.save(output_filename)
    print(f"Image successfully saved as '{output_filename}'")
    generated_image.show()


if __name__ == "__main__":
    main()