import os
import time
from pathlib import Path
import torch
from diffusers import FlowMatchEulerDiscreteScheduler, GGUFQuantizationConfig
from diffusers.utils import load_image
from diffusers_local import patch # Apply necessary patches for local diffusers components
# Import the patched pipeline and transformer components
from diffusers_local.pipeline_z_image_control_unified import ZImageControlUnifiedPipeline
from diffusers_local.z_image_control_transformer_2d import ZImageControlTransformer2DModel
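# Tune the CUDA caching allocator: expandable segments reduce fragmentation,
# garbage_collection_threshold reclaims cached blocks once usage exceeds 70% of
# capacity, and max_split_size_mb stops the allocator splitting blocks over 1 GiB.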
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True,garbage_collection_threshold:0.7,max_split_size_mb:1024"
def main():
    # 1. Set params
    BASE_MODEL_ID = "."
    # GGUF_MODEL_FILE = "./transformer/z_image_turbo_control_unified_v2.1_q4_k_m.gguf"  # smaller q4_k_m variant
    GGUF_MODEL_FILE = "./transformer/z_image_turbo_control_unified_v2.1_q8_0.gguf"
    use_gguf = True
    prompt = "an Asian man with a bottle"
    negative_prompt = "Low quality, blurry, ugly, deformed fingers, extra fingers, bad hand, bad anatomy, noise, overexposed, underexposed"
    target_height = 1024
    target_width = 768
    num_inference_steps = 9
    guidance_scale = 0
    strength = 0.75
    seed = 43
    shift = 3.0
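    # Notes on the settings above (this is a custom pipeline, so semantics may differ):
    # guidance_scale=0 effectively disables classifier-free guidance, which is typical
    # for Turbo-distilled checkpoints, so the negative prompt may have little effect;
    # strength=0.75 starts img2img ~75% of the way into the noise schedule, so only
    # about three quarters of the steps actually run; `shift` stretches the
    # flow-matching sigma schedule toward noisier timesteps.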
    input_image = load_image("assets/bottle.jpg")
    generator = torch.Generator("cuda").manual_seed(seed)

    print("Loading Pipeline...")
    scheduler = FlowMatchEulerDiscreteScheduler(num_train_timesteps=1000, shift=shift)
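    # GGUF files ship pre-quantized weights (q8_0 here); GGUFQuantizationConfig has
    # diffusers dequantize them to `compute_dtype` (bfloat16) on the fly at inference,
    # trading a little speed for a much smaller memory footprint.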
    if use_gguf:
        transformer = ZImageControlTransformer2DModel.from_single_file(
            GGUF_MODEL_FILE,
            torch_dtype=torch.bfloat16,
            config=str(Path(GGUF_MODEL_FILE).parent),
            quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
            use_controlnet=False,  # <== Disable the control layers for faster inference
        )
    else:
        transformer = ZImageControlTransformer2DModel.from_pretrained(
            BASE_MODEL_ID,
            subfolder="transformer",
            torch_dtype=torch.bfloat16,
            use_controlnet=False,  # <== Disable the control layers for faster inference
        )
    pipe = ZImageControlUnifiedPipeline.from_pretrained(BASE_MODEL_ID, torch_dtype=torch.bfloat16, transformer=transformer)
    pipe.scheduler = scheduler

    # Apply memory optimizations (optional)
    pipe.enable_group_offload(
        onload_device="cuda", offload_device="cpu", offload_type="block_level", num_blocks_per_group=1, low_cpu_mem_usage=True, use_stream=True
    )
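    # Group offloading keeps only `num_blocks_per_group` transformer blocks on the GPU
    # at a time; `use_stream=True` prefetches the next group on a separate CUDA stream
    # to overlap transfers with compute. VAE tiling below decodes latents in tiles to
    # cap peak VRAM during decode.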
    pipe.vae.use_tiling = True
    # ---
print("\nRunning Inference...")
start_inference_time = time.time()
generated_image = pipe(
prompt=prompt,
image=input_image,
strength=strength,
negative_prompt=negative_prompt,
height=target_height,
width=target_width,
num_inference_steps=num_inference_steps,
guidance_scale=guidance_scale,
generator=generator,
).images[0]
    end_inference_time = time.time()
    print(f"\nGeneration finished in {end_inference_time - start_inference_time:.2f} seconds.")
    # Save Output
    os.makedirs("outputs", exist_ok=True)
    output_filename = "outputs/z_image_controlnet_result_i2i.png"
    generated_image.save(output_filename)
    print(f"Image successfully saved as '{output_filename}'")
    generated_image.show()
if __name__ == "__main__":
main()