Spaces · Build error
winstoneli committed · commit d293559 · parent 1ce6f8e

update [qwen2.5]

Files changed:
- app/src/brushedit_app.py  +7 -13
- app/src/vlm_pipeline.py   +5 -5
- app/src/vlm_template.py   +12 -92
app/src/brushedit_app.py  CHANGED

@@ -15,7 +15,7 @@ from PIL import Image
 from huggingface_hub import hf_hub_download, snapshot_download
 from scipy.ndimage import binary_dilation, binary_erosion
 from transformers import (LlavaNextProcessor, LlavaNextForConditionalGeneration,
-                          …
+                          Qwen2_5_VLForConditionalGeneration, AutoProcessor)

 from segment_anything import SamPredictor, build_sam, SamAutomaticMaskGenerator
 from diffusers import StableDiffusionBrushNetPipeline, BrushNetModel, UniPCMultistepScheduler

@@ -293,7 +293,7 @@ OUTPUT_IMAGE_PATH = {
 # os.makedirs('gradio_temp_dir', exist_ok=True)

 VLM_MODEL_NAMES = list(vlms_template.keys())
-DEFAULT_VLM_MODEL_NAME = "Qwen2-VL-7B-Instruct (Default)"
+DEFAULT_VLM_MODEL_NAME = "Qwen2.5-VL-7B-Instruct (Default)"
 BASE_MODELS = list(base_models_template.keys())
 DEFAULT_BASE_MODEL = "realisticVision (Default)"

@@ -553,18 +553,12 @@ def update_vlm_model(vlm_name):
             return vlm_model_dropdown
         else:
             if os.path.exists(vlm_local_path):
-                vlm_processor = …
-                vlm_model = …
+                vlm_processor = AutoProcessor.from_pretrained(vlm_local_path)
+                vlm_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(vlm_local_path, torch_dtype=torch_dtype, device_map=device)
             else:
-                if vlm_name == "…
-                    vlm_processor = …
-                    vlm_model = …
-                elif vlm_name == "qwen2-vl-7b-instruct (Preload)":
-                    vlm_processor = Qwen2VLProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
-                    vlm_model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device)
-                elif vlm_name == "qwen2-vl-72b-instruct (Preload)":
-                    vlm_processor = Qwen2VLProcessor.from_pretrained("Qwen/Qwen2-VL-72B-Instruct")
-                    vlm_model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-72B-Instruct", torch_dtype=torch_dtype, device_map=device)
+                if vlm_name == "Qwen2.5-VL-7B-Instruct (Default)":
+                    vlm_processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
+                    vlm_model = Qwen2_5_VLForConditionalGeneration.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device)
     elif vlm_type == "openai":
         pass
     return "success"
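The net effect of the brushedit_app.py changes is that both the local-path and Hub fallbacks now go through AutoProcessor and Qwen2_5_VLForConditionalGeneration. A minimal sketch of that loading pattern, assuming a transformers release that ships Qwen2_5_VLForConditionalGeneration and a CUDA device, with the paths taken from the diff:

# Minimal sketch of the loading logic introduced in this commit (assumptions:
# a transformers build that includes Qwen2_5_VLForConditionalGeneration, a CUDA
# GPU, and the same local-snapshot convention as the Space).
import os
import torch
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

device = "cuda"
torch_dtype = torch.float16

vlm_local_path = "models/vlms/Qwen/Qwen2.5-VL-7B-Instruct"  # path used in the diff
hub_id = "Qwen/Qwen2.5-VL-7B-Instruct"

# Prefer the locally snapshotted weights, fall back to the Hub repo id otherwise.
source = vlm_local_path if os.path.exists(vlm_local_path) else hub_id
vlm_processor = AutoProcessor.from_pretrained(source)
vlm_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    source, torch_dtype=torch_dtype, device_map=device
)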
app/src/vlm_pipeline.py  CHANGED

@@ -8,7 +8,7 @@ import numpy as np
 import gradio as gr

 from openai import OpenAI
-from transformers import (LlavaNextForConditionalGeneration, …
+from transformers import (LlavaNextForConditionalGeneration, Qwen2_5_VLForConditionalGeneration)
 from qwen_vl_utils import process_vision_info

 from app.gpt4_o.instructions import (

@@ -94,7 +94,7 @@ def vlm_response_editing_type(vlm_processor,
     elif isinstance(vlm_model, LlavaNextForConditionalGeneration):
         messages = create_editing_category_messages_llava(editing_prompt)
         response_str = run_llava_next_inference(vlm_processor, vlm_model, messages, image, device=device)
-    elif isinstance(vlm_model, …
+    elif isinstance(vlm_model, Qwen2_5_VLForConditionalGeneration):
         messages = create_editing_category_messages_qwen2(editing_prompt)
         response_str = run_qwen2_vl_inference(vlm_processor, vlm_model, messages, image, device=device)

@@ -123,7 +123,7 @@ def vlm_response_object_wait_for_edit(vlm_processor,
     elif isinstance(vlm_model, LlavaNextForConditionalGeneration):
         messages = create_ori_object_messages_llava(editing_prompt)
         response_str = run_llava_next_inference(vlm_processor, vlm_model, messages, image, device)
-    elif isinstance(vlm_model, …
+    elif isinstance(vlm_model, Qwen2_5_VLForConditionalGeneration):
         messages = create_ori_object_messages_qwen2(editing_prompt)
         response_str = run_qwen2_vl_inference(vlm_processor, vlm_model, messages, image, device)
     return response_str

@@ -155,7 +155,7 @@ def vlm_response_mask(vlm_processor,
     elif isinstance(vlm_model, LlavaNextForConditionalGeneration):
         messages = create_add_object_messages_llava(editing_prompt, height=height, width=width)
         response_str = run_llava_next_inference(vlm_processor, vlm_model, messages, image, device)
-    elif isinstance(vlm_model, …
+    elif isinstance(vlm_model, Qwen2_5_VLForConditionalGeneration):
         base64_image = encode_image(image)
         messages = create_add_object_messages_qwen2(editing_prompt, base64_image, height=height, width=width)
         response_str = run_qwen2_vl_inference(vlm_processor, vlm_model, messages, image, device)

@@ -217,7 +217,7 @@ def vlm_response_prompt_after_apply_instruction(vlm_processor,
     elif isinstance(vlm_model, LlavaNextForConditionalGeneration):
         messages = create_apply_editing_messages_llava(editing_prompt)
         response_str = run_llava_next_inference(vlm_processor, vlm_model, messages, image, device)
-    elif isinstance(vlm_model, …
+    elif isinstance(vlm_model, Qwen2_5_VLForConditionalGeneration):
         base64_image = encode_image(image)
         messages = create_apply_editing_messages_qwen2(editing_prompt, base64_image)
         response_str = run_qwen2_vl_inference(vlm_processor, vlm_model, messages, image, device)
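Every hunk in vlm_pipeline.py keeps the existing run_qwen2_vl_inference helper and only swaps the isinstance check, since Qwen2.5-VL reuses the same chat-template / process_vision_info pipeline as Qwen2-VL. For orientation, a hedged sketch of what such a helper typically looks like; the function below is hypothetical and simplified, not the Space's actual run_qwen2_vl_inference, whose body is outside this diff:

# Hypothetical sketch of a run_qwen2_vl_inference-style helper, following the
# standard Qwen2-VL / Qwen2.5-VL recipe (processor.apply_chat_template plus
# qwen_vl_utils.process_vision_info); it works unchanged for
# Qwen2_5_VLForConditionalGeneration.
from qwen_vl_utils import process_vision_info

def run_qwen_vl_inference(processor, model, messages, device="cuda", max_new_tokens=256):
    # Render the chat template and collect any image/video inputs referenced in messages.
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text], images=image_inputs, videos=video_inputs,
        padding=True, return_tensors="pt",
    ).to(device)

    # Generate, strip the prompt tokens, and decode only the new text.
    generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
    trimmed = [out[len(inp):] for inp, out in zip(inputs.input_ids, generated_ids)]
    return processor.batch_decode(trimmed, skip_special_tokens=True)[0]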
app/src/vlm_template.py  CHANGED

@@ -4,7 +4,7 @@ import torch
 from openai import OpenAI
 from transformers import (
     LlavaNextProcessor, LlavaNextForConditionalGeneration,
-    …
+    Qwen2_5_VLForConditionalGeneration, AutoProcessor
 )
 ## init device
 device = "cuda"

@@ -12,100 +12,20 @@ torch_dtype = torch.float16


 vlms_list = [
-    # {
-    #     "type": "llava-next",
-    #     "name": "llava-v1.6-mistral-7b-hf",
-    #     "local_path": "models/vlms/llava-v1.6-mistral-7b-hf",
-    #     "processor": LlavaNextProcessor.from_pretrained(
-    #         "models/vlms/llava-v1.6-mistral-7b-hf"
-    #     ) if os.path.exists("models/vlms/llava-v1.6-mistral-7b-hf") else LlavaNextProcessor.from_pretrained(
-    #         "llava-hf/llava-v1.6-mistral-7b-hf"
-    #     ),
-    #     "model": LlavaNextForConditionalGeneration.from_pretrained(
-    #         "models/vlms/llava-v1.6-mistral-7b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu") if os.path.exists("models/vlms/llava-v1.6-mistral-7b-hf") else
-    #     LlavaNextForConditionalGeneration.from_pretrained(
-    #         "llava-hf/llava-v1.6-mistral-7b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu"),
-    # },
-    # {
-    #     "type": "llava-next",
-    #     "name": "llama3-llava-next-8b-hf (Preload)",
-    #     "local_path": "models/vlms/llama3-llava-next-8b-hf",
-    #     "processor": LlavaNextProcessor.from_pretrained(
-    #         "models/vlms/llama3-llava-next-8b-hf"
-    #     ) if os.path.exists("models/vlms/llama3-llava-next-8b-hf") else LlavaNextProcessor.from_pretrained(
-    #         "llava-hf/llama3-llava-next-8b-hf"
-    #     ),
-    #     "model": LlavaNextForConditionalGeneration.from_pretrained(
-    #         "models/vlms/llama3-llava-next-8b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu") if os.path.exists("models/vlms/llama3-llava-next-8b-hf") else
-    #     LlavaNextForConditionalGeneration.from_pretrained(
-    #         "llava-hf/llama3-llava-next-8b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu"),
-    # },
-    # {
-    #     "type": "llava-next",
-    #     "name": "llava-v1.6-vicuna-13b-hf",
-    #     "local_path": "models/vlms/llava-v1.6-vicuna-13b-hf",
-    #     "processor": LlavaNextProcessor.from_pretrained(
-    #         "models/vlms/llava-v1.6-vicuna-13b-hf"
-    #     ) if os.path.exists("models/vlms/llava-v1.6-vicuna-13b-hf") else LlavaNextProcessor.from_pretrained(
-    #         "llava-hf/llava-v1.6-vicuna-13b-hf"
-    #     ),
-    #     "model": LlavaNextForConditionalGeneration.from_pretrained(
-    #         "models/vlms/llava-v1.6-vicuna-13b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu") if os.path.exists("models/vlms/llava-v1.6-vicuna-13b-hf") else
-    #     LlavaNextForConditionalGeneration.from_pretrained(
-    #         "llava-hf/llava-v1.6-vicuna-13b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu"),
-    # },
-    # {
-    #     "type": "llava-next",
-    #     "name": "llava-v1.6-34b-hf",
-    #     "local_path": "models/vlms/llava-v1.6-34b-hf",
-    #     "processor": LlavaNextProcessor.from_pretrained(
-    #         "models/vlms/llava-v1.6-34b-hf"
-    #     ) if os.path.exists("models/vlms/llava-v1.6-34b-hf") else LlavaNextProcessor.from_pretrained(
-    #         "llava-hf/llava-v1.6-34b-hf"
-    #     ),
-    #     "model": LlavaNextForConditionalGeneration.from_pretrained(
-    #         "models/vlms/llava-v1.6-34b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu") if os.path.exists("models/vlms/llava-v1.6-34b-hf") else
-    #     LlavaNextForConditionalGeneration.from_pretrained(
-    #         "llava-hf/llava-v1.6-34b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu"),
-    # },
-    # {
-    #     "type": "qwen2-vl",
-    #     "name": "Qwen2-VL-2B-Instruct",
-    #     "local_path": "models/vlms/Qwen2-VL-2B-Instruct",
-    #     "processor": Qwen2VLProcessor.from_pretrained(
-    #         "models/vlms/Qwen2-VL-2B-Instruct"
-    #     ) if os.path.exists("models/vlms/Qwen2-VL-2B-Instruct") else Qwen2VLProcessor.from_pretrained(
-    #         "Qwen/Qwen2-VL-2B-Instruct"
-    #     ),
-    #     "model": Qwen2VLForConditionalGeneration.from_pretrained(
-    #         "models/vlms/Qwen2-VL-2B-Instruct", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu") if os.path.exists("models/vlms/Qwen2-VL-2B-Instruct") else
-    #     Qwen2VLForConditionalGeneration.from_pretrained(
-    #         "Qwen/Qwen2-VL-2B-Instruct", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu"),
-    # },
     {
         "type": "qwen2-vl",
-        "name": "Qwen2-VL-7B-Instruct (Default)",
-        "local_path": "models/vlms/Qwen2-VL-7B-Instruct",
-        "processor": …
-            "models/vlms/Qwen2-VL-7B-Instruct"
-        ) if os.path.exists("models/vlms/Qwen2-VL-7B-Instruct") else …
-            "Qwen/Qwen2-VL-7B-Instruct"
+        "name": "Qwen2.5-VL-7B-Instruct (Default)",
+        "local_path": "models/vlms/Qwen/Qwen2.5-VL-7B-Instruct",
+        "processor": AutoProcessor.from_pretrained(
+            "models/vlms/Qwen/Qwen2.5-VL-7B-Instruct"
+        ) if os.path.exists("models/vlms/Qwen/Qwen2.5-VL-7B-Instruct") else AutoProcessor.from_pretrained(
+            "Qwen/Qwen2.5-VL-7B-Instruct"
         ),
-        "model": …
-            "models/vlms/Qwen2-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device
-        ).to(device) if os.path.exists("models/vlms/Qwen2-VL-7B-Instruct") else
-        …
-            "Qwen/Qwen2-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device
+        "model": Qwen2_5_VLForConditionalGeneration.from_pretrained(
+            "models/vlms/Qwen/Qwen2.5-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device
+        ).to(device) if os.path.exists("models/vlms/Qwen/Qwen2.5-VL-7B-Instruct") else
+        Qwen2_5_VLForConditionalGeneration.from_pretrained(
+            "Qwen/Qwen2.5-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device
         ).to(device),
     },
     {