Spaces · Build error
winstoneli committed · commit d293559 · parent 1ce6f8e

update [qwen2.5]

Files changed:
- app/src/brushedit_app.py  +7 -13
- app/src/vlm_pipeline.py   +5 -5
- app/src/vlm_template.py   +12 -92
app/src/brushedit_app.py  CHANGED

@@ -15,7 +15,7 @@ from PIL import Image
 from huggingface_hub import hf_hub_download, snapshot_download
 from scipy.ndimage import binary_dilation, binary_erosion
 from transformers import (LlavaNextProcessor, LlavaNextForConditionalGeneration,
-                          …
+                          Qwen2_5_VLForConditionalGeneration, AutoProcessor)

 from segment_anything import SamPredictor, build_sam, SamAutomaticMaskGenerator
 from diffusers import StableDiffusionBrushNetPipeline, BrushNetModel, UniPCMultistepScheduler

@@ -293,7 +293,7 @@ OUTPUT_IMAGE_PATH = {
 # os.makedirs('gradio_temp_dir', exist_ok=True)

 VLM_MODEL_NAMES = list(vlms_template.keys())
-DEFAULT_VLM_MODEL_NAME = "Qwen2-VL-7B-Instruct (Default)"
+DEFAULT_VLM_MODEL_NAME = "Qwen2.5-VL-7B-Instruct (Default)"
 BASE_MODELS = list(base_models_template.keys())
 DEFAULT_BASE_MODEL = "realisticVision (Default)"

@@ -553,18 +553,12 @@ def update_vlm_model(vlm_name):
             return vlm_model_dropdown
         else:
             if os.path.exists(vlm_local_path):
-                vlm_processor = …
-                vlm_model = …
+                vlm_processor = AutoProcessor.from_pretrained(vlm_local_path)
+                vlm_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(vlm_local_path, torch_dtype=torch_dtype, device_map=device)
             else:
-                if vlm_name == "…
-                    vlm_processor = …
-                    vlm_model = …
-                elif vlm_name == "qwen2-vl-7b-instruct (Preload)":
-                    vlm_processor = Qwen2VLProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
-                    vlm_model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device)
-                elif vlm_name == "qwen2-vl-72b-instruct (Preload)":
-                    vlm_processor = Qwen2VLProcessor.from_pretrained("Qwen/Qwen2-VL-72B-Instruct")
-                    vlm_model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-72B-Instruct", torch_dtype=torch_dtype, device_map=device)
+                if vlm_name == "Qwen2.5-VL-7B-Instruct (Default)":
+                    vlm_processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
+                    vlm_model = Qwen2_5_VLForConditionalGeneration.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device)
     elif vlm_type == "openai":
         pass
     return "success"
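The net effect of the brushedit_app.py changes is that both the local-path and Hub fallbacks now go through AutoProcessor and Qwen2_5_VLForConditionalGeneration. A minimal sketch of that loading pattern, assuming a transformers release that ships Qwen2_5_VLForConditionalGeneration and a CUDA device, with the paths taken from the diff:

# Minimal sketch of the loading logic introduced in this commit (assumptions:
# a transformers build that includes Qwen2_5_VLForConditionalGeneration, a CUDA
# GPU, and the same local-snapshot convention as the Space).
import os
import torch
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

device = "cuda"
torch_dtype = torch.float16

vlm_local_path = "models/vlms/Qwen/Qwen2.5-VL-7B-Instruct"  # path used in the diff
hub_id = "Qwen/Qwen2.5-VL-7B-Instruct"

# Prefer the locally snapshotted weights, fall back to the Hub repo id otherwise.
source = vlm_local_path if os.path.exists(vlm_local_path) else hub_id
vlm_processor = AutoProcessor.from_pretrained(source)
vlm_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    source, torch_dtype=torch_dtype, device_map=device
)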
app/src/vlm_pipeline.py  CHANGED

@@ -8,7 +8,7 @@ import numpy as np
 import gradio as gr

 from openai import OpenAI
-from transformers import (LlavaNextForConditionalGeneration, …
+from transformers import (LlavaNextForConditionalGeneration, Qwen2_5_VLForConditionalGeneration)
 from qwen_vl_utils import process_vision_info

 from app.gpt4_o.instructions import (

@@ -94,7 +94,7 @@ def vlm_response_editing_type(vlm_processor,
     elif isinstance(vlm_model, LlavaNextForConditionalGeneration):
         messages = create_editing_category_messages_llava(editing_prompt)
         response_str = run_llava_next_inference(vlm_processor, vlm_model, messages, image, device=device)
-    elif isinstance(vlm_model, …
+    elif isinstance(vlm_model, Qwen2_5_VLForConditionalGeneration):
         messages = create_editing_category_messages_qwen2(editing_prompt)
         response_str = run_qwen2_vl_inference(vlm_processor, vlm_model, messages, image, device=device)

@@ -123,7 +123,7 @@ def vlm_response_object_wait_for_edit(vlm_processor,
     elif isinstance(vlm_model, LlavaNextForConditionalGeneration):
         messages = create_ori_object_messages_llava(editing_prompt)
         response_str = run_llava_next_inference(vlm_processor, vlm_model, messages, image, device)
-    elif isinstance(vlm_model, …
+    elif isinstance(vlm_model, Qwen2_5_VLForConditionalGeneration):
         messages = create_ori_object_messages_qwen2(editing_prompt)
         response_str = run_qwen2_vl_inference(vlm_processor, vlm_model, messages, image, device)
     return response_str

@@ -155,7 +155,7 @@ def vlm_response_mask(vlm_processor,
     elif isinstance(vlm_model, LlavaNextForConditionalGeneration):
         messages = create_add_object_messages_llava(editing_prompt, height=height, width=width)
         response_str = run_llava_next_inference(vlm_processor, vlm_model, messages, image, device)
-    elif isinstance(vlm_model, …
+    elif isinstance(vlm_model, Qwen2_5_VLForConditionalGeneration):
         base64_image = encode_image(image)
         messages = create_add_object_messages_qwen2(editing_prompt, base64_image, height=height, width=width)
         response_str = run_qwen2_vl_inference(vlm_processor, vlm_model, messages, image, device)

@@ -217,7 +217,7 @@ def vlm_response_prompt_after_apply_instruction(vlm_processor,
     elif isinstance(vlm_model, LlavaNextForConditionalGeneration):
         messages = create_apply_editing_messages_llava(editing_prompt)
         response_str = run_llava_next_inference(vlm_processor, vlm_model, messages, image, device)
-    elif isinstance(vlm_model, …
+    elif isinstance(vlm_model, Qwen2_5_VLForConditionalGeneration):
         base64_image = encode_image(image)
         messages = create_apply_editing_messages_qwen2(editing_prompt, base64_image)
         response_str = run_qwen2_vl_inference(vlm_processor, vlm_model, messages, image, device)
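Every hunk in vlm_pipeline.py keeps the existing run_qwen2_vl_inference helper and only swaps the isinstance check, since Qwen2.5-VL reuses the same chat-template / process_vision_info pipeline as Qwen2-VL. For orientation, a hedged sketch of what such a helper typically looks like; the function below is hypothetical and simplified, not the Space's actual run_qwen2_vl_inference, whose body is outside this diff:

# Hypothetical sketch of a run_qwen2_vl_inference-style helper, following the
# standard Qwen2-VL / Qwen2.5-VL recipe (processor.apply_chat_template plus
# qwen_vl_utils.process_vision_info); it works unchanged for
# Qwen2_5_VLForConditionalGeneration.
from qwen_vl_utils import process_vision_info

def run_qwen_vl_inference(processor, model, messages, device="cuda", max_new_tokens=256):
    # Render the chat template and collect any image/video inputs referenced in messages.
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text], images=image_inputs, videos=video_inputs,
        padding=True, return_tensors="pt",
    ).to(device)

    # Generate, strip the prompt tokens, and decode only the new text.
    generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
    trimmed = [out[len(inp):] for inp, out in zip(inputs.input_ids, generated_ids)]
    return processor.batch_decode(trimmed, skip_special_tokens=True)[0]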
app/src/vlm_template.py  CHANGED

@@ -4,7 +4,7 @@ import torch
 from openai import OpenAI
 from transformers import (
     LlavaNextProcessor, LlavaNextForConditionalGeneration,
-    …
+    Qwen2_5_VLForConditionalGeneration, AutoProcessor
 )
 ## init device
 device = "cuda"

@@ -12,100 +12,20 @@ torch_dtype = torch.float16


 vlms_list = [
-    # {
-    #     "type": "llava-next",
-    #     "name": "llava-v1.6-mistral-7b-hf",
-    #     "local_path": "models/vlms/llava-v1.6-mistral-7b-hf",
-    #     "processor": LlavaNextProcessor.from_pretrained(
-    #         "models/vlms/llava-v1.6-mistral-7b-hf"
-    #     ) if os.path.exists("models/vlms/llava-v1.6-mistral-7b-hf") else LlavaNextProcessor.from_pretrained(
-    #         "llava-hf/llava-v1.6-mistral-7b-hf"
-    #     ),
-    #     "model": LlavaNextForConditionalGeneration.from_pretrained(
-    #         "models/vlms/llava-v1.6-mistral-7b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu") if os.path.exists("models/vlms/llava-v1.6-mistral-7b-hf") else
-    #     LlavaNextForConditionalGeneration.from_pretrained(
-    #         "llava-hf/llava-v1.6-mistral-7b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu"),
-    # },
-    # {
-    #     "type": "llava-next",
-    #     "name": "llama3-llava-next-8b-hf (Preload)",
-    #     "local_path": "models/vlms/llama3-llava-next-8b-hf",
-    #     "processor": LlavaNextProcessor.from_pretrained(
-    #         "models/vlms/llama3-llava-next-8b-hf"
-    #     ) if os.path.exists("models/vlms/llama3-llava-next-8b-hf") else LlavaNextProcessor.from_pretrained(
-    #         "llava-hf/llama3-llava-next-8b-hf"
-    #     ),
-    #     "model": LlavaNextForConditionalGeneration.from_pretrained(
-    #         "models/vlms/llama3-llava-next-8b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu") if os.path.exists("models/vlms/llama3-llava-next-8b-hf") else
-    #     LlavaNextForConditionalGeneration.from_pretrained(
-    #         "llava-hf/llama3-llava-next-8b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu"),
-    # },
-    # {
-    #     "type": "llava-next",
-    #     "name": "llava-v1.6-vicuna-13b-hf",
-    #     "local_path": "models/vlms/llava-v1.6-vicuna-13b-hf",
-    #     "processor": LlavaNextProcessor.from_pretrained(
-    #         "models/vlms/llava-v1.6-vicuna-13b-hf"
-    #     ) if os.path.exists("models/vlms/llava-v1.6-vicuna-13b-hf") else LlavaNextProcessor.from_pretrained(
-    #         "llava-hf/llava-v1.6-vicuna-13b-hf"
-    #     ),
-    #     "model": LlavaNextForConditionalGeneration.from_pretrained(
-    #         "models/vlms/llava-v1.6-vicuna-13b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu") if os.path.exists("models/vlms/llava-v1.6-vicuna-13b-hf") else
-    #     LlavaNextForConditionalGeneration.from_pretrained(
-    #         "llava-hf/llava-v1.6-vicuna-13b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu"),
-    # },
-    # {
-    #     "type": "llava-next",
-    #     "name": "llava-v1.6-34b-hf",
-    #     "local_path": "models/vlms/llava-v1.6-34b-hf",
-    #     "processor": LlavaNextProcessor.from_pretrained(
-    #         "models/vlms/llava-v1.6-34b-hf"
-    #     ) if os.path.exists("models/vlms/llava-v1.6-34b-hf") else LlavaNextProcessor.from_pretrained(
-    #         "llava-hf/llava-v1.6-34b-hf"
-    #     ),
-    #     "model": LlavaNextForConditionalGeneration.from_pretrained(
-    #         "models/vlms/llava-v1.6-34b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu") if os.path.exists("models/vlms/llava-v1.6-34b-hf") else
-    #     LlavaNextForConditionalGeneration.from_pretrained(
-    #         "llava-hf/llava-v1.6-34b-hf", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu"),
-    # },
-    # {
-    #     "type": "qwen2-vl",
-    #     "name": "Qwen2-VL-2B-Instruct",
-    #     "local_path": "models/vlms/Qwen2-VL-2B-Instruct",
-    #     "processor": Qwen2VLProcessor.from_pretrained(
-    #         "models/vlms/Qwen2-VL-2B-Instruct"
-    #     ) if os.path.exists("models/vlms/Qwen2-VL-2B-Instruct") else Qwen2VLProcessor.from_pretrained(
-    #         "Qwen/Qwen2-VL-2B-Instruct"
-    #     ),
-    #     "model": Qwen2VLForConditionalGeneration.from_pretrained(
-    #         "models/vlms/Qwen2-VL-2B-Instruct", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu") if os.path.exists("models/vlms/Qwen2-VL-2B-Instruct") else
-    #     Qwen2VLForConditionalGeneration.from_pretrained(
-    #         "Qwen/Qwen2-VL-2B-Instruct", torch_dtype=torch_dtype, device_map=device
-    #     ).to("cpu"),
-    # },
     {
         "type": "qwen2-vl",
-        "name": "Qwen2-VL-7B-Instruct (Default)",
-        "local_path": "models/vlms/Qwen2-VL-7B-Instruct",
-        "processor": …
-            "models/vlms/Qwen2-VL-7B-Instruct"
-        ) if os.path.exists("models/vlms/Qwen2-VL-7B-Instruct") else …
-            "Qwen/Qwen2-VL-7B-Instruct"
+        "name": "Qwen2.5-VL-7B-Instruct (Default)",
+        "local_path": "models/vlms/Qwen/Qwen2.5-VL-7B-Instruct",
+        "processor": AutoProcessor.from_pretrained(
+            "models/vlms/Qwen/Qwen2.5-VL-7B-Instruct"
+        ) if os.path.exists("models/vlms/Qwen/Qwen2.5-VL-7B-Instruct") else AutoProcessor.from_pretrained(
+            "Qwen/Qwen2.5-VL-7B-Instruct"
         ),
-        "model": …
-            "models/vlms/Qwen2-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device
-        ).to(device) if os.path.exists("models/vlms/Qwen2-VL-7B-Instruct") else
-        …
-            "Qwen/Qwen2-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device
+        "model": Qwen2_5_VLForConditionalGeneration.from_pretrained(
+            "models/vlms/Qwen/Qwen2.5-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device
+        ).to(device) if os.path.exists("models/vlms/Qwen/Qwen2.5-VL-7B-Instruct") else
+        Qwen2_5_VLForConditionalGeneration.from_pretrained(
+            "Qwen/Qwen2.5-VL-7B-Instruct", torch_dtype=torch_dtype, device_map=device
         ).to(device),
     },
     {