| |
|
import os
import shutil
import subprocess

# Fetch the model repository and install its Python dependencies.
# check=True makes a failed clone or install abort the script immediately
# instead of silently continuing and crashing later with confusing errors.
if os.path.isdir("edge_vlm"):
    # git clone errors out if the target directory already exists, so a
    # re-run of this script would previously fail here.
    print("Repository already present, skipping clone.")
else:
    print("Cloning the repository...")
    subprocess.run(["git", "clone", "https://huggingface.co/irotem98/edge_vlm"], check=True)
print("Installing dependencies...")
subprocess.run(["pip", "install", "-r", "edge_vlm/requirements.txt"], check=True)
subprocess.run(["pip", "install", "sentencepiece"], check=True)
| |
|
| | |
print("Copying files...")
source_dir = "edge_vlm"
destination_dir = "."

import torch
import gradio as gr

# Mirror every entry of the cloned repo into the working directory so the
# model code can be imported as local modules.
for entry in os.listdir(source_dir):
    src = os.path.join(source_dir, entry)
    dst = os.path.join(destination_dir, entry)

    if not os.path.isdir(src):
        shutil.copy(src, dst)
        continue
    # Directories: remove any stale copy first, then copy the tree over.
    if os.path.exists(dst):
        shutil.rmtree(dst)
    shutil.copytree(src, dst)

print("Files copied successfully.")
| |
|
| | |
from model import MoondreamModel

# Load the captioning model and its tokenizer once at startup; both are
# reused by every Gradio request.
print("Loading model...")
model = MoondreamModel.load_model()
print("Model loaded.")

print("Loading tokenizer...")
tokenizer = MoondreamModel.load_tokenizer()
print("Tokenizer loaded.")

# Prompt used for every caption request (the UI does not expose it).
default_question = "Describe the image."
| |
|
| | |
def generate_caption_with_default(image):
    """Run the full captioning pipeline on *image* and return the caption text.

    Uses the module-level ``model`` and ``tokenizer`` loaded at startup;
    ``image`` is the PIL image handed over by the Gradio input component.
    """
    print("Preprocessing image...")
    image_tensor = MoondreamModel.preprocess_image(image)
    print("Image preprocessed.")

    print("Generating caption...")
    result = MoondreamModel.generate_caption(model, image_tensor, tokenizer)
    print("Caption generated.")

    return result
| |
|
| | |
# Wire the captioning function into a simple image-in / text-out web UI.
print("Setting up Gradio interface...")
interface = gr.Interface(
    fn=generate_caption_with_default,
    inputs=gr.Image(type="pil", label="Upload an Image"),
    outputs="text",
    title="Image Caption Generator",
    description=(
        f"The default question is: '{default_question}'.\n\n"
        "Please note that the inference may take up to 200 seconds due to long captions and CPU limitations.\n\n"
        "[](https://huggingface.co/irotem98/edge_vlm) "
        "[](https://github.com/rotem154154/edge_vlm)"
    ),
)

print("Launching interface...")
interface.launch()
| |
|