import re
import gradio as gr
from model import ToyModel
"""
Model specification
"""
vision_model_path = 'openai/clip-vit-base-patch32'
language_model_path = 'openai-community/gpt2'
model = ToyModel(vision_model_path, language_model_path)
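# `model.py` is not shown in this Space; the sketch below reflects only the
# interface this app assumes (hypothetical signatures, kept in comments so
# nothing shadows the real import):
#
#   class ToyModel:
#       def __init__(self, vision_model_path: str, language_model_path: str): ...
#       def chat(self, image, text: str) -> str:
#           """Return the model's text reply for an image and a prompt."""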
def chat(image_input, text_input):
    # Run one round of inference and echo the input image back for display.
    text_output = model.chat(image_input, text_input)
    return image_input, text_output
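# Example round-trip (hypothetical paths), assuming a PIL image as input:
#   from PIL import Image
#   img, reply = chat(Image.open("examples_v2/office.jpg"), "[vqa] what season is it")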
"""
Gradio
"""
def gradio_taskselect(idx):
    # Map a task-shortcut index to its prompt prefix and usage hint.
    prompt_list = [
        '',
        '[grounding] describe this image in detail',
        '[refer] ',
        '[detection] ',
        '[identify] what is this ',
        '[vqa] '
    ]
    instruct_list = [
        '**Hint:** Type in whatever you want',
        '**Hint:** Send the command to generate a grounded image description',
        '**Hint:** Type in a phrase about an object in the image and send the command',
        '**Hint:** Type in a caption or phrase, and see object locations in the image',
        '**Hint:** Draw a bounding box on the uploaded image, then send the command. Click the "clear" button on the '
        'top right of the image before redrawing',
        '**Hint:** Send a question to get a short answer',
    ]
    return prompt_list[idx], instruct_list[idx]
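# `re` is imported at the top but otherwise unused here. The "[identify]" task
# embeds box coordinates in the prompt as "{<x1><y1><x2><y2>}" (see the
# examples further down); below is a minimal, hypothetical sketch of parsing
# such a tag, assuming integer coordinates:
def parse_bbox_tag(text):
    """Return (x1, y1, x2, y2) from a "{<x1><y1><x2><y2>}" tag, or None."""
    match = re.search(r'\{<(\d+)><(\d+)><(\d+)><(\d+)>\}', text)
    return tuple(int(v) for v in match.groups()) if match else None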
title = """<h1 align="center">RS-Visual Perception Demo</h1>"""
description = 'Welcome to Our RS-Visual Perception Demo!'
introduction = '''
For Abilities Involving Visual Grounding:
1. Grounding: CLICK **Send** to generate a grounded image description.
2. Refer: Input a referring object and CLICK **Send**.
3. Detection: Write a caption or phrase, and CLICK **Send**.
4. Identify: Draw a bounding box on the uploaded image window and CLICK **Send** to identify the object in the box. (CLICK the "clear" button before re-drawing.)
5. VQA: Input a visual question and CLICK **Send**.
6. No Tag: Input whatever you want and CLICK **Send** without any tagging.
You can also simply chat in free form!
'''
with gr.Blocks() as demo:
    gr.Markdown(title)
    gr.Markdown(description)

    with gr.Row():
        with gr.Column(scale=0.5):
            image_input = gr.Image(type="pil", label="Input Image")
            # NOTE: the slider is shown in the UI but is not currently passed
            # to chat(); wire it up once model.chat accepts a temperature.
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.5,
                value=0.6,
                step=0.1,
                interactive=True,
                label="Temperature",
            )
            # type="index" makes a click return the row index, which
            # gradio_taskselect maps to a prompt prefix and hint.
            dataset = gr.Dataset(
                components=[gr.Textbox(visible=False)],
                samples=[['No Tag'], ['Grounding'], ['Refer'], ['Detection'], ['Identify'], ['VQA']],
                type="index",
                label='Task Shortcuts',
            )
            task_inst = gr.Markdown('**Hint:** Upload your image and chat')
            text_input = gr.Textbox(label='Input text', placeholder='Upload your image and chat', interactive=True)
            submit_button = gr.Button("Submit", variant='primary', size='sm', scale=1)
            gr.Markdown(introduction)
        with gr.Column():
            image_output = gr.Image(type="pil", label='Output image')
            text_output = gr.Textbox(label='Output text', interactive=True)
    with gr.Row():
        with gr.Column():
            gr.Examples(
                examples=[
                    ["examples_v2/office.jpg", "[grounding] describe this image in detail"],
                    ["examples_v2/sofa.jpg", "[detection] sofas"],
                    ["examples_v2/2000x1372_wmkn_0012149409555.jpg", "[refer] the world cup"],
                    ["examples_v2/KFC-20-for-20-Nuggets.jpg", "[identify] what is this {<4><50><30><65>}"],
                ],
                inputs=[image_input, text_input],
                fn=chat,
                outputs=[image_output, text_output],
            )
        with gr.Column():
            gr.Examples(
                examples=[
                    ["examples_v2/glip_test.jpg", "[vqa] where should I hide in this room when playing hide and seek"],
                    ["examples_v2/float.png", "Please write a poem about the image"],
                    ["examples_v2/thief.png", "Is the weapon fateful"],
                    ["examples_v2/cockdial.png", "What might happen in this image in the next second"],
                ],
                inputs=[image_input, text_input],
                fn=chat,
                outputs=[image_output, text_output],
            )
    dataset.click(
        gradio_taskselect,
        inputs=[dataset],
        outputs=[text_input, task_inst],
        show_progress="hidden",
        postprocess=False,
        queue=False,
    )

    text_input.submit(
        chat,
        inputs=[image_input, text_input],
        outputs=[image_output, text_output],
    )

    submit_button.click(
        chat,
        inputs=[image_input, text_input],
        outputs=[image_output, text_output],
    )
demo.launch()
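# On Spaces, a bare launch() is enough. For local testing you could enable
# request queuing and a temporary public URL instead (optional):
#   demo.queue().launch(share=True)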