Spaces:
Runtime error
Runtime error
Prgckwb
committed on
Commit
·
0a485e6
1
Parent(s):
d9d3f4b
:tada: init
Browse files
app.py
CHANGED
|
@@ -2,7 +2,7 @@ import gradio as gr
|
|
| 2 |
import torch
|
| 3 |
from diffusers import DiffusionPipeline
|
| 4 |
from transformers import AutoTokenizer, CLIPTokenizerFast, T5TokenizerFast
|
| 5 |
-
|
| 6 |
|
| 7 |
def load_tokenizers(model_id: str) -> list[CLIPTokenizerFast | T5TokenizerFast | None]:
|
| 8 |
config = DiffusionPipeline.load_config(model_id)
|
|
@@ -25,19 +25,20 @@ def load_tokenizers(model_id: str) -> list[CLIPTokenizerFast | T5TokenizerFast |
|
|
| 25 |
|
| 26 |
|
| 27 |
@torch.no_grad()
|
| 28 |
-
def inference(model_id: str,
|
| 29 |
tokenizers = load_tokenizers(model_id)
|
| 30 |
|
| 31 |
text_pairs_components = []
|
| 32 |
special_tokens_components = []
|
|
|
|
| 33 |
for i, tokenizer in enumerate(tokenizers):
|
| 34 |
if tokenizer:
|
| 35 |
label_text = f"Tokenizer {i + 1}: {tokenizer.__class__.__name__}"
|
| 36 |
|
| 37 |
# テキストとトークンIDのペアを作成
|
| 38 |
input_ids = tokenizer(
|
| 39 |
-
text=
|
| 40 |
-
truncation=
|
| 41 |
return_length=False,
|
| 42 |
return_overflowing_tokens=False,
|
| 43 |
).input_ids
|
|
@@ -49,7 +50,6 @@ def inference(model_id: str, input_text: str):
|
|
| 49 |
label=label_text,
|
| 50 |
value=token_pairs,
|
| 51 |
visible=True,
|
| 52 |
-
show_legend=True,
|
| 53 |
)
|
| 54 |
|
| 55 |
# スペシャルトークンを追加
|
|
@@ -63,16 +63,32 @@ def inference(model_id: str, input_text: str):
|
|
| 63 |
label=label_text,
|
| 64 |
value=special_tokens,
|
| 65 |
visible=True,
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
)
|
| 68 |
else:
|
| 69 |
output_text_pair_component = gr.HighlightedText(visible=False)
|
| 70 |
output_special_tokens_component = gr.HighlightedText(visible=False)
|
|
|
|
| 71 |
|
| 72 |
text_pairs_components.append(output_text_pair_component)
|
| 73 |
special_tokens_components.append(output_special_tokens_component)
|
|
|
|
| 74 |
|
| 75 |
-
return
|
| 76 |
|
| 77 |
|
| 78 |
if __name__ == "__main__":
|
|
@@ -110,6 +126,11 @@ if __name__ == "__main__":
|
|
| 110 |
output_special_tokens_1 = gr.HighlightedText()
|
| 111 |
output_special_tokens_2 = gr.HighlightedText()
|
| 112 |
output_special_tokens_3 = gr.HighlightedText()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
with gr.Row():
|
| 115 |
clear_button = gr.ClearButton(components=[input_text])
|
|
@@ -123,6 +144,9 @@ if __name__ == "__main__":
|
|
| 123 |
output_special_tokens_1,
|
| 124 |
output_special_tokens_2,
|
| 125 |
output_special_tokens_3,
|
|
|
|
|
|
|
|
|
|
| 126 |
]
|
| 127 |
submit_button.click(fn=inference, inputs=all_inputs, outputs=all_output)
|
| 128 |
|
|
@@ -141,4 +165,4 @@ if __name__ == "__main__":
|
|
| 141 |
cache_examples=True,
|
| 142 |
)
|
| 143 |
|
| 144 |
-
demo.queue().launch()
|
|
|
|
| 2 |
import torch
|
| 3 |
from diffusers import DiffusionPipeline
|
| 4 |
from transformers import AutoTokenizer, CLIPTokenizerFast, T5TokenizerFast
|
| 5 |
+
import pandas as pd
|
| 6 |
|
| 7 |
def load_tokenizers(model_id: str) -> list[CLIPTokenizerFast | T5TokenizerFast | None]:
|
| 8 |
config = DiffusionPipeline.load_config(model_id)
|
|
|
|
| 25 |
|
| 26 |
|
| 27 |
@torch.no_grad()
|
| 28 |
+
def inference(model_id: str, text: str):
|
| 29 |
tokenizers = load_tokenizers(model_id)
|
| 30 |
|
| 31 |
text_pairs_components = []
|
| 32 |
special_tokens_components = []
|
| 33 |
+
tokenizer_details_components = []
|
| 34 |
for i, tokenizer in enumerate(tokenizers):
|
| 35 |
if tokenizer:
|
| 36 |
label_text = f"Tokenizer {i + 1}: {tokenizer.__class__.__name__}"
|
| 37 |
|
| 38 |
# テキストとトークンIDのペアを作成
|
| 39 |
input_ids = tokenizer(
|
| 40 |
+
text=text,
|
| 41 |
+
truncation=False,
|
| 42 |
return_length=False,
|
| 43 |
return_overflowing_tokens=False,
|
| 44 |
).input_ids
|
|
|
|
| 50 |
label=label_text,
|
| 51 |
value=token_pairs,
|
| 52 |
visible=True,
|
|
|
|
| 53 |
)
|
| 54 |
|
| 55 |
# スペシャルトークンを追加
|
|
|
|
| 63 |
label=label_text,
|
| 64 |
value=special_tokens,
|
| 65 |
visible=True,
|
| 66 |
+
)
|
| 67 |
+
|
| 68 |
+
# トークナイザーの詳細情報を追加
|
| 69 |
+
tokenizer_details = pd.DataFrame([
|
| 70 |
+
("Type", tokenizer.__class__.__name__),
|
| 71 |
+
("Vocab Size", tokenizer.vocab_size),
|
| 72 |
+
("Model Max Length", tokenizer.model_max_length),
|
| 73 |
+
("Padding Side", tokenizer.padding_side),
|
| 74 |
+
("Truncation Side", tokenizer.truncation_side),
|
| 75 |
+
], columns=["Attribute", "Value"])
|
| 76 |
+
output_tokenizer_details = gr.Dataframe(
|
| 77 |
+
headers=["Attribute", "Value"],
|
| 78 |
+
value=tokenizer_details,
|
| 79 |
+
label=label_text,
|
| 80 |
+
visible=True,
|
| 81 |
)
|
| 82 |
else:
|
| 83 |
output_text_pair_component = gr.HighlightedText(visible=False)
|
| 84 |
output_special_tokens_component = gr.HighlightedText(visible=False)
|
| 85 |
+
output_tokenizer_details = gr.Dataframe(visible=False)
|
| 86 |
|
| 87 |
text_pairs_components.append(output_text_pair_component)
|
| 88 |
special_tokens_components.append(output_special_tokens_component)
|
| 89 |
+
tokenizer_details_components.append(output_tokenizer_details)
|
| 90 |
|
| 91 |
+
return text_pairs_components + special_tokens_components + tokenizer_details_components
|
| 92 |
|
| 93 |
|
| 94 |
if __name__ == "__main__":
|
|
|
|
| 126 |
output_special_tokens_1 = gr.HighlightedText()
|
| 127 |
output_special_tokens_2 = gr.HighlightedText()
|
| 128 |
output_special_tokens_3 = gr.HighlightedText()
|
| 129 |
+
with gr.Tab(label="Tokenizer Details"):
|
| 130 |
+
with gr.Column():
|
| 131 |
+
output_tokenizer_details_1 = gr.Dataframe(headers=["Attribute", "Value"])
|
| 132 |
+
output_tokenizer_details_2 = gr.Dataframe(headers=["Attribute", "Value"])
|
| 133 |
+
output_tokenizer_details_3 = gr.Dataframe(headers=["Attribute", "Value"])
|
| 134 |
|
| 135 |
with gr.Row():
|
| 136 |
clear_button = gr.ClearButton(components=[input_text])
|
|
|
|
| 144 |
output_special_tokens_1,
|
| 145 |
output_special_tokens_2,
|
| 146 |
output_special_tokens_3,
|
| 147 |
+
output_tokenizer_details_1,
|
| 148 |
+
output_tokenizer_details_2,
|
| 149 |
+
output_tokenizer_details_3,
|
| 150 |
]
|
| 151 |
submit_button.click(fn=inference, inputs=all_inputs, outputs=all_output)
|
| 152 |
|
|
|
|
| 165 |
cache_examples=True,
|
| 166 |
)
|
| 167 |
|
| 168 |
+
demo.queue().launch()
|