goblingeorge committed on
Commit 234d0a0 · verified · 1 Parent(s): 09ea084

Update app.py

Files changed (1)
  1. app.py +85 -55
app.py CHANGED
@@ -1,70 +1,100 @@
  import gradio as gr
- from huggingface_hub import InferenceClient
-
-
- def respond(
-     message,
-     history: list[dict[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
-     hf_token: gr.OAuthToken,
- ):
-     """
-     For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-     """
-     client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
-
-     messages = [{"role": "system", "content": system_message}]
-
-     messages.extend(history)
-
-     messages.append({"role": "user", "content": message})
-
-     response = ""
-
-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         choices = message.choices
-         token = ""
-         if len(choices) and choices[0].delta.content:
-             token = choices[0].delta.content
-
-         response += token
-         yield response
-
-
- """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
- chatbot = gr.ChatInterface(
-     respond,
-     type="messages",
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
- )
-
- with gr.Blocks() as demo:
-     with gr.Sidebar():
-         gr.LoginButton()
-     chatbot.render()
-
-
  if __name__ == "__main__":
-     demo.launch()
  import gradio as gr
+ import os
+ import litellm
+
+ DESCRIPTION = '''
+ <div>
+ <h1 style="text-align: center;">TAIDE/Gemma-3-TAIDE-12b-Chat</h1>
+ <p>This Space demonstrates the instruction-tuned model <a href="https://huggingface.co/taide/Gemma-3-TAIDE-12b-Chat"><b>Gemma-3-TAIDE-12b-Chat</b></a>. Gemma-3-TAIDE-12b-Chat is a new open LLM that comes in one size: 12B. Feel free to play with it, or duplicate the Space to run it privately!</p>
+ </div>
+ '''
+
+ LICENSE = """
+ <p/>
+ ---
+ Built with Gemma-3-TAIDE-12b-Chat
+ """
+
+ css = """
+ h1 {
+   text-align: center;
+   display: block;
+ }
+ #duplicate-button {
+   margin: auto;
+   color: white;
+   background: #1565c0;
+   border-radius: 100vh;
+ }
+ """
+
+
+ def chat(message: str,
+          history: list,
+          temperature: float,
+          max_new_tokens: int
+          ) -> str:
+     """
+     Generate a streaming response from the Gemma-3-TAIDE-12b-Chat model.
+     """
+     try:
+         # Rebuild the conversation from Gradio's (user, assistant) history pairs.
+         messages = []
+         for user, assistant in history:
+             messages.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
+         messages.append({"role": "user", "content": message})
+
+         response = litellm.completion(
+             model="openai/Gemma-3-TAIDE-12b-Chat",  # the "openai/" prefix routes the call through LiteLLM's OpenAI-compatible provider
+             messages=messages,
+             max_completion_tokens=max_new_tokens,
+             temperature=temperature,
+             stream=True,
+         )
+         output = []
+         for part in response:
+             content = part.choices[0].delta.content or ""
+             output.append(content)
+             yield "".join(output)
+     except Exception as e:
+         yield f"生成過程中發生錯誤: {str(e)}"  # "An error occurred during generation: ..."
+
+
+ # Gradio UI
+ chatbot = gr.Chatbot(height=450, label='Gradio ChatInterface')
+
+ with gr.Blocks(fill_height=True, css=css) as demo:
+     gr.Markdown(DESCRIPTION)
+     gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
+     gr.ChatInterface(
+         fn=chat,
+         chatbot=chatbot,
+         fill_height=True,
+         additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
+         additional_inputs=[
+             gr.Slider(minimum=0,
+                       maximum=1,
+                       step=0.1,
+                       value=0.95,
+                       label="Temperature",
+                       render=False),
+             gr.Slider(minimum=128,
+                       maximum=131584,
+                       step=1,
+                       value=512,
+                       label="Max new tokens",
+                       render=False),
+         ],
+         # Example prompts (Traditional Chinese): an essay about an old photograph, a brand manager's letter to a creative director, and an English-to-Chinese translation task.
+         examples=[
+             ['請以以下內容為基礎,寫一篇文章:撰寫一篇作文,題目為《一張舊照片》,內容要求為:選擇一張令你印象深刻的照片,說明令你印象深刻的原因,並描述照片中的影像及背後的故事。記錄成長的過程、與他人的情景、環境變遷和美麗的景色。'],
+             ['請以品牌經理的身份,給廣告公司的創意總監寫一封信,提出對於新產品廣告宣傳活動的創意建議。'],
+             ['以下提供英文內容,請幫我翻譯成中文。Dongshan coffee is famous for its unique position, and the constant refinement of production methods. The flavor is admired by many caffeine aficionados.'],
+         ],
+         cache_examples=False,
+     )
+
+     gr.Markdown(LICENSE)
+
  if __name__ == "__main__":
+     demo.launch(server_name='0.0.0.0')
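
The new handler relies on LiteLLM's `openai/` provider, which expects an OpenAI-compatible endpoint and key that this diff never sets; the Space presumably supplies them through environment variables or Space secrets. A minimal sketch of how such a backend is typically wired up; the base URL, key, and vLLM-style server mentioned below are assumptions for illustration, not values from this commit:

import litellm

# Hypothetical configuration: a self-hosted OpenAI-compatible server (for example vLLM)
# serving the TAIDE model. If api_base/api_key are omitted, LiteLLM falls back to the
# OPENAI_API_BASE / OPENAI_API_KEY environment variables.
response = litellm.completion(
    model="openai/Gemma-3-TAIDE-12b-Chat",
    messages=[{"role": "user", "content": "你好"}],
    api_base="http://localhost:8000/v1",  # placeholder endpoint
    api_key="placeholder-key",            # placeholder key
    max_completion_tokens=64,
    stream=True,
)
for part in response:
    print(part.choices[0].delta.content or "", end="", flush=True)

On Hugging Face Spaces these values would normally be stored as Space secrets rather than hard-coded in app.py.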