sharadsnaik committed
Commit 1504024 · verified · 1 Parent(s): 4f07730

code snippet in README

Files changed (1)
  1. README.md +73 -1
README.md CHANGED
@@ -16,9 +16,81 @@ language:
  - en
  pipeline_tag: image-text-to-text
  ---
+
+ # medgemma-4b-it — medical fine-tune (5-bit GGUF)
+
+ ## Model Details
+
+ ## Files
+ - `medgemma-4b-it-finnetunned-merged_new_for_cpu_q5_k_m.gguf` (~2.83 GB)
+
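+ The snippet below is a minimal sketch for pulling just this file from the Hub with the `huggingface_hub` CLI; the `--local-dir` value is only an example:
+ ```bash
+ # Download the single GGUF file into ./models (any directory works)
+ pip install -U huggingface_hub
+ huggingface-cli download sharadsnaik/medgemma-4b-it-medical-gguf \
+   medgemma-4b-it-finnetunned-merged_new_for_cpu_q5_k_m.gguf \
+   --local-dir ./models
+ ```
+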
+ ## How to run (llama.cpp)
+ ```bash
+ # Requires llama.cpp. You can run directly from the Hub path:
+ llama-cli -m hf://sharadsnaik/medgemma-4b-it-medical-gguf/medgemma-4b-it-finnetunned-merged_new_for_cpu_q5_k_m.gguf -p "Hello"
+ ```
+
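+ The `llama-cli` call above assumes a llama.cpp binary is already on your PATH; a minimal sketch for getting one, via the Homebrew package or a plain CMake source build, is:
+ ```bash
+ # Option A: package manager (macOS / Linux with Homebrew)
+ brew install llama.cpp
+ # Option B: build from source
+ git clone https://github.com/ggerganov/llama.cpp
+ cd llama.cpp && cmake -B build && cmake --build build --config Release
+ ```
+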
+ ## How to Get Started with the Model
+ ```python
  from huggingface_hub import hf_hub_download
  from llama_cpp import Llama
- p = hf_hub_download("USERNAME/medgemma-4b-it-medical-gguf",
+ p = hf_hub_download("sharadsnaik/medgemma-4b-it-medical-gguf",
                      "medgemma-4b-it-finnetunned-merged_new_for_cpu_q5_k_m.gguf")
  llm = Llama(model_path=p, n_ctx=4096, n_threads=8, chat_format="gemma")
  print(llm.create_chat_completion(messages=[{"role":"user","content":"Hello"}]))
+ ```
+ [More Information Needed]
+
+ ## Training Details
+
+ ### Training Data
+ ```
+ ruslanmv/ai-medical-chatbot
+ ```
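+
+ As a rough way to inspect that dataset, it can be loaded with the Hugging Face `datasets` library; the `train` split name below is an assumption, not something documented here:
+ ```python
+ # Sketch: peek at the fine-tuning data (split name assumed to be "train")
+ from datasets import load_dataset
+
+ ds = load_dataset("ruslanmv/ai-medical-chatbot", split="train")
+ print(ds)      # column names and row count
+ print(ds[0])   # first record
+ ```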
+
+ ## Sample Code Usage
+
+ #### `app.py`
+ ```python
+ import os, gradio as gr
+ from huggingface_hub import hf_hub_download
+ from llama_cpp import Llama
+
+ # Your model repo + filename
+ REPO_ID = "sharadsnaik/medgemma-4b-it-medical-gguf"
+ FILENAME = "medgemma-4b-it-finnetunned-merged_new_for_cpu_q5_k_m.gguf"
+
+ # Download from Hub to local cache
+ MODEL_PATH = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, repo_type="model")
+
+ # Create the llama.cpp model
+ # Use all available CPU threads; chat_format="gemma" matches Gemma-style prompts
+ llm = Llama(
+     model_path=MODEL_PATH,
+     n_ctx=4096,
+     n_threads=os.cpu_count(),
+     chat_format="gemma"  # important for Gemma/Med-Gemma instruction formatting
+ )
+
+ def chat_fn(message, history):
+     # Convert Gradio history -> OpenAI-style messages
+     messages = []
+     for user_msg, bot_msg in history:
+         messages.append({"role":"user","content":user_msg})
+         if bot_msg:
+             messages.append({"role":"assistant","content":bot_msg})
+     messages.append({"role":"user","content":message})
+
+     out = llm.create_chat_completion(messages=messages, temperature=0.6, top_p=0.95)
+     reply = out["choices"][0]["message"]["content"]
+     return reply
+
+ demo = gr.ChatInterface(fn=chat_fn, title="MedGemma 4B (Q5_K_M) — CPU Space")
+
+ if __name__ == "__main__":
+     demo.launch()
+ ```
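+
+ `app.py` assumes its dependencies are already installed; a minimal sketch for running it locally (a Space would list the same packages in `requirements.txt`) is:
+ ```bash
+ # Packages imported by app.py, then a local launch
+ pip install gradio llama-cpp-python huggingface_hub
+ python app.py
+ ```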