|
|
|
|
|
import re |
|
|
import gradio as gr |
|
|
from transformers import pipeline |
|
|
from functools import lru_cache |
|
|
import spacy |
|
|
import torch |
|
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
|
|
|
|
|
|
|
@lru_cache(maxsize=3)
def load_spacy_model(name):
    """Load the spaCy pipeline called ``name`` and memoise the result.

    The ``lru_cache`` wrapper keeps up to three distinct pipelines, so a
    repeated call with the same name returns the already-loaded object
    instead of loading it again.
    """
    loaded = spacy.load(name)
    return loaded
|
|
|
|
|
@lru_cache(maxsize=3)
def load_nli_pipeline():
    """Build (once) the zero-shot classifier used for sentiment and intent.

    Returns:
        A ``transformers`` ``zero-shot-classification`` pipeline backed by
        ``valhalla/distilbart-mnli-12-1``. The result is cached, so the
        model is only instantiated on the first call.
    """
    # Use the first GPU when one is available, otherwise run on CPU (-1).
    # An unconditional ``device=0`` makes pipeline construction fail on
    # CPU-only hosts.
    device = 0 if torch.cuda.is_available() else -1
    return pipeline(
        "zero-shot-classification",
        model="valhalla/distilbart-mnli-12-1",
        device=device,
    )
|
|
|
|
|
@lru_cache(maxsize=3)
def load_summarizer():
    """Return the cached ``(tokenizer, model)`` pair for ``t5-small``.

    The model is moved to CUDA when ``torch`` reports it as available and
    to the CPU otherwise.
    """
    checkpoint = "t5-small"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    seq2seq = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    target_device = "cuda" if torch.cuda.is_available() else "cpu"
    model = seq2seq.to(target_device)
    return tokenizer, model
|
|
|
|
|
|
|
|
# Symptom vocabulary. Despite the ``_DICT`` suffix this is a set of phrases;
# map_to_bucket() fuzzy-matches extracted spans against it and labels a
# winning match as "Symptoms".
SYMPTOM_DICT = {
    "headache", "migraine", "throbbing head pain", "sharp pain", "dull ache", "burning sensation", "stabbing pain",
    "nausea", "vomiting", "queasiness", "upset stomach",
    "dizziness", "lightheadedness", "vertigo", "spinning sensation",
    "chest pain", "pressure in chest", "tightness", "heartburn",
    "shortness of breath", "dyspnea", "breathlessness", "wheezing", "chest tightness", "difficulty breathing",
    "fatigue", "tiredness", "lack of energy", "exhaustion",
    "fever", "high temperature", "chills", "night sweats", "hot flashes", "cold sweats",
    "cough", "persistent cough", "dry cough", "productive cough", "hacking cough",
    "sore throat", "throat pain", "scratchy throat", "hoarseness",
    "runny nose", "nasal discharge", "stuffy nose", "congestion",
    "back pain", "lumbar pain", "lower back ache", "upper back pain",
    "neck pain", "cervical pain", "stiff neck",
    "abdominal pain", "stomach ache", "tummy ache", "cramping", "bloating", "gas",
    "muscle ache", "myalgia", "joint pain", "arthralgia",
    "rash", "skin eruption", "hives", "redness", "itching", "pruritus",
    "swelling", "edema", "inflammation", "puffiness",
    "tingling", "paresthesia", "pins and needles", "numbness",
    "bleeding", "hemorrhage", "nosebleed", "bruising",
    "insomnia", "sleep disturbance", "difficulty sleeping", "restlessness",
    "palpitations", "racing heart", "irregular heartbeat",
    "diarrhea", "loose stools", "constipation", "hard stools",
    "blurred vision", "double vision", "eye pain", "visual disturbance",
    "earache", "ringing in ears", "tinnitus", "hearing loss",
    "urinary frequency", "painful urination", "dysuria", "blood in urine",
    "weight loss", "weight gain", "loss of appetite", "increased appetite",
    "confusion", "memory loss", "difficulty concentrating", "brain fog"
}
|
|
|
|
|
|
|
|
# Treatment vocabulary. Despite the ``_DICT`` suffix this is a set of phrases;
# map_to_bucket() fuzzy-matches extracted spans against it and labels a
# winning match as "Treatment".
TREATMENT_DICT = {
    "rest", "bed rest", "take it easy",
    "ice pack", "cold compress", "heat pack", "warm compress",
    "physical therapy", "physiotherapy sessions", "rehab", "exercise program",
    "surgery", "operative repair", "procedure", "surgical intervention", "minimally invasive surgery",
    "antibiotics", "amoxicillin", "doxycycline", "penicillin",
    "painkillers", "analgesics", "ibuprofen", "acetaminophen", "paracetamol", "naproxen",
    "steroids", "corticosteroids", "prednisone", "dexamethasone",
    "insulin therapy", "oral hypoglycemics", "metformin", "glipizide",
    "antihypertensives", "lisinopril", "amlodipine", "losartan",
    "chemotherapy", "radiation therapy", "immunotherapy",
    "inhaler", "bronchodilator", "albuterol", "salmeterol",
    "antidepressants", "SSRIs", "sertraline", "fluoxetine", "venlafaxine",
    "anticoagulants", "warfarin", "heparin", "rivaroxaban",
    "dietary changes", "low-salt diet", "gluten-free diet", "low-carb diet", "Mediterranean diet",
    "vaccination", "immunization", "flu shot", "tetanus shot",
    "wound dressing", "bandaging", "stitches", "sutures",
    "counseling", "psychotherapy", "cognitive behavioral therapy", "CBT",
    "hydration", "fluid intake", "electrolyte replacement", "IV fluids",
    "elevating the limb", "compression stockings", "massage",
    "applying ointment", "topical creams", "steroid creams", "antibiotic ointment",
    "breathing exercises", "pulmonary rehabilitation", "oxygen therapy",
    "diet modification", "exercise regimen", "weight management", "lifestyle changes",
    "mindfulness", "relaxation techniques", "meditation",
    "laparoscopic surgery", "endoscopy", "colonoscopy"
}
|
|
|
|
|
|
|
|
# Diagnosis vocabulary. Despite the ``_DICT`` suffix this is a set of phrases;
# map_to_bucket() fuzzy-matches extracted spans against it and labels a
# winning match as "Diagnosis".
DIAGNOSIS_DICT = {
    "hypertension", "high blood pressure",
    "diabetes mellitus", "type 2 diabetes", "type II diabetes", "sugar diabetes",
    "myocardial infarction", "heart attack",
    "stroke", "cerebrovascular accident", "brain attack",
    "whiplash injury", "cervical strain", "neck sprain",
    "concussion", "mild traumatic brain injury", "head injury",
    "pneumonia", "lung infection", "chest infection",
    "urinary tract infection", "UTI", "bladder infection",
    "appendicitis", "inflammation of the appendix",
    "fracture", "broken bone", "hairline fracture",
    "sprain", "ligament tear", "twisted ankle",
    "gastroenteritis", "stomach flu", "food poisoning",
    "anemia", "low hemoglobin", "iron deficiency",
    "migraine disorder", "recurrent headaches",
    "osteoarthritis", "degenerative joint disease", "wear and tear arthritis",
    "depression", "major depressive disorder", "clinical depression",
    "anxiety disorder", "generalized anxiety", "panic disorder",
    "asthma", "reactive airway disease",
    "chronic obstructive pulmonary disease", "COPD", "emphysema",
    "bronchitis", "chest cold",
    "sinusitis", "sinus infection",
    "allergic rhinitis", "hay fever",
    "otitis media", "middle ear infection",
    "gastroesophageal reflux disease", "GERD", "acid reflux",
    "peptic ulcer disease", "stomach ulcer",
    "irritable bowel syndrome", "IBS",
    "kidney stones", "renal calculi",
    "prostatitis", "prostate infection",
    "arthritis", "rheumatoid arthritis", "gout",
    "osteoporosis", "brittle bones",
    "thyroid disorder", "hypothyroidism", "hyperthyroidism",
    "bipolar disorder", "manic depression",
    "schizophrenia", "psychotic disorder",
    "post-traumatic stress disorder", "PTSD",
    "skin cancer", "melanoma", "basal cell carcinoma",
    "hepatitis", "liver inflammation",
    "cirrhosis", "liver scarring",
    "fatty liver disease", "hepatic steatosis",
    "multiple sclerosis", "MS",
    "Parkinson's disease", "tremor disorder",
    "Alzheimer's disease", "dementia"
}
|
|
|
|
|
|
|
|
# Prognosis vocabulary. Despite the ``_DICT`` suffix this is a set of phrases;
# map_to_bucket() fuzzy-matches extracted spans against it and labels a
# winning match as "Prognosis".
# NOTE(review): the last row holds bare duration words ("weeks", "months", ...),
# so any span that is just a duration will land in this bucket — confirm
# that is intended.
PROGNOSIS_DICT = {
    "full recovery expected", "complete recovery likely", "should make a full recovery",
    "partial recovery", "some residual symptoms possible", "may not fully resolve",
    "guarded prognosis", "uncertain outcome", "wait and see",
    "good prognosis", "favorable outcome", "positive outlook",
    "poor prognosis", "unfavorable outcome", "serious condition",
    "recovery expected within six months", "reconstructive healing time", "recovery in 2-3 weeks", "healing time of 4-6 weeks",
    "likely recurrence", "risk of relapse", "may come back",
    "chronic condition", "long-term management needed", "ongoing treatment required",
    "acute condition", "short-lived course", "temporary issue",
    "stable", "condition stable", "no change expected",
    "progressive", "worsening over time", "may deteriorate",
    "in remission", "disease in remission", "currently under control",
    "monitor regularly", "follow-up recommended", "keep an eye on it",
    "palliative care", "supportive management", "focus on comfort",
    "physiotherapy", "rehabilitation program", "exercise therapy",
    "expected to improve", "likely to get better", "should see improvement",
    "may require ongoing treatment", "long-term follow-up needed", "continued care necessary",
    "risk of complications", "potential for recurrence", "may need further intervention",
    "stable condition", "no immediate concerns", "holding steady",
    "full recovery anticipated", "partial recovery possible", "residual symptoms likely",
    "prognosis is excellent", "very good outlook", "should do well",
    "recovery timeline", "healing process", "expected duration",
    "short-term issue", "long-term condition", "permanent damage",
    "no long-term impact", "should not affect daily life", "minimal lasting effects",
    "10 weeks", "weeks", "months", "days", "years"
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Candidate label sentences handed to the zero-shot classifier, and the short
# display names each one is reported under. The maps are built positionally,
# so the order of the display names must follow the order of the labels.
SENTIMENT_LABELS = [
    "The speaker shows signs of anxiety or worry",
    "The speaker shows a neutral or calm demeanor",
    "The speaker shows signs of reassurance, relief, or comfort",
]

INTENT_LABELS = [
    "The speaker is asking for reassurance or comfort",
    "The speaker is describing their symptoms or condition",
    "The speaker is voicing worry about a situation",
]

SENTIMENT_MAP = dict(zip(SENTIMENT_LABELS, ("Anxious", "Neutral", "Reassured")))

INTENT_MAP = dict(
    zip(
        INTENT_LABELS,
        ("Seeking reassurance", "Reporting symptoms", "Expressing concern"),
    )
)
|
|
|
|
|
|
|
|
def extract_patient_name(text: str) -> str:
    """Return the first PERSON entity found in the opening lines of ``text``.

    Only the first six lines are scanned, one line at a time, with the
    ``en_core_web_sm`` spaCy pipeline. If none of them yields a PERSON
    entity the placeholder ``"Not Specified"`` is returned.
    """
    nlp = load_spacy_model("en_core_web_sm")
    opening_lines = text.splitlines()[:6]
    # Lazy generator: scanning stops at the first PERSON entity.
    people = (
        entity.text
        for row in opening_lines
        for entity in nlp(row).ents
        if entity.label_ == "PERSON"
    )
    return next(people, "Not Specified")
|
|
|
|
|
def clean_transcript(text: str) -> str:
    """Remove speaker tags from a transcript and trim the result.

    Every occurrence of ``Physician:``, ``Doctor:`` or ``Patient:``
    (case-sensitive, anywhere in the text) is deleted; whitespace that
    followed a tag is kept, only the ends of the whole string are stripped.
    """
    speaker_tag = re.compile(r"(?:Physician|Doctor|Patient):")
    untagged = speaker_tag.sub("", text)
    return untagged.strip()
|
|
|
|
|
def extract_medical_spans(text: str):
    """Collect the distinct entity texts two biomedical spaCy models find.

    ``text`` is run through ``en_core_sci_lg`` and ``en_ner_bc5cdr_md``;
    the entity strings from both are merged into one de-duplicated list.
    The list order is whatever set iteration yields.
    """
    pipelines = [
        load_spacy_model(model_name)
        for model_name in ("en_core_sci_lg", "en_ner_bc5cdr_md")
    ]
    unique_spans = set()
    for nlp in pipelines:
        unique_spans.update(entity.text for entity in nlp(text).ents)
    return list(unique_spans)
|
|
|
|
|
def map_to_bucket(span: str):
    """Assign ``span`` to the vocabulary whose best fuzzy match scores highest.

    Each vocabulary is queried with RapidFuzz ``token_sort_ratio``. The
    bucket with the strictly highest score wins (on a tie the bucket checked
    first is kept), but only if that score is at least 70.

    Returns:
        ``(bucket_name, matched_term)`` on success, ``(None, None)`` when no
        vocabulary reaches the threshold.
    """
    from rapidfuzz import process, fuzz

    vocabularies = (
        ("Symptoms", SYMPTOM_DICT),
        ("Treatment", TREATMENT_DICT),
        ("Diagnosis", DIAGNOSIS_DICT),
        ("Prognosis", PROGNOSIS_DICT),
    )

    winner, winner_term, winner_score = None, None, 0
    for label, vocabulary in vocabularies:
        term, score, _ = process.extractOne(
            span, vocabulary, scorer=fuzz.token_sort_ratio
        )
        if score <= winner_score:
            continue
        winner, winner_term, winner_score = label, term, score

    if winner_score < 70:
        return None, None
    return winner, winner_term
|
|
|
|
|
def ner_pipeline(text: str):
    """Extract the patient name and bucketed medical terms from a transcript.

    The name is looked up in the raw text. Medical spans are extracted from
    the transcript after speaker tags are removed, and each span is mapped
    to a vocabulary term via ``map_to_bucket``.

    Returns:
        A dict with ``Patient_Name`` plus one list per category
        (``Symptoms``, ``Treatment``, ``Diagnosis``, ``Prognosis``); an
        empty category is reported as ``["Not Specified"]``.
    """
    categories = ("Symptoms", "Treatment", "Diagnosis", "Prognosis")

    patient = extract_patient_name(text)
    matched = {category: set() for category in categories}

    for span in extract_medical_spans(clean_transcript(text)):
        bucket, term = map_to_bucket(span)
        if bucket:
            matched[bucket].add(term)

    report = {"Patient_Name": patient}
    for category in categories:
        report[category] = list(matched[category]) or ["Not Specified"]
    return report
|
|
|
|
|
|
|
|
def sentiment_intent_pipeline(text: str):
    """Classify ``text`` for sentiment and intent with the zero-shot pipeline.

    The classifier is run twice, once per label set. The first entry of the
    returned ``labels``/``scores`` is reported (presumably the top-ranked
    label — the pipeline is expected to return them sorted by score).

    Returns:
        A dict with the short sentiment and intent names and their scores
        rounded to two decimals.
    """
    classifier = load_nli_pipeline()

    sentiment = classifier(text, candidate_labels=SENTIMENT_LABELS)
    intent = classifier(text, candidate_labels=INTENT_LABELS)

    top_sentiment, sentiment_score = sentiment["labels"][0], sentiment["scores"][0]
    top_intent, intent_score = intent["labels"][0], intent["scores"][0]

    report = {}
    report["Sentiment"] = SENTIMENT_MAP[top_sentiment]
    report["Sentiment_Confidence"] = round(sentiment_score, 2)
    report["Intent"] = INTENT_MAP[top_intent]
    report["Intent_Confidence"] = round(intent_score, 2)
    return report
|
|
|
|
|
|
|
|
def summarize_text(text: str, min_length=15, max_length=100):
    """Summarize ``text`` with the cached seq2seq model.

    Args:
        text: The passage to summarize.
        min_length: Minimum generated length, forwarded to ``generate``.
        max_length: Maximum generated length, forwarded to ``generate``.

    Returns:
        The decoded summary, or ``"No information provided."`` when ``text``
        is empty or whitespace-only (the model is not loaded in that case).
    """
    if not text.strip():
        return "No information provided."

    tok, mdl = load_summarizer()

    # T5 checkpoints are multi-task and pick the task from a textual prefix.
    # When calling ``generate`` directly nothing adds that prefix for us, so
    # prepend it here; other model families get the text unchanged.
    is_t5 = getattr(mdl.config, "model_type", None) == "t5"
    prefix = "summarize: " if is_t5 else ""

    inputs = tok(
        [prefix + text],
        truncation=True,
        padding="longest",
        return_tensors="pt",
    ).to(mdl.device)
    out = mdl.generate(
        **inputs,
        min_length=min_length,
        max_length=max_length,
        num_beams=4,
    )
    return tok.decode(out[0], skip_special_tokens=True)
|
|
|
|
|
def classify_utterance_by_rules(utt: str) -> str:
    """Assign one transcript line to a SOAP section using keyword rules.

    The line is trimmed and lower-cased, then checked in this order:

    * ``"Subjective"`` — spoken by the patient and contains a symptom/feeling
      keyword.
    * ``"Objective"`` — spoken by the clinician and contains an examination
      keyword.
    * ``"Plan"`` — spoken by the clinician and contains a planning keyword.

    Anything else is ``"Other"``. Keywords are matched as plain substrings.
    A clinician line is one starting with ``physician`` or ``doctor``, the
    same speaker tags ``clean_transcript`` recognises.
    """
    # Strip first so indented or padded lines still expose their speaker tag.
    lowered = utt.strip().lower()
    from_patient = lowered.startswith("patient")
    from_clinician = lowered.startswith(("physician", "doctor"))

    def mentions(keywords):
        return any(kw in lowered for kw in keywords)

    subj_kws = ["i feel", "pain", "discomfort", "ache", "nausea", "dizzy", "worried"]
    if from_patient and mentions(subj_kws):
        return "Subjective"

    # Objective is tested before Plan, so a clinician line matching both
    # keyword lists is filed under Objective.
    obj_kws = ["exam", "observed", "range of motion", "vitals", "lab", "imaging"]
    if from_clinician and mentions(obj_kws):
        return "Objective"

    plan_kws = ["recommend", "continue", "follow up", "prescribe", "return"]
    if from_clinician and mentions(plan_kws):
        return "Plan"

    return "Other"
|
|
|
|
|
def soap_pipeline(text: str):
    """Build a SOAP-style note from a speaker-tagged transcript.

    Each line is classified with ``classify_utterance_by_rules``; lines
    filed as Subjective, Objective or Plan have their speaker tag removed
    and are joined per section, then every section is summarized.

    Returns:
        A dict with ``Subjective``, ``Objective``, ``Plan`` and
        ``Assessment`` summaries. The assessment is a summary of the three
        section summaries (generated at the default length).
    """
    # Classification lower-cases the line, so the tag is stripped
    # case-insensitively as well; leading whitespace and the "Doctor" tag
    # are tolerated for the same reason.
    speaker_prefix = re.compile(
        r"^\s*(?:Physician|Doctor|Patient):\s*", re.IGNORECASE
    )

    classified = {"Subjective": [], "Objective": [], "Plan": []}
    for line in text.splitlines():
        cat = classify_utterance_by_rules(line)
        if cat in classified:
            classified[cat].append(speaker_prefix.sub("", line))

    subj = " ".join(classified["Subjective"])
    obj = " ".join(classified["Objective"])
    plan = " ".join(classified["Plan"])

    # The Plan section and the assessment input use the same arguments
    # (max_length=100 is the default), so generate that summary only once.
    plan_summary = summarize_text(plan, max_length=100)

    assessment_input = (
        f"Subjective: {summarize_text(subj)} "
        f"Objective: {summarize_text(obj)} "
        f"Plan: {plan_summary}"
    )

    return {
        "Subjective": summarize_text(subj, max_length=120),
        "Objective": summarize_text(obj, max_length=80),
        "Plan": plan_summary,
        "Assessment": summarize_text(assessment_input, max_length=60),
    }
|
|
|
|
|
|
|
|
# Markdown help text per task. The keys are the task names offered by the
# task selector; update_task() looks an entry up by the selected task and
# shows it in the `instructions` Markdown component.
INSTRUCTIONS = {
    "NER" : """
**NOTE -> First Inference Could take upto 1 minute due to loading of heavy dependencies. (Further Inferences will be quick) I have used LRU Cache**

**Paste the transcript in the below format:**

Physician: Good morning, Ms. Jones. How are you feeling today?
Patient: Good morning, doctor. I’m doing better, but I still have some discomfort now and then.
Physician: I understand you were in a car accident last September. Can you walk me through what happened?
Patient: Yes, it was on September 1st, around 12:30 in the afternoon. I was driving from Cheadle Hulme to Manchester when I had to stop in traffic. Out of nowhere, another car hit me from behind, which pushed my car into the one in front.
Physician: That sounds like a strong impact. Were you wearing your seatbelt?
.............

""",
    "Sentiment & Intent": """
**NOTE -> First Inference Could take upto 1 minute due to loading of heavy dependencies. (Further Inferences will be quick) I have used LRU Cache**
**Paste the patient transcript in the below format:**

Patient: That’s great to hear. So, I don’t need to worry about this affecting me in the future?
""",

    "SOAP Generation" : """
**NOTE -> First Inference Could take upto 1 minute due to loading of heavy dependencies. (Further Inferences will be quick) I have used LRU Cache**


SOAP generation is currently not fully accurate.
I’ve found a research paper for the same : https://aclanthology.org/2021.acl-long.384.pdf :-
Generating SOAP Notes from Doctor-Patient Conversations Using Modular Summarization Techniques

"""

}
|
|
|
|
|
# Gradio UI: a task selector, task-specific instructions, a transcript box,
# a Run button and a JSON output panel.
# NOTE(review): the nesting below is reconstructed — confirm which components
# belong inside the Row and that the handlers live inside the Blocks context.
with gr.Blocks() as demo:
    gr.Markdown("Physician Notetaker")

    with gr.Row():
        task = gr.Radio(
            choices=["NER", "Sentiment & Intent", "SOAP Generation"],
            label="Select Task",
            value="NER"
        )

    # Starts with the instructions for the default task ("NER").
    instructions = gr.Markdown(INSTRUCTIONS["NER"])

    inp = gr.Textbox(
        lines=8,
        placeholder="Paste your transcript here...",
        label="Transcript Input"
    )

    btn = gr.Button("Run")
    out = gr.JSON()

    def router(task, text):
        """Run the pipeline matching ``task`` on ``text``.

        Blank input yields an error dict instead of running a pipeline. A
        task name that matches none of the branches falls through and
        returns ``None``.
        """
        if not text.strip():
            return {"error": "Please provide input text."}
        if task == "NER":
            return ner_pipeline(text)
        if task == "Sentiment & Intent":
            return sentiment_intent_pipeline(text)
        if task == "SOAP Generation":
            return soap_pipeline(text)

    def update_task(selected_task):
        """Clear the transcript box and show the selected task's instructions."""
        return gr.update(value="", visible=True), gr.update(value=INSTRUCTIONS[selected_task])

    # Switching task resets the input and swaps the help text; Run sends the
    # current task and transcript to the router and shows its result as JSON.
    task.change(update_task, inputs=task, outputs=[inp, instructions])
    btn.click(router, inputs=[task, inp], outputs=out)

# Only start the app when executed as a script; share=True is Gradio's
# option for requesting a public share link.
if __name__ == "__main__":
    demo.launch(share=True)
|
|
|