Spaces:

ganireddikumar
/

Resume_Parser_NLP_Pytorch

Sleeping

App Files Files Community

ganireddikumar committed on Feb 7, 2025

Commit

b8067f4

verified ·

1 Parent(s): 64711ba

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -31

app.py CHANGED Viewed

@@ -1,24 +1,12 @@
-import re
-import spacy
-import torch
-import nltk
 import fitz  # PyMuPDF for PDF extraction
 import gradio as gr
-import subprocess
-from nltk.tokenize import word_tokenize, sent_tokenize
-from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline, Trainer, TrainingArguments
-from sentence_transformers import SentenceTransformer, util
-import json
 from transformers import T5ForConditionalGeneration, T5Tokenizer
-# -------------------------------
-# ✅ Load Fine-Tuned Model & Tokenizer from "model/" Directory
-# -------------------------------
-model_path = "model"
-model = T5ForConditionalGeneration.from_pretrained(model_path)  # ⬅️ Loads the fine-tuned model
-tokenizer = T5Tokenizer.from_pretrained(model_path)  # ⬅️ Loads the fine-tuned tokenizer
-# ✅ Move model to GPU if available
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
@@ -31,26 +19,26 @@ def extract_text_from_pdf(pdf_path):
     return text
 def parse_resume(pdf_file):
-    """Processes the PDF file, extracts text, and runs model inference."""
     resume_text = extract_text_from_pdf(pdf_file.name)
-    # ✅ Create a prompt for T5 inference
-    prompt = f"Extract information from the resume: {resume_text}"
-    # ✅ Tokenize input and move to device
-    input_ids = tokenizer.encode(prompt, return_tensors="pt", max_length=256, truncation=True).to(device)
-    # ✅ Generate structured output using fine-tuned model
-    outputs = model.generate(input_ids, max_length=128, num_beams=4, early_stopping=True)
-    # ✅ Decode model output to structured JSON format
     result = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return result
-# -------------------------------
-# ✅ Deploy as Hugging Face Gradio App
-# -------------------------------
 iface = gr.Interface(
     fn=parse_resume,
     inputs=gr.File(type="filepath"),
@@ -61,4 +49,3 @@ iface = gr.Interface(
 if __name__ == "__main__":
     iface.launch()

 import fitz  # PyMuPDF for PDF extraction
 import gradio as gr
+import torch
 from transformers import T5ForConditionalGeneration, T5Tokenizer
+# Load fine-tuned model
+model = T5ForConditionalGeneration.from_pretrained("model")
+tokenizer = T5Tokenizer.from_pretrained("model")
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
     return text
 def parse_resume(pdf_file):
+    """Extract structured JSON information from a resume PDF."""
     resume_text = extract_text_from_pdf(pdf_file.name)
+    # Improve prompt formatting
+    prompt = (
+        f"Extract structured information from the following resume and return it in JSON format:\n\n"
+        f"{resume_text}\n\n"
+        f"Output format:\n"
+        f'{{"Name": "John Doe", "Email": "johndoe@email.com", "Phone": "123-456-7890", '
+        f'"Education": "B.Sc. in Computer Science", "Experience": "5 years", "Skills": "Python, ML, TensorFlow"}}'
+    )
+    # Tokenize and generate structured JSON output
+    input_ids = tokenizer.encode(prompt, return_tensors="pt", max_length=512, truncation=True).to(device)
+    outputs = model.generate(input_ids, max_length=256, num_beams=4, early_stopping=True)
     result = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return result
+# Deploy Gradio interface
 iface = gr.Interface(
     fn=parse_resume,
     inputs=gr.File(type="filepath"),
 if __name__ == "__main__":
     iface.launch()