ganireddikumar committed on
Commit
b8067f4
·
verified ·
1 Parent(s): 64711ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -31
app.py CHANGED
@@ -1,24 +1,12 @@
1
- import re
2
- import spacy
3
- import torch
4
- import nltk
5
  import fitz # PyMuPDF for PDF extraction
6
  import gradio as gr
7
- import subprocess
8
- from nltk.tokenize import word_tokenize, sent_tokenize
9
- from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline, Trainer, TrainingArguments
10
- from sentence_transformers import SentenceTransformer, util
11
- import json
12
  from transformers import T5ForConditionalGeneration, T5Tokenizer
13
 
14
- # -------------------------------
15
- # Load Fine-Tuned Model & Tokenizer from "model/" Directory
16
- # -------------------------------
17
- model_path = "model"
18
- model = T5ForConditionalGeneration.from_pretrained(model_path) # ⬅️ Loads the fine-tuned model
19
- tokenizer = T5Tokenizer.from_pretrained(model_path) # ⬅️ Loads the fine-tuned tokenizer
20
 
21
- # ✅ Move model to GPU if available
22
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
23
  model.to(device)
24
 
@@ -31,26 +19,26 @@ def extract_text_from_pdf(pdf_path):
31
  return text
32
 
33
  def parse_resume(pdf_file):
34
- """Processes the PDF file, extracts text, and runs model inference."""
35
  resume_text = extract_text_from_pdf(pdf_file.name)
36
 
37
- # Create a prompt for T5 inference
38
- prompt = f"Extract information from the resume: {resume_text}"
39
-
40
- # ✅ Tokenize input and move to device
41
- input_ids = tokenizer.encode(prompt, return_tensors="pt", max_length=256, truncation=True).to(device)
42
-
43
- # Generate structured output using fine-tuned model
44
- outputs = model.generate(input_ids, max_length=128, num_beams=4, early_stopping=True)
45
-
46
- # Decode model output to structured JSON format
 
 
47
  result = tokenizer.decode(outputs[0], skip_special_tokens=True)
48
 
49
  return result
50
 
51
- # -------------------------------
52
- # ✅ Deploy as Hugging Face Gradio App
53
- # -------------------------------
54
  iface = gr.Interface(
55
  fn=parse_resume,
56
  inputs=gr.File(type="filepath"),
@@ -61,4 +49,3 @@ iface = gr.Interface(
61
 
62
  if __name__ == "__main__":
63
  iface.launch()
64
-
 
 
 
 
 
1
import fitz  # PyMuPDF, used for text extraction from PDF files
import gradio as gr
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Load the fine-tuned T5 checkpoint and its tokenizer from the local
# "model" directory bundled with this Space.
MODEL_DIR = "model"
model = T5ForConditionalGeneration.from_pretrained(MODEL_DIR)
tokenizer = T5Tokenizer.from_pretrained(MODEL_DIR)

# Prefer GPU inference when CUDA is available; fall back to CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
12
 
 
19
  return text
20
 
21
def parse_resume(pdf_file):
    """Extract structured JSON information from a resume PDF.

    Args:
        pdf_file: Path to the PDF as a ``str`` (what ``gr.File(type="filepath")``
            passes), or a file-like object exposing a ``.name`` attribute
            (what older Gradio versions passed).

    Returns:
        str: The model's decoded output, expected to be a JSON-formatted string.
    """
    # gr.File(type="filepath") hands the callback a plain path string, which
    # has no .name attribute; older Gradio versions passed a tempfile wrapper
    # with .name. Accept both so the app works across Gradio versions.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    resume_text = extract_text_from_pdf(pdf_path)

    # The prompt shows the model the exact JSON schema we want back.
    prompt = (
        f"Extract structured information from the following resume and return it in JSON format:\n\n"
        f"{resume_text}\n\n"
        f"Output format:\n"
        f'{{"Name": "John Doe", "Email": "johndoe@email.com", "Phone": "123-456-7890", '
        f'"Education": "B.Sc. in Computer Science", "Experience": "5 years", "Skills": "Python, ML, TensorFlow"}}'
    )

    # Tokenize (truncating long resumes to the 512-token input window — note
    # the resume text precedes the format instructions, so a very long resume
    # may truncate them) and generate with beam search for stable output.
    input_ids = tokenizer.encode(prompt, return_tensors="pt", max_length=512, truncation=True).to(device)
    outputs = model.generate(input_ids, max_length=256, num_beams=4, early_stopping=True)
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return result
40
 
41
+ # Deploy Gradio interface
 
 
42
  iface = gr.Interface(
43
  fn=parse_resume,
44
  inputs=gr.File(type="filepath"),
 
49
 
50
# Launch the Gradio app only when this file is executed directly
# (not when imported as a module).
if __name__ == "__main__":
    iface.launch()