# Bharatmali999's picture
# Update app.py
# 859552c verified
# Import necessary libraries
import PyPDF2
from transformers import pipeline
import gradio as gr
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Step 1: Build the Named Entity Recognition (NER) pipeline once, at import
# time, so that every call to analyze_resume() reuses the same loaded model.
model = pipeline(
    task='ner',
    model='dbmdz/bert-large-cased-finetuned-conll03-english',
)
# Step 2: Extract text from uploaded PDF resume
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF file.

    Args:
        pdf_file: Path of the PDF to read; it is opened in binary mode.

    Returns:
        A single string holding the text of all pages in page order. Pages
        that yield no text contribute an empty string.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(pdf_file, "rb") as file:
        # PdfReader / .pages replace the legacy PdfFileReader / numPages /
        # getPage names, which newer PyPDF2 releases reject at runtime.
        reader = PyPDF2.PdfReader(file)
        # `or ""` keeps the join safe if a page yields nothing (e.g. a
        # scanned, image-only page).
        return "".join(page.extract_text() or "" for page in reader.pages)
# Step 3: Extract relevant information (skills, job titles, etc.) from text using the model
def analyze_resume(resume_text):
    """Run the NER pipeline over resume text and pick out skills and job titles.

    Args:
        resume_text: Plain text of the resume.

    Returns:
        A dict with two keys, 'skills' and 'job_title', each mapping to the
        list of 'word' values of the entities carrying the matching label.
    """
    entities = model(resume_text)

    def words_tagged(label):
        # Collect the surface form of every entity whose label equals `label`.
        return [entity['word'] for entity in entities if entity['entity'] == label]

    # NOTE(review): the pipeline is loaded from a CoNLL-03 checkpoint, which
    # presumably emits person/organisation/location/misc tags rather than
    # 'SKILL' or 'JOB_TITLE' -- confirm, otherwise both lists are always empty.
    return {'skills': words_tagged('SKILL'), 'job_title': words_tagged('JOB_TITLE')}
# Step 4: Calculate similarity between the job description and the resume
def calculate_similarity(job_desc, resume_text):
    """Return the TF-IDF cosine similarity between a job description and a resume.

    Both texts are vectorised together with English stop words removed, and
    the cosine similarity of the two resulting vectors is returned.

    Args:
        job_desc: Text of the job description.
        resume_text: Text extracted from the resume.

    Returns:
        The similarity as a float. 0.0 is returned when either text is
        missing/blank or when no usable terms remain after stop-word removal.
    """
    # A missing or blank document has no terms to compare against.
    if not (job_desc and job_desc.strip() and resume_text and resume_text.strip()):
        return 0.0

    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        # Row 0 is the job description, row 1 is the resume.
        tfidf_matrix = vectorizer.fit_transform([job_desc, resume_text])
    except ValueError:
        # Raised when the vocabulary is empty, e.g. both texts consist only of
        # stop words or punctuation; treat that as "nothing in common".
        return 0.0

    similarity = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
    return float(similarity[0][0])
# Step 5: Check if the resume matches the job description and return "Good Fit" or "Not Fit"
def match_job_description(job_desc, resume_file, threshold=0.7):
    """Classify a resume as "Good Fit" or "Not Fit" for a job description.

    Args:
        job_desc: Text of the job description.
        resume_file: The uploaded resume, either as a filesystem path
            (str or os.PathLike) or as a file-like object exposing the path
            through its ``name`` attribute.
        threshold: Minimum similarity score (inclusive) for a "Good Fit".
            Defaults to 0.7.

    Returns:
        "Good Fit" when the similarity score is at least ``threshold``,
        otherwise "Not Fit".

    Raises:
        ValueError: If ``resume_file`` is None.
    """
    import os

    if resume_file is None:
        raise ValueError("No resume file was provided.")

    # NOTE(review): depending on the Gradio version, a "file" input is
    # presumably delivered either as a path string or as a temp-file wrapper
    # with a .name attribute -- accept both. Path-like objects are checked
    # first because Path.name would give only the base name.
    if isinstance(resume_file, (str, os.PathLike)):
        resume_path = resume_file
    else:
        resume_path = resume_file.name

    resume_text = extract_text_from_pdf(resume_path)
    similarity_score = calculate_similarity(job_desc, resume_text)
    return "Good Fit" if similarity_score >= threshold else "Not Fit"
# Step 6: Gradio Interface function
def process_resume(job_desc, resume_file):
    """Gradio callback: report whether the uploaded resume fits the job description.

    The interface is created with ``live=True``, so this callback can run
    before both inputs have been supplied. In that case a short prompt is
    returned instead of failing on the missing input.

    Args:
        job_desc: Job description text from the text input.
        resume_file: Uploaded resume from the file input, or None when nothing
            has been uploaded yet.

    Returns:
        The result of match_job_description() ("Good Fit" or "Not Fit"), or a
        prompt asking for the missing input.
    """
    if resume_file is None or not job_desc or not job_desc.strip():
        return "Please provide both a job description and a resume PDF."
    return match_job_description(job_desc, resume_file)
# Step 7: Create Gradio Interface
# The two inputs are passed positionally to process_resume(job_desc, resume_file).
interface = gr.Interface(
    live=True,
    outputs="text",  # plain text verdict shown to the user
    inputs=[
        "text",  # job description
        "file",  # resume upload
    ],
    fn=process_resume,
)

# Step 8: Start serving the app
interface.launch()