"""Resume screening demo.

Extracts the text of an uploaded PDF resume, runs a Hugging Face NER pipeline
over it, scores the resume against a job description with TF-IDF cosine
similarity, and reports the verdict through a Gradio interface.
"""

import PyPDF2
import gradio as gr
from transformers import pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# NER pipeline used by analyze_resume(). This CoNLL-2003 model tags PER, ORG,
# LOC and MISC entities only; it does not emit SKILL or JOB_TITLE labels.
model = pipeline('ner', model='dbmdz/bert-large-cased-finetuned-conll03-english')


def extract_text_from_pdf(pdf_file):
    """Read every page of a PDF file and return its concatenated text."""
    with open(pdf_file, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        text = ""
        for page in reader.pages:
            # extract_text() may return an empty string for pages without
            # extractable text (e.g. scanned images).
            text += page.extract_text() or ""
    return text


def analyze_resume(resume_text):
    """Run the NER pipeline over the resume and group predicted entities.

    Note: this helper is not called by the matching flow below.
    """
    model_output = model(resume_text)

    # The generic CoNLL-2003 model never predicts 'SKILL' or 'JOB_TITLE', so
    # these lists stay empty unless the pipeline is swapped for a model
    # fine-tuned on resume data.
    skills = [item['word'] for item in model_output if item['entity'] == 'SKILL']
    job_title = [item['word'] for item in model_output if item['entity'] == 'JOB_TITLE']

    return {'skills': skills, 'job_title': job_title}
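
# For reference, each element of model_output returned by the non-aggregated
# NER pipeline is a dict like the following (values are illustrative, not
# real output):
#   {'entity': 'B-PER', 'score': 0.998, 'index': 4, 'word': 'Smith',
#    'start': 11, 'end': 16}
# so the filters above compare against the 'entity' field of each prediction.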


def calculate_similarity(job_desc, resume_text):
    """Return the TF-IDF cosine similarity between the two texts."""
    vectorizer = TfidfVectorizer(stop_words='english')

    documents = [job_desc, resume_text]
    tfidf_matrix = vectorizer.fit_transform(documents)

    # Compare the job description (row 0) against the resume (row 1).
    similarity = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
    return similarity[0][0]
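
# A quick sanity check of the scorer (hypothetical strings, shown only to
# illustrate the call; the exact score depends on the vocabulary overlap):
#   score = calculate_similarity(
#       "Python developer with NLP and machine learning experience",
#       "Experienced Python engineer focused on machine learning",
#   )
#   # score is a float in [0.0, 1.0]; higher means more shared terms.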


def match_job_description(job_desc, resume_file):
    """Score an uploaded resume against the job description text."""
    # resume_file is the object handed over by Gradio's file component;
    # .name is the path of the uploaded file on disk (this assumes the
    # classic file-object behaviour, which can vary between Gradio versions).
    resume_text = extract_text_from_pdf(resume_file.name)

    similarity_score = calculate_similarity(job_desc, resume_text)

    # A fixed similarity threshold decides the verdict.
    if similarity_score >= 0.7:
        return "Good Fit"
    else:
        return "Not Fit"


def process_resume(job_desc, resume_file):
    """Gradio callback: thin wrapper around match_job_description()."""
    result = match_job_description(job_desc, resume_file)
    return result


# live=True re-runs process_resume whenever an input changes, so the verdict
# updates as soon as the job description or the uploaded file changes.
interface = gr.Interface(
    fn=process_resume,
    inputs=["text", "file"],
    outputs="text",
    live=True
)


interface.launch()
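
# Optional tweak: Gradio's launch() also accepts share=True, which creates a
# temporary public URL for the demo, e.g.:
#   interface.launch(share=True)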