Spaces:
Sleeping
Sleeping
Upload 3 files
Browse files- README.md +19 -14
- app.py +535 -0
- requirements.txt +7 -0
README.md
CHANGED
|
@@ -1,14 +1,19 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Resume Analyzer and Job Recommender
|
| 2 |
+
This application uses BERT to analyze resumes and recommend suitable jobs based on the content. It also allows users to match their resume against specific job descriptions.
|
| 3 |
+
|
| 4 |
+
Features
|
| 5 |
+
Resume Analysis: Extract skills and key information from PDF resumes
|
| 6 |
+
Job Recommendations: Get personalized job recommendations based on your resume
|
| 7 |
+
Job Matching: See how well your resume matches a specific job posting and get improvement suggestions
|
| 8 |
+
How to Use
|
| 9 |
+
Upload your resume (PDF format) to get an analysis and job recommendations
|
| 10 |
+
To match against a specific job, paste the job title and description in the second tab
|
| 11 |
+
Review recommendations and suggestions to improve your job applications
|
| 12 |
+
Technical Details
|
| 13 |
+
Built with Gradio for the user interface
|
| 14 |
+
Uses BERT for natural language understanding
|
| 15 |
+
Implements cosine similarity for resume-job matching
|
| 16 |
+
Provides actionable feedback for resume improvement
|
| 17 |
+
Requirements
|
| 18 |
+
See requirements.txt for a full list of dependencies.
|
| 19 |
+
|
app.py
ADDED
|
@@ -0,0 +1,535 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import PyPDF2
|
| 6 |
+
import torch
|
| 7 |
+
from transformers import BertTokenizer, BertModel
|
| 8 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 9 |
+
import gradio as gr
|
| 10 |
+
|
| 11 |
+
# Load pre-trained BERT model
|
| 12 |
+
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
| 13 |
+
model = BertModel.from_pretrained('bert-base-uncased')
|
| 14 |
+
|
| 15 |
+
# Job database
|
| 16 |
+
jobs_data = [
|
| 17 |
+
# Entry-Level Positions
|
| 18 |
+
{
|
| 19 |
+
"job_id": 1,
|
| 20 |
+
"title": "Junior Software Developer",
|
| 21 |
+
"company": "Tech Solutions Inc.",
|
| 22 |
+
"description": "Join our dynamic team in developing web applications and software solutions. You'll work on real projects using modern development practices and collaborate with senior developers who will mentor you in your growth journey.",
|
| 23 |
+
"requirements": "Bachelor's degree in Computer Science or related field. 0-1 years of experience. Knowledge of Python and web technologies (HTML, CSS, JavaScript). Familiarity with Git version control and basic database concepts."
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"job_id": 2,
|
| 27 |
+
"title": "Data Analyst",
|
| 28 |
+
"company": "Data Insights Co.",
|
| 29 |
+
"description": "Help transform raw data into actionable insights. You'll create visualizations, prepare reports, and assist in building dashboards that drive business decisions. Perfect position for someone who loves finding patterns in data.",
|
| 30 |
+
"requirements": "Bachelor's degree in Statistics, Mathematics, or related field. 0-2 years of experience with data analysis. Proficiency in SQL, Excel, and basic Python. Knowledge of data visualization tools like Tableau or Power BI is a plus."
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"job_id": 3,
|
| 34 |
+
"title": "Frontend Developer",
|
| 35 |
+
"company": "WebCreate Studios",
|
| 36 |
+
"description": "Create responsive, intuitive user interfaces for web applications. You'll implement designs using modern frameworks and collaborate with designers to ensure optimal user experience and accessibility.",
|
| 37 |
+
"requirements": "Portfolio demonstrating frontend projects. Strong knowledge of HTML, CSS, JavaScript, and React. Understanding of responsive design principles. Eye for detail and ability to translate designs into functional interfaces."
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"job_id": 4,
|
| 41 |
+
"title": "Junior Machine Learning Engineer",
|
| 42 |
+
"company": "AI Innovations",
|
| 43 |
+
"description": "Implement and optimize machine learning models under the guidance of senior ML engineers. You'll work with datasets, train models, and help deploy solutions that solve real-world problems.",
|
| 44 |
+
"requirements": "Bachelor's degree in Computer Science, Mathematics, or related field. Strong understanding of Python, data structures, and algorithms. Knowledge of ML libraries like TensorFlow or PyTorch. Solid foundation in statistics and linear algebra."
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"job_id": 5,
|
| 48 |
+
"title": "DevOps Engineer (Junior)",
|
| 49 |
+
"company": "CloudTech Solutions",
|
| 50 |
+
"description": "Help build and maintain CI/CD pipelines and cloud infrastructure. You'll learn to automate deployment processes, monitor systems, and optimize infrastructure for performance and security.",
|
| 51 |
+
"requirements": "Understanding of Linux systems and cloud platforms (AWS, Azure, GCP). Basic knowledge of containerization (Docker) and automation. Familiarity with scripting languages. Strong problem-solving abilities."
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"job_id": 6,
|
| 55 |
+
"title": "Cybersecurity Analyst",
|
| 56 |
+
"company": "SecureTech",
|
| 57 |
+
"description": "Assist in protecting organizational assets by monitoring security systems, analyzing threats, and conducting vulnerability assessments. You'll help implement security measures and respond to incidents.",
|
| 58 |
+
"requirements": "Bachelor's in Cybersecurity, Computer Science, or related field. Understanding of network security, encryption, and authentication protocols. Knowledge of security tools and basic penetration testing concepts."
|
| 59 |
+
},
|
| 60 |
+
|
| 61 |
+
# Experienced Positions
|
| 62 |
+
{
|
| 63 |
+
"job_id": 7,
|
| 64 |
+
"title": "Senior Backend Developer",
|
| 65 |
+
"company": "CloudPeak Technologies",
|
| 66 |
+
"description": "Design and implement robust, scalable backend systems that power our applications. You'll architect microservices, optimize database performance, and ensure system reliability under high load conditions.",
|
| 67 |
+
"requirements": "3+ years of experience in backend development with Python, Node.js, or Java. Strong knowledge of SQL and NoSQL databases. Experience with API design, microservices architecture, and cloud infrastructure."
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"job_id": 8,
|
| 71 |
+
"title": "Data Scientist",
|
| 72 |
+
"company": "Insight Labs",
|
| 73 |
+
"description": "Extract valuable insights from complex datasets and develop predictive models that drive business strategy. You'll collaborate with stakeholders to understand requirements and communicate findings effectively.",
|
| 74 |
+
"requirements": "Master's degree in Data Science, Statistics, or related field. 2+ years of experience in data analysis or machine learning. Proficiency in Python, R, and SQL. Experience with statistical modeling, machine learning algorithms, and data visualization."
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"job_id": 9,
|
| 78 |
+
"title": "Senior Full Stack Developer",
|
| 79 |
+
"company": "TechX Solutions",
|
| 80 |
+
"description": "Lead development of web applications from concept to deployment. You'll work across the stack to create seamless user experiences while ensuring application performance, security, and scalability.",
|
| 81 |
+
"requirements": "4+ years of experience in web development. Strong proficiency in React or Angular, Node.js, and database technologies. Experience with DevOps practices, containerization, and cloud deployment."
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"job_id": 10,
|
| 85 |
+
"title": "Lead Data Engineer",
|
| 86 |
+
"company": "DataStream Innovations",
|
| 87 |
+
"description": "Design and implement data infrastructure that enables analytics and machine learning at scale. You'll lead a team in building ETL pipelines, data warehouses, and ensuring data quality and accessibility.",
|
| 88 |
+
"requirements": "5+ years of experience in data engineering. Expertise in big data technologies like Hadoop, Spark, and data warehouse solutions. Strong programming skills and experience with cloud-based data solutions."
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"job_id": 11,
|
| 92 |
+
"title": "Machine Learning Architect",
|
| 93 |
+
"company": "IntelliAI",
|
| 94 |
+
"description": "Design cutting-edge machine learning systems and lead ML implementation strategies. You'll guide teams in developing and deploying sophisticated models that solve complex business problems.",
|
| 95 |
+
"requirements": "5+ years of experience in machine learning/AI. Advanced knowledge of deep learning frameworks, model optimization, and ML deployment. Experience leading ML projects and mentoring junior data scientists."
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"job_id": 12,
|
| 99 |
+
"title": "Cloud Solutions Architect",
|
| 100 |
+
"company": "Cloudify Corp.",
|
| 101 |
+
"description": "Design resilient, cost-effective cloud architectures that meet business requirements. You'll create migration strategies, optimize cloud resources, and implement security best practices.",
|
| 102 |
+
"requirements": "4+ years of experience in cloud computing and architecture. Certifications in AWS, Azure, or GCP. Experience with infrastructure as code, containerization, and microservices deployment."
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"job_id": 13,
|
| 106 |
+
"title": "Senior DevOps Engineer",
|
| 107 |
+
"company": "DevOps Works",
|
| 108 |
+
"description": "Lead the implementation of DevOps practices that enable continuous delivery and operational excellence. You'll automate processes, optimize infrastructure, and enhance monitoring and alerting systems.",
|
| 109 |
+
"requirements": "5+ years of experience in DevOps. Strong understanding of CI/CD tools, containerization, and infrastructure as code. Experience with cloud platforms and system reliability engineering principles."
|
| 110 |
+
},
|
| 111 |
+
{
|
| 112 |
+
"job_id": 14,
|
| 113 |
+
"title": "AI Research Scientist",
|
| 114 |
+
"company": "AI Frontier",
|
| 115 |
+
"description": "Push the boundaries of AI technology through innovative research. You'll develop novel algorithms, publish findings, and translate research into practical applications that drive product development.",
|
| 116 |
+
"requirements": "PhD or Master's degree in AI/ML. 3+ years of research experience in AI with publications in reputed journals. Deep expertise in machine learning theory and implementation. Ability to translate complex research into practical solutions."
|
| 117 |
+
}
|
| 118 |
+
]
|
| 119 |
+
|
| 120 |
+
jobs_df = pd.DataFrame(jobs_data)
|
| 121 |
+
|
| 122 |
+
# Functions for resume processing and analysis
|
| 123 |
+
|
| 124 |
+
def extract_text_from_pdf(pdf_file):
    """Extract the text of an uploaded PDF resume.

    Args:
        pdf_file: Anything ``PyPDF2.PdfReader`` accepts; in this file it is the
            value handed over by the Gradio upload component.

    Returns:
        The text of all pages, joined with newlines. Failures are reported
        in-band instead of being raised: a message starting with
        ``"The PDF appears"`` when no text could be extracted, or a message
        starting with ``"Error"`` when reading the PDF raised. ``process_resume``
        detects failures by these two prefixes, so they must stay unchanged.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)

        # Join pages with a newline so the last word of one page is not fused
        # with the first word of the next (plain concatenation did that).
        page_texts = [page.extract_text() for page in pdf_reader.pages]
        text = "\n".join(chunk for chunk in page_texts if chunk)

        # If no text was extracted, the PDF might be image-based
        if not text.strip():
            return "The PDF appears to be image-based. Try using a text-based PDF resume."

        return text
    except Exception as e:
        # Deliberately broad: any parsing failure is turned into a message the
        # UI can show rather than crashing the request.
        print(f"PDF extraction error: {str(e)}")
        return f"Error extracting text from PDF: {str(e)}"
|
| 142 |
+
|
| 143 |
+
def clean_resume_text(text):
    """Normalize resume text for embedding.

    Punctuation is replaced by spaces, whitespace runs are collapsed to a
    single space, and the result is lower-cased and stripped.

    Args:
        text: Raw resume text. ``None`` or an empty string yields ``""``.

    Returns:
        The normalized, lower-case text with single spaces between words.
    """
    if not text:
        return ""

    # Replace punctuation first and collapse whitespace second; the reverse
    # order leaves double spaces wherever punctuation was removed
    # ("a, b" -> "a  b").
    text = re.sub(r'[^\w\s]', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    return text.lower().strip()
|
| 149 |
+
|
| 150 |
+
def extract_skills(text):
    """Return the known skills mentioned in ``text``.

    Matching is case-insensitive and done on whole terms: a skill only counts
    when it is not directly preceded or followed by a word character. This
    avoids substring false positives such as "r" inside "experienced", "java"
    inside "javascript", "sql" inside "mysql" or "git" inside "github". The
    trade-off is that glued or inflected spellings ("nodejs",
    "presentations") no longer match.

    Args:
        text: Text to scan (resume or job description). ``None`` or an empty
            string yields an empty list.

    Returns:
        A list of matched skill names, in the order they appear in the
        built-in skill list below (not in order of appearance in ``text``).
    """
    # Enhanced skills list with more technologies and soft skills
    common_skills = [
        # Programming Languages
        "python", "java", "javascript", "typescript", "c++", "c#", "ruby", "php", "swift", "kotlin", "go", "rust",

        # Web Technologies
        "html", "css", "sass", "bootstrap", "tailwind", "jquery", "json", "xml", "rest", "graphql", "ajax",

        # Frontend Frameworks/Libraries
        "react", "angular", "vue", "svelte", "next.js", "gatsby", "redux", "webpack", "babel",

        # Backend Technologies
        "node", "express", "django", "flask", "spring", "rails", "laravel", "asp.net", "fastapi",

        # Databases
        "sql", "mysql", "postgresql", "mongodb", "sqlite", "oracle", "nosql", "firebase", "dynamodb", "cassandra", "redis",

        # Cloud & DevOps
        "aws", "azure", "gcp", "docker", "kubernetes", "jenkins", "circleci", "travis", "terraform", "ansible", "cicd",

        # Data Science & ML
        "machine learning", "deep learning", "data analysis", "data science", "natural language processing", "computer vision",
        "tensorflow", "pytorch", "keras", "scikit-learn", "pandas", "numpy", "matplotlib", "seaborn", "jupyter",
        "r", "spss", "tableau", "power bi", "data visualization", "statistics", "big data", "hadoop", "spark",

        # Mobile Development
        "android", "ios", "react native", "flutter", "xamarin", "mobile development", "app development",

        # Version Control
        "git", "github", "gitlab", "bitbucket", "version control",

        # Testing
        "unit testing", "integration testing", "jest", "mocha", "selenium", "pytest", "junit", "tdd", "bdd",

        # Office & Productivity
        "excel", "word", "powerpoint", "sharepoint", "microsoft office", "g suite", "jira", "confluence", "trello",

        # Soft Skills
        "communication", "teamwork", "leadership", "problem solving", "critical thinking", "time management",
        "project management", "agile", "scrum", "kanban", "customer service", "presentation", "negotiation",

        # Certifications (common ones)
        "aws certified", "microsoft certified", "google certified", "comptia", "cisco certified", "pmp", "scrum master",
        "itil", "security+", "cka", "ckad"
    ]

    if not text:
        return []

    # Lower-case and collapse whitespace so multi-word skills still match when
    # the words are separated by a line break or several spaces.
    haystack = " ".join(text.lower().split())

    found_skills = []
    for skill in common_skills:
        # Lookarounds instead of \b: \b does not work next to the trailing
        # punctuation in entries such as "c++", "c#" or "security+".
        pattern = r"(?<!\w)" + re.escape(skill) + r"(?!\w)"
        if re.search(pattern, haystack):
            found_skills.append(skill)

    return found_skills
|
| 205 |
+
|
| 206 |
+
def get_bert_embedding(text):
    """Return a sentence embedding for ``text`` from the module-level BERT model.

    The text is tokenized with the module-level ``tokenizer`` (truncated to at
    most 512 tokens), run through ``model`` with gradients disabled, and the
    hidden state of the first token ([CLS]) is used as the embedding.

    Args:
        text: The text to embed.

    Returns:
        A NumPy array holding the last-layer hidden state of the first token
        of the (single) input sequence.
    """
    # No padding: there is exactly one sequence per call, so padding it to 512
    # tokens only makes the forward pass do full-length work. Padded positions
    # are masked out of attention anyway, so the [CLS] vector is not meant to
    # depend on them.
    encoded_input = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )

    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**encoded_input)

    # Batch item 0, token 0 -> the [CLS] embedding.
    return outputs.last_hidden_state[0, 0].numpy()
|
| 224 |
+
|
| 225 |
+
def analyze_resume(resume_text):
    """Analyze a resume: detect skills and compute its BERT embedding.

    Args:
        resume_text: Raw text extracted from the resume.

    Returns:
        A dict with two keys:
            ``"skills"``: list of skill names found by ``extract_skills``.
            ``"embedding"``: the vector returned by ``get_bert_embedding`` for
            the cleaned resume text.
    """
    clean_text = clean_resume_text(resume_text)

    # clean_resume_text() replaces punctuation with spaces, which makes skills
    # such as "c++", "c#", "next.js", "asp.net" or "scikit-learn" impossible to
    # find in the cleaned text. Scan the raw text (whitespace-normalized) as
    # well, and keep the cleaned scan so hyphenated spellings such as
    # "problem-solving" still count. dict.fromkeys() de-duplicates while
    # preserving order.
    raw_text = re.sub(r"\s+", " ", resume_text)
    skills = list(dict.fromkeys(extract_skills(raw_text) + extract_skills(clean_text)))

    embedding = get_bert_embedding(clean_text)

    return {
        "skills": skills,
        "embedding": embedding
    }
|
| 235 |
+
|
| 236 |
+
def get_job_embeddings():
    """Return one BERT embedding per row of ``jobs_df``, in row order.

    Each job is embedded from its title, description and requirements joined
    by single spaces. The result is memoized on the function object, because
    embedding every job on each request repeats one BERT forward pass per job
    for data that is built once at import time.

    Returns:
        A new list of embeddings (one per job). The list is a fresh copy on
        every call; the embedding arrays themselves are shared with the cache.
    """
    cache = getattr(get_job_embeddings, "_cache", None)

    # Cheap staleness guard: recompute if the number of jobs changed.
    # NOTE(review): in-place edits to existing rows of jobs_df would not be
    # noticed; the visible code never mutates jobs_df after building it.
    if cache is None or cache[0] != len(jobs_df):
        embeddings = [
            get_bert_embedding(f"{job['title']} {job['description']} {job['requirements']}")
            for _, job in jobs_df.iterrows()
        ]
        cache = (len(jobs_df), embeddings)
        get_job_embeddings._cache = cache

    # Hand out a copy so a caller mutating the list cannot corrupt the cache.
    return list(cache[1])
|
| 246 |
+
|
| 247 |
+
def recommend_jobs(resume_analysis, top_n=3):
    """Pick the jobs whose embeddings are closest to the resume embedding.

    Args:
        resume_analysis: Dict with an ``"embedding"`` entry, as produced by
            ``analyze_resume``.
        top_n: Maximum number of jobs to return.

    Returns:
        A list of dicts, best match first, each with the keys ``job_id``,
        ``title``, ``company``, ``similarity`` (cosine similarity as a
        percentage rounded to two decimals), ``description`` and
        ``requirements``.
    """
    resume_vec = resume_analysis["embedding"]

    # Score every job as (row position, cosine similarity to the resume).
    scored = [
        (position, cosine_similarity([resume_vec], [job_vec])[0][0])
        for position, job_vec in enumerate(get_job_embeddings())
    ]

    # Best match first; the sort is stable, so ties keep row order.
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)

    picks = []
    for position, score in ranked[:max(top_n, 0)]:
        row = jobs_df.iloc[position]
        picks.append({
            "job_id": row["job_id"],
            "title": row["title"],
            "company": row["company"],
            "similarity": round(score * 100, 2),
            "description": row["description"],
            "requirements": row["requirements"]
        })

    return picks
|
| 276 |
+
|
| 277 |
+
def match_resume_to_job(resume_text, job_title, job_description):
    """Match a resume to a custom job description and suggest improvements.

    Args:
        resume_text: Raw resume text.
        job_title: Title of the job; ``None`` is treated as an empty title.
        job_description: Full text of the job posting.

    Returns:
        A ``(analysis, suggestions)`` tuple of strings. ``analysis`` lists the
        match score (cosine similarity of the BERT embeddings, as a
        percentage), the skills found in both texts and the skills found only
        in the job description. ``suggestions`` is a Markdown block of
        numbered tips. If the resume or the description is empty, a prompt
        message and an empty string are returned instead.
    """
    if not resume_text or not job_description:
        return "Please provide both a resume and a job description.", ""

    # A missing title must not crash the .lower() calls further down.
    job_title = job_title or ""
    title_lower = job_title.lower()
    resume_lower = resume_text.lower()

    # Analyze resume
    resume_analysis = analyze_resume(resume_text)
    resume_skills = set(resume_analysis["skills"])

    # Get embedding for the job description
    job_text = f"{job_title} {job_description}"
    job_embedding = get_bert_embedding(job_text)

    # Calculate similarity
    resume_embedding = resume_analysis["embedding"]
    similarity = cosine_similarity([resume_embedding], [job_embedding])[0][0]
    match_score = round(similarity * 100, 2)

    # Extract skills from the job description; whitespace is normalized so
    # multi-word skills split across lines are still recognized.
    job_skills = set(extract_skills(" ".join(job_description.split())))

    # Sorted so the output is deterministic (set iteration order is not).
    matching_skills = sorted(resume_skills & job_skills)
    missing_skills = sorted(job_skills - resume_skills)

    # Generate improvement suggestions
    suggestions = []

    if match_score < 50:
        suggestions.append("Your resume shows low alignment with this job. Consider tailoring it specifically for this position.")

    if missing_skills:
        suggestions.append(f"The job requires skills you haven't highlighted: {', '.join(missing_skills)}. If you have these skills, add them to your resume.")

    if len(matching_skills) < 3:
        suggestions.append("Try to emphasize more relevant skills and experiences that align with the job requirements.")

    # Check if the resume is possibly too generic
    if match_score < 60 and len(resume_skills) > 15:
        suggestions.append("Your resume may be too broad. Focus on highlighting experiences and skills most relevant to this specific position.")

    # For entry-level positions. The former extra checks
    # ('"project" not in suggestions', '"education" not in suggestions')
    # compared whole list items with a single word and were always true, so
    # they are dropped.
    if "junior" in title_lower or "entry" in title_lower:
        if "project" not in resume_lower:
            suggestions.append("For entry-level positions, consider adding academic or personal projects that demonstrate your skills.")

        if "internship" not in resume_lower and "coursework" not in resume_lower:
            suggestions.append("Highlight relevant coursework, certifications, or internships to compensate for limited work experience.")

    # For technical positions
    if any(tech in title_lower for tech in ["developer", "engineer", "programmer", "data", "analyst"]):
        if resume_skills.isdisjoint({"github", "gitlab", "bitbucket"}):
            suggestions.append("Consider adding a link to your GitHub/GitLab profile to showcase your technical projects.")

    # Prepare output
    analysis_result = f"Match Score: {match_score}%\n"
    analysis_result += f"Matching Skills: {', '.join(matching_skills) if matching_skills else 'None detected'}\n"
    analysis_result += f"Skills to Add: {', '.join(missing_skills) if missing_skills else 'None'}"

    improvement_suggestions = "### Suggestions for Improvement\n\n"
    if suggestions:
        improvement_suggestions += "".join(
            f"{i}. {suggestion}\n\n" for i, suggestion in enumerate(suggestions, 1)
        )
    else:
        improvement_suggestions += "Your resume appears to be well-aligned with this job posting. Consider customizing your cover letter to further highlight your relevant experiences."

    return analysis_result, improvement_suggestions
|
| 344 |
+
|
| 345 |
+
def process_resume(pdf_file):
    """Process the uploaded resume and return analysis and recommendations.

    This is the handler for the "Analyze Resume" button, which is wired to
    three outputs (analysis textbox, recommendations Markdown, resume-text
    state). Every return path therefore yields exactly three values; the
    early-exit paths used to return only two.

    Args:
        pdf_file: The uploaded file as provided by the upload component, or
            ``None`` when nothing was uploaded.

    Returns:
        A ``(analysis, recommendations_markdown, resume_text)`` tuple. On any
        failure the first two items carry user-facing messages and
        ``resume_text`` is ``""``.
    """
    if pdf_file is None:
        return "Please upload a PDF resume.", "No recommendations yet. Upload your resume first.", ""

    try:
        # Extract text from PDF
        resume_text = extract_text_from_pdf(pdf_file)

        # extract_text_from_pdf reports failures in-band as messages with
        # these prefixes instead of raising.
        if isinstance(resume_text, str) and resume_text.startswith(("Error", "The PDF appears")):
            return resume_text, "No recommendations available. Please check the PDF file.", ""

        # Check if we have enough text to analyze
        if not resume_text or len(resume_text) < 50:
            return (
                "Warning: Could not extract sufficient text from the PDF. Please ensure it's a text-based PDF, not a scanned image.",
                "No recommendations available.",
                "",
            )

        print(f"Extracted {len(resume_text)} characters from resume")

        # Analyze the resume
        resume_analysis = analyze_resume(resume_text)

        # Get job recommendations
        recommendations = recommend_jobs(resume_analysis)

        # Prepare output
        skills_found = ", ".join(resume_analysis["skills"]) if resume_analysis["skills"] else "No common skills detected"
        analysis_result = f"Skills found: {skills_found}"

        # Format recommendations as markdown
        formatted_recommendations = "### Top Job Recommendations\n\n"
        if not recommendations:
            formatted_recommendations += "No strong job matches found. Consider adding more skills and experiences to your resume."
        else:
            for i, rec in enumerate(recommendations, 1):
                formatted_recommendations += f"#### {i}. {rec['title']} at {rec['company']}\n"
                formatted_recommendations += f"*Match Score:* {rec['similarity']}%\n\n"
                formatted_recommendations += f"*Description:* {rec['description']}\n\n"
                formatted_recommendations += f"*Requirements:* {rec['requirements']}\n\n"
                # Horizontal rule between entries, but not after the last one.
                if i < len(recommendations):
                    formatted_recommendations += "---\n\n"

        return analysis_result, formatted_recommendations, resume_text

    except Exception as e:
        # Top-level UI boundary: log the traceback and show a friendly message
        # instead of letting the request fail.
        import traceback
        error_details = traceback.format_exc()
        print(f"Error details: {error_details}")
        return f"Error processing resume: {str(e)}", "An error occurred. Please try again with a different PDF.", ""
|
| 394 |
+
|
| 395 |
+
def process_job_match(resume_text, job_title, job_description):
    """Handle the "Match with Resume" button.

    Validates the inputs, delegates to ``match_resume_to_job`` and converts
    any exception into user-facing messages.

    Args:
        resume_text: Resume text stored by the resume-analysis tab.
        job_title: Job title entered by the user.
        job_description: Job description entered by the user.

    Returns:
        A ``(analysis, suggestions)`` tuple of strings; on invalid input the
        first item is a prompt message and the second is ``""``.
    """
    # Guard clauses: nothing to match without a resume and a complete posting.
    if not resume_text:
        return "No resume available. Please upload your resume first.", ""
    if not (job_title and job_description):
        return "Please provide both job title and description.", ""

    try:
        match_report, tips = match_resume_to_job(resume_text, job_title, job_description)
        return match_report, tips
    except Exception as exc:
        import traceback
        trace_text = traceback.format_exc()
        print(f"Error details: {trace_text}")
        return f"Error matching resume to job: {str(exc)}", "An error occurred. Please try again."
|
| 413 |
+
|
| 414 |
+
# Create Gradio interface with tabs
|
| 415 |
+
with gr.Blocks(title="Resume Analyzer & Job Recommender") as app:
|
| 416 |
+
# Storage for resume text between tabs
|
| 417 |
+
resume_text_state = gr.State("")
|
| 418 |
+
|
| 419 |
+
# Title and description styling
|
| 420 |
+
gr.Markdown("""
|
| 421 |
+
<h1 style="text-align: center; color: #e35117;">Resume Analyzer and Job Recommender</h1>
|
| 422 |
+
<p style="text-align: center; color: #6C757D;">Upload your resume to get an analysis, job recommendations, or match with specific job descriptions</p>
|
| 423 |
+
""")
|
| 424 |
+
|
| 425 |
+
# Tabs for different functionalities
|
| 426 |
+
with gr.Tabs():
|
| 427 |
+
# Tab 1: Resume Analysis and Recommendations
|
| 428 |
+
with gr.TabItem("Resume Analysis & Recommendations"):
|
| 429 |
+
with gr.Row():
|
| 430 |
+
# First Column for file upload and tips
|
| 431 |
+
with gr.Column(scale=2):
|
| 432 |
+
resume_input = gr.File(label="Upload Resume (PDF)", elem_id="resume_input")
|
| 433 |
+
submit_btn = gr.Button("Analyze Resume", elem_id="submit_btn")
|
| 434 |
+
|
| 435 |
+
gr.Markdown("""
|
| 436 |
+
### Tips for best results:
|
| 437 |
+
- Use a text-based PDF (not a scanned image)
|
| 438 |
+
- Make sure your resume is properly formatted
|
| 439 |
+
- Include key skills in your resume
|
| 440 |
+
""")
|
| 441 |
+
|
| 442 |
+
# Second Column for outputs
|
| 443 |
+
with gr.Column(scale=3):
|
| 444 |
+
analysis_output = gr.Textbox(label="Resume Analysis", lines=6, placeholder="Your resume analysis will appear here...", elem_id="analysis_output")
|
| 445 |
+
recommendations_output = gr.Markdown(label="Job Recommendations", elem_id="recommendations_output")
|
| 446 |
+
|
| 447 |
+
# Tab 2: Job Match Analysis
|
| 448 |
+
with gr.TabItem("Match with Job Description"):
|
| 449 |
+
with gr.Row():
|
| 450 |
+
# First Column for job description input
|
| 451 |
+
with gr.Column(scale=2):
|
| 452 |
+
job_title_input = gr.Textbox(label="Job Title", placeholder="Enter the job title here...", elem_id="job_title_input")
|
| 453 |
+
job_description_input = gr.Textbox(label="Job Description", placeholder="Paste the full job description here...", lines=10, elem_id="job_description_input")
|
| 454 |
+
job_match_btn = gr.Button("Match with Resume", elem_id="job_match_btn")
|
| 455 |
+
|
| 456 |
+
gr.Markdown("""
|
| 457 |
+
### How to use this feature:
|
| 458 |
+
1. Upload your resume in the "Resume Analysis" tab first
|
| 459 |
+
2. Paste a job description you're interested in
|
| 460 |
+
3. Click "Match with Resume" to see how well your resume matches
|
| 461 |
+
4. Review suggestions to improve your application
|
| 462 |
+
""")
|
| 463 |
+
|
| 464 |
+
# Second Column for outputs
|
| 465 |
+
with gr.Column(scale=3):
|
| 466 |
+
job_match_output = gr.Textbox(label="Match Analysis", lines=6, placeholder="Your job match analysis will appear here...", elem_id="job_match_output")
|
| 467 |
+
improvement_suggestions = gr.Markdown(label="Improvement Suggestions", elem_id="improvement_suggestions")
|
| 468 |
+
|
| 469 |
+
# Button click actions
|
| 470 |
+
submit_btn.click(
|
| 471 |
+
process_resume,
|
| 472 |
+
inputs=[resume_input],
|
| 473 |
+
outputs=[analysis_output, recommendations_output, resume_text_state]
|
| 474 |
+
)
|
| 475 |
+
|
| 476 |
+
job_match_btn.click(
|
| 477 |
+
process_job_match,
|
| 478 |
+
inputs=[resume_text_state, job_title_input, job_description_input],
|
| 479 |
+
outputs=[job_match_output, improvement_suggestions]
|
| 480 |
+
)
|
| 481 |
+
|
| 482 |
+
# Add custom CSS for better styling
|
| 483 |
+
css = """
|
| 484 |
+
#submit_btn, #job_match_btn {
|
| 485 |
+
background-color:#e35117;
|
| 486 |
+
color: white;
|
| 487 |
+
font-weight: bold;
|
| 488 |
+
border-radius: 5px;
|
| 489 |
+
padding: 10px;
|
| 490 |
+
}
|
| 491 |
+
|
| 492 |
+
#submit_btn:hover, #job_match_btn:hover {
|
| 493 |
+
background-color: #e35117;
|
| 494 |
+
}
|
| 495 |
+
|
| 496 |
+
#resume_input input[type="file"] {
|
| 497 |
+
padding: 8px;
|
| 498 |
+
border: 1px solid #ddd;
|
| 499 |
+
border-radius: 5px;
|
| 500 |
+
background-color: #ffffff;
|
| 501 |
+
transition: border-color 0.3s ease;
|
| 502 |
+
}
|
| 503 |
+
|
| 504 |
+
#resume_input input[type="file"]:hover {
|
| 505 |
+
border-color: #e35117;
|
| 506 |
+
}
|
| 507 |
+
|
| 508 |
+
#analysis_output, #job_match_output {
|
| 509 |
+
background-color: #030000;
|
| 510 |
+
border-radius: 5px;
|
| 511 |
+
padding: 10px;
|
| 512 |
+
font-size: 14px;
|
| 513 |
+
border: 1px solid #ddd;
|
| 514 |
+
box-shadow: 0 1px 3px rgba(0,0,0,0.05);
|
| 515 |
+
}
|
| 516 |
+
|
| 517 |
+
#recommendations_output, #improvement_suggestions {
|
| 518 |
+
background-color:#030000;
|
| 519 |
+
border-radius: 5px;
|
| 520 |
+
padding: 10px;
|
| 521 |
+
font-size: 14px;
|
| 522 |
+
border: 1px solid #ddd;
|
| 523 |
+
}
|
| 524 |
+
|
| 525 |
+
.tabs {
|
| 526 |
+
margin-top: 20px;
|
| 527 |
+
}
|
| 528 |
+
"""
|
| 529 |
+
|
| 530 |
+
# Apply custom CSS
|
| 531 |
+
app.css = css
|
| 532 |
+
|
| 533 |
+
# Launch the app
|
| 534 |
+
if __name__ == "__main__":
|
| 535 |
+
app.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
transformers==4.34.0
|
| 2 |
+
pandas==2.1.1
|
| 3 |
+
numpy==1.26.0
|
| 4 |
+
PyPDF2==3.0.1
|
| 5 |
+
scikit-learn==1.3.1
|
| 6 |
+
gradio==4.5.0
|
| 7 |
+
torch==2.1.0
|