# Import necessary libraries
import PyPDF2
from transformers import pipeline
import gradio as gr
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Step 1: Load the pre-trained model for Named Entity Recognition (NER).
# The pipeline is built once, at import time, and is the `model` that
# analyze_resume() calls.
# NOTE(review): the checkpoint name indicates a CoNLL-03 fine-tune; confirm its
# label set actually contains the 'SKILL' / 'JOB_TITLE' tags that
# analyze_resume() filters on.
model = pipeline('ner', model='dbmdz/bert-large-cased-finetuned-conll03-english')

# Step 2: Extract text from uploaded PDF resume
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF file.

    Args:
        pdf_file: Path to the PDF file on disk.

    Returns:
        The text of all pages joined in page order. A page that yields no
        text contributes an empty string.
    """
    with open(pdf_file, "rb") as file:
        # PdfReader / .pages replace the camelCase PdfFileReader / numPages /
        # getPage API, which is deprecated and was removed in PyPDF2 3.0.
        reader = PyPDF2.PdfReader(file)
        # Pages are read lazily from the open handle, so the join must happen
        # inside the `with` block. `or ""` guards against a page whose
        # extraction yields None instead of a string.
        return "".join(page.extract_text() or "" for page in reader.pages)

# Step 3: Pull skills and job titles out of the resume text using the model
def analyze_resume(resume_text):
    """Run the NER model on the text and collect words by entity label.

    Args:
        resume_text: Text to pass to the module-level ``model`` pipeline.

    Returns:
        A dict with two lists, each in model-output order:
        ``'skills'`` (words labelled ``'SKILL'``) and ``'job_title'``
        (words labelled ``'JOB_TITLE'``).

    NOTE(review): confirm the loaded model ever emits these two labels;
    if it does not, both lists are always empty.
    """
    entities = model(resume_text)

    def words_tagged(label):
        # Keep only the surface form of entities carrying exactly this label.
        return [ent['word'] for ent in entities if ent['entity'] == label]

    return {
        'skills': words_tagged('SKILL'),
        'job_title': words_tagged('JOB_TITLE'),
    }

# Step 4: Calculate similarity between the job description and the resume
def calculate_similarity(job_desc, resume_text):
    """Return the TF-IDF cosine similarity between two texts.

    Args:
        job_desc: Job description text.
        resume_text: Text extracted from the resume.

    Returns:
        The cosine similarity as a float. Returns 0.0 when either text is
        None/blank, or when no terms remain once English stop words are
        removed.
    """
    # A blank document maps to a zero vector, so its similarity to anything
    # is 0. Returning early also avoids the ValueError that fit_transform
    # raises when both documents are blank, and the crash on None input.
    if not (job_desc and job_desc.strip() and resume_text and resume_text.strip()):
        return 0.0

    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        # Row 0 is the job description, row 1 is the resume.
        tfidf_matrix = vectorizer.fit_transform([job_desc, resume_text])
    except ValueError:
        # Empty vocabulary, e.g. both texts consist only of stop words.
        return 0.0

    similarity = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
    # cosine_similarity returns a 1x1 matrix here; unwrap it to a plain float.
    return float(similarity[0][0])

# Step 5: Check if the resume matches the job description and return a verdict
def match_job_description(job_desc, resume_file, threshold=0.7):
    """Classify a resume as "Good Fit" or "Not Fit" for a job description.

    Args:
        job_desc: Job description text.
        resume_file: The uploaded resume, either an object exposing its path
            as ``.name`` or a plain path string. May be None when nothing
            has been uploaded.
        threshold: Minimum similarity score (inclusive) that counts as a
            good fit. Defaults to 0.7.

    Returns:
        "Good Fit" when the similarity score is at least ``threshold``,
        otherwise "Not Fit". When ``resume_file`` is None, returns a short
        prompt asking for an upload instead of raising.
    """
    # The interface is created with live=True, so this may be invoked before
    # any file has been uploaded.
    if resume_file is None:
        return "Please upload a resume (PDF)."

    # Accept both file-like wrappers (path in .name) and bare path strings.
    resume_path = getattr(resume_file, "name", resume_file)
    resume_text = extract_text_from_pdf(resume_path)

    similarity_score = calculate_similarity(job_desc, resume_text)
    return "Good Fit" if similarity_score >= threshold else "Not Fit"

# Step 6: Gradio Interface function
def process_resume(job_desc, resume_file):
    """Gradio callback: return match_job_description's verdict for the inputs.

    Args:
        job_desc: Job description text from the text input.
        resume_file: Uploaded resume from the file input.

    Returns:
        Whatever match_job_description returns for these two arguments.
    """
    return match_job_description(job_desc, resume_file)

# Step 7: Create Gradio Interface
# Wires process_resume to a two-input form with a single text output.
interface = gr.Interface(
    fn=process_resume, 
    inputs=["text", "file"],  # Job description (text) and resume (file), in process_resume's argument order
    outputs="text",  # The string returned by process_resume is shown as plain text
    # NOTE(review): live=True presumably re-runs process_resume on every input
    # change, including before a file is uploaded — confirm this is intended.
    live=True
)

# Step 8: Launch the app
# NOTE(review): this runs at module import time (there is no
# `if __name__ == "__main__":` guard), so importing this file starts the app.
interface.launch()