Spaces:

its-zion-18
/

career_planner

Sleeping

App Files Files Community

its-zion-18 committed on Oct 9

Commit

9257d69

verified ·

1 Parent(s): fc8bc3e

Update app.py

Browse files

Files changed (1) hide show

app.py +84 -59

app.py CHANGED Viewed

@@ -12,23 +12,14 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from nltk.stem import PorterStemmer
 import gradio as gr
-# --- Download necessary NLTK data ---
-try:
-    nltk.data.find('corpora/words')
-except LookupError:
-    nltk.download('words', quiet=True)
-try:
-    nltk.data.find('corpora/stopwords')
-except LookupError:
-    nltk.download('stopwords', quiet=True)
-try:
-    nltk.data.find('taggers/averaged_perceptron_tagger')
-except LookupError:
-    nltk.download('averaged_perceptron_tagger', quiet=True)
-try:
-    nltk.data.find('tokenizers/punkt')
-except LookupError:
-    nltk.download('punkt', quiet=True)
 STOPWORDS = set(stopwords.words('english'))
 stemmer = PorterStemmer()
@@ -41,7 +32,7 @@ combined_job_embeddings = None
 original_job_title_embeddings = None
 LLM_PIPELINE = None
 LLM_MODEL_NAME = "microsoft/phi-2"
-FINETUNED_MODEL_ID = "its-zion-18/projfinetuned"
 KNOWN_WORDS = set()
 # --- CORE NLP & HELPER FUNCTIONS ---
@@ -212,37 +203,39 @@ def _course_links_for(skill: str) -> str:
 # --- GRADIO INTERFACE FUNCTIONS ---
-### --- FIX #1A: The `get_job_matches` function now returns 5 items, including the initial results (`emb_matches`) for the state ---
 def get_job_matches(dream_job: str, top_n: int, skills_text: str):
     status = "Searching using hybrid model..."
     expanded_desc = llm_expand_query(dream_job)
-    emb_matches = find_job_matches(dream_job, expanded_desc, top_k=50)
     user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
     if user_skills:
         display_df = score_jobs_by_skills(user_skills, emb_matches)
-        status = f"Found and **re-ranked** results by your {len(user_skills)} skills."
     else:
         display_df = emb_matches
-        status = f"Found {len(emb_matches)} top matches using semantic search."
     display_df = display_df.head(top_n)
     table_to_show = display_df[['job_title', 'company', 'Similarity Score']]
     if 'Skill Match Score' in display_df.columns:
         table_to_show['Skill Match Score'] = display_df['Skill Match Score']
     dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
     dropdown_value = dropdown_options[0][1] if dropdown_options else None
     return status, emb_matches, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True), gr.Accordion(visible=True)
 def rerank_current_results(initial_matches_df, skills_text, top_n):
     if initial_matches_df is None or pd.DataFrame(initial_matches_df).empty:
         return "Please find matches first before re-ranking.", pd.DataFrame(), gr.Dropdown(visible=False)
-    # Ensure we are working with a DataFrame
     initial_matches_df = pd.DataFrame(initial_matches_df)
     user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
     if not user_skills:
         status = "Skills cleared. Showing original semantic search results."
@@ -258,7 +251,6 @@ def rerank_current_results(initial_matches_df, skills_text, top_n):
     dropdown_value = dropdown_options[0][1] if dropdown_options else None
     return status, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True)
-### --- FIX #1B: These wrapper functions now handle the 5 return values correctly ---
 def find_matches_and_rank_with_check(dream_job: str, top_n: int, skills_text: str):
     if not dream_job:
         return "Please describe your dream job first.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(""), gr.Row(visible=False)
@@ -267,7 +259,7 @@ def find_matches_and_rank_with_check(dream_job: str, top_n: int, skills_text: st
         word_list_html = ", ".join([f"<b><span style='color: #F87171;'>{w}</span></b>" for w in unrecognized_words])
         alert_message = f"<b><span style='color: #F87171;'>⚠️ Possible Spelling Error:</span></b> Unrecognized: {word_list_html}."
         return "Status: Awaiting confirmation.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(alert_message, visible=True), gr.Row(visible=True)
     status, emb_matches, table_to_show, dropdown, details_accordion = get_job_matches(dream_job, top_n, skills_text)
     return status, emb_matches, table_to_show, dropdown, details_accordion, gr.Markdown(visible=False), gr.Row(visible=False)
@@ -276,55 +268,85 @@ def find_matches_and_rank_anyway(dream_job: str, top_n: int, skills_text: str):
     return status, emb_matches, table_to_show, dropdown, details_accordion, gr.Markdown(visible=False), gr.Row(visible=False)
 def on_select_job(job_id, skills_text):
-    if job_id is None: return "", "", "", "", "", gr.Accordion(visible=False)
     row = original_df.loc[job_id]
     title, company = str(row.get("job_title", "")), str(row.get("company", ""))
     job_details_markdown = f"### {title} — {company}"
     duties, qualifications, description = str(row.get('Duties', '')), str(row.get('qualifications', '')), str(row.get('Description', ''))
     user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
-    if not user_skills:
-        learning_plan_html = "<p><i>Enter your skills and click 'Re-rank' to see a personalized learning plan.</i></p>"
-    else:
-        job_skills = row.get("Skills", [])
-        matched_skills = [s for s in job_skills if any(_skill_match(ut, s) for ut in user_skills)]
-        missing_skills = [s for s in job_skills if s not in matched_skills]
-        score_val = len(matched_skills) / len(job_skills) if len(job_skills) > 0 else 0.0
-        headline = "<b>Great fit!</b>" if score_val >= 0.8 else "<b>Good progress!</b>" if score_val >= 0.5 else "<b>Solid starting point.</b>"
         job_details_markdown += f"\n**Your skill match:** {score_val:.1%}"
-        if not missing_skills:
-            learning_plan_html = "<h4 style='color:green;'>🎉 You have all the required skills!</h4>"
-        else:
-            learning_plan_html = f"<h4>{headline} Focus on these skills to improve your match:</h4>"
-            items_html = [f"<li><b>{ms}</b><br>• Learn: {_course_links_for(ms)}</li>" for ms in sorted(missing_skills, key=lambda x: x.lower())[:5]]
-            learning_plan_html += f"<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
-    return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True)
 def on_reset():
-    # Now returns an extra `None` for the new state component
-    return ("", 3, "", pd.DataFrame(), None, gr.Dropdown(visible=False), gr.Accordion(visible=False), "", "", "", "", "", gr.Markdown(visible=False), gr.Row(visible=False))
 # --- Run Initialization ---
 print("Starting application initialization...")
 initialization_status = initialize_data_and_model()
 print(initialization_status)
-# --- Gradio Interface Definition (Exact layout you provided) ---
 with gr.Blocks(theme=gr.themes.Soft()) as ui:
     gr.Markdown("# Hybrid Career Planner & Skill Gap Analyzer")
-    ### --- FIX #1C: A State component is defined to hold the initial results ---
     initial_matches_state = gr.State()
     with gr.Row():
         with gr.Column(scale=3):
             dream_text = gr.Textbox(label='Your Dream Job Description', lines=3, placeholder="e.g., 'A role in a tech startup focused on machine learning...'")
             with gr.Accordion("Optional: Add Your Skills to Re-rank Results", open=False):
-                  with gr.Row():
-                      skills_text = gr.Textbox(label='Your Skills (comma-separated)', placeholder="e.g., Python, data analysis", scale=3)
-                      rerank_btn = gr.Button("Re-rank", variant="secondary", scale=1)
         with gr.Column(scale=1):
             topk_slider = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Number of Matches")
             search_btn = gr.Button("Find Matches", variant="primary")
@@ -342,7 +364,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as ui:
     with gr.Accordion("Job Details & Learning Plan", open=False, visible=False) as details_accordion:
         job_details_markdown = gr.Markdown()
-        ### --- FIX #2: The Tabs are now placed before the Learning Plan ---
         with gr.Tabs():
             with gr.TabItem("Duties"):
                 duties_markdown = gr.Markdown()
@@ -352,10 +373,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as ui:
                 description_markdown = gr.Markdown()
         learning_plan_output = gr.HTML(label="Learning Plan")
     # --- Event Handlers ---
-    ### --- FIX #1D: The search button outputs now include `initial_matches_state` to fix the re-rank button ---
     search_btn.click(
         fn=find_matches_and_rank_with_check,
         inputs=[dream_text, topk_slider, skills_text],
@@ -372,7 +392,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as ui:
     )
     reset_btn.click(
         fn=on_reset,
-        outputs=[dream_text, topk_slider, skills_text, df_output, initial_matches_state, job_selector, details_accordion, status_text, job_details_markdown, duties_markdown, qualifications_markdown, description_markdown, spelling_alert, spelling_row],
         queue=False
     )
     rerank_btn.click(
@@ -383,7 +403,12 @@ with gr.Blocks(theme=gr.themes.Soft()) as ui:
     job_selector.change(
         fn=on_select_job,
         inputs=[job_selector, skills_text],
-        outputs=[job_details_markdown, duties_markdown, qualifications_markdown, description_markdown, learning_plan_output, details_accordion]
     )
 ui.launch()

 from nltk.stem import PorterStemmer
 import gradio as gr
+# --- CORRECTED: Download necessary NLTK data ---
+# This revised block is more direct and ensures all packages are downloaded.
+for package in ['words', 'stopwords', 'averaged_perceptron_tagger', 'punkt']:
+    try:
+        nltk.data.find(f'corpora/{package}' if package in ['words', 'stopwords'] else f'taggers/{package}' if package == 'averaged_perceptron_tagger' else f'tokenizers/{package}')
+    except LookupError:
+        nltk.download(package)
+# ------------------------------------------------
 STOPWORDS = set(stopwords.words('english'))
 stemmer = PorterStemmer()
 original_job_title_embeddings = None
 LLM_PIPELINE = None
 LLM_MODEL_NAME = "microsoft/phi-2"
+FINETUNED_MODEL_ID = "its-zion-18/projfinetuned"
 KNOWN_WORDS = set()
 # --- CORE NLP & HELPER FUNCTIONS ---
 # --- GRADIO INTERFACE FUNCTIONS ---
 def get_job_matches(dream_job: str, top_n: int, skills_text: str):
     """Search for jobs matching a description and build the values shown in the UI.

     The description is expanded with ``llm_expand_query`` and handed to
     ``find_job_matches`` (``top_k=50``). When ``skills_text`` contains at least
     one token that survives ``_norm_skill_token``, the candidates are passed
     through ``score_jobs_by_skills``; otherwise they are used as returned.
     Only the first ``top_n`` rows are displayed.

     Args:
         dream_job: Free-text description of the desired role.
         top_n: Number of rows to display.
         skills_text: Comma-separated skills; may be empty.

     Returns:
         A 5-tuple ``(status, emb_matches, table_to_show, dropdown, accordion)``:
         the status message, the full (un-truncated) candidate frame, the
         trimmed table for display, a ``gr.Dropdown`` whose option values are the
         index labels of the displayed rows, and a visible ``gr.Accordion``.
     """
     expanded_desc = llm_expand_query(dream_job)
     emb_matches = find_job_matches(dream_job, expanded_desc, top_k=50)
     user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]

     if user_skills:
         display_df = score_jobs_by_skills(user_skills, emb_matches).head(top_n)
         status = f"Found and **re-ranked** results by your {len(user_skills)} skills. Displaying top {len(display_df)}."
     else:
         display_df = emb_matches.head(top_n)
         status = f"Found {len(display_df)} top matches using semantic search."

     # Take an explicit copy: a column is added below, and assigning into a
     # column-subset of display_df would otherwise be a chained assignment.
     table_to_show = display_df[['job_title', 'company', 'Similarity Score']].copy()
     if 'Skill Match Score' in display_df.columns:
         table_to_show['Skill Match Score'] = display_df['Skill Match Score']

     # Number the entries by display position. The index label (row.name) is the
     # option *value* used to look the job up later, so it must not be used as
     # the visible rank: after re-ranking the labels are neither 0-based nor
     # in display order.
     dropdown_options = [
         (f"{rank}. {row['job_title']} - {row['company']}", row.name)
         for rank, (_, row) in enumerate(display_df.iterrows(), start=1)
     ]
     dropdown_value = dropdown_options[0][1] if dropdown_options else None
     return status, emb_matches, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True), gr.Accordion(visible=True)
 def rerank_current_results(initial_matches_df, skills_text, top_n):
     if initial_matches_df is None or pd.DataFrame(initial_matches_df).empty:
         return "Please find matches first before re-ranking.", pd.DataFrame(), gr.Dropdown(visible=False)
     initial_matches_df = pd.DataFrame(initial_matches_df)
     user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
     if not user_skills:
         status = "Skills cleared. Showing original semantic search results."
     dropdown_value = dropdown_options[0][1] if dropdown_options else None
     return status, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True)
 def find_matches_and_rank_with_check(dream_job: str, top_n: int, skills_text: str):
     if not dream_job:
         return "Please describe your dream job first.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(""), gr.Row(visible=False)
         word_list_html = ", ".join([f"<b><span style='color: #F87171;'>{w}</span></b>" for w in unrecognized_words])
         alert_message = f"<b><span style='color: #F87171;'>⚠️ Possible Spelling Error:</span></b> Unrecognized: {word_list_html}."
         return "Status: Awaiting confirmation.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(alert_message, visible=True), gr.Row(visible=True)
     status, emb_matches, table_to_show, dropdown, details_accordion = get_job_matches(dream_job, top_n, skills_text)
     return status, emb_matches, table_to_show, dropdown, details_accordion, gr.Markdown(visible=False), gr.Row(visible=False)
     return status, emb_matches, table_to_show, dropdown, details_accordion, gr.Markdown(visible=False), gr.Row(visible=False)
 def on_select_job(job_id, skills_text):
     """Build the detail panel and learning plan for the selected job.

     Args:
         job_id: Index label of the job in ``original_df``, or ``None`` when
             nothing is selected.
         skills_text: Comma-separated skills entered by the user; may be empty.

     Returns:
         A 9-tuple: job header markdown, duties, qualifications, description,
         learning-plan HTML, a ``gr.Accordion`` update, the list of missing
         skills kept for paging, the number of skills already shown, and a
         ``gr.Button`` update for the "load more" button. The paging list is
         only populated when no user skills were given; every other path
         returns ``[]``, ``0`` and a hidden button.
     """
     if job_id is None:
         return "", "", "", "", "", gr.Accordion(visible=False), [], 0, gr.Button(visible=False)

     job_row = original_df.loc[job_id]
     title = str(job_row.get("job_title", ""))
     company = str(job_row.get("company", ""))
     details_md = f"### {title} — {company}"
     duties = str(job_row.get('Duties', ''))
     qualifications = str(job_row.get('qualifications', ''))
     description = str(job_row.get('Description', ''))
     user_skills = [_norm_skill_token(tok) for tok in skills_text.split(',') if _norm_skill_token(tok)]

     def _reply(details, plan_html, pending=None, offset=0, show_more=False):
         # Common shape of every response once a job row has been loaded.
         return (
             details,
             duties,
             qualifications,
             description,
             plan_html,
             gr.Accordion(visible=True),
             [] if pending is None else pending,
             offset,
             gr.Button(visible=show_more),
         )

     def _as_list_html(skills):
         # One bullet per skill, each with its course links.
         entries = [f"<li><b>{ms}</b><br>• Learn: {_course_links_for(ms)}</li>" for ms in skills]
         return f"<ul style='list-style-type: none; padding-left: 0;'>{''.join(entries)}</ul>"

     job_skills = job_row.get("Skills", [])
     if not job_skills:
         return _reply(details_md, "<p><i>No specific skills were extracted for this job.</i></p>")

     # Job skills that none of the user's skills match, in case-insensitive order.
     missing = sorted(
         (s for s in job_skills if not any(_skill_match(ut, s) for ut in user_skills)),
         key=lambda name: name.lower(),
     )
     if not missing:
         return _reply(details_md, "<h4 style='color:green;'>🎉 You have all the required skills!</h4>")

     first_page = missing[:5]

     if not user_skills:
         # No skills supplied: show the first page and keep the full list so
         # further pages can be requested.
         plan_html = "<h4>To be a good fit for this role, you'll need to learn these skills:</h4>" + _as_list_html(first_page)
         return _reply(
             details_md,
             plan_html,
             pending=missing,
             offset=len(first_page),
             show_more=len(missing) > 5,
         )

     score_val = (len(job_skills) - len(missing)) / len(job_skills)
     details_md += f"\n**Your skill match:** {score_val:.1%}"
     if score_val >= 0.8:
         headline = "<b>Great fit!</b>"
     elif score_val >= 0.5:
         headline = "<b>Good progress!</b>"
     else:
         headline = "<b>Solid starting point.</b>"
     plan_html = f"<h4>{headline} Focus on these skills to improve your match:</h4>" + _as_list_html(first_page)
     return _reply(details_md, plan_html)
+def load_more_skills(full_skills_list, current_offset):
+    SKILLS_INCREMENT = 5
+    new_offset = current_offset + SKILLS_INCREMENT
+    skills_to_display = full_skills_list[:new_offset]
+    items_html = [f"<li><b>{ms}</b><br>• Learn: {_course_links_for(ms)}</li>" for ms in skills_to_display]
+    learning_plan_html = f"<h4>To be a good fit for this role, you'll need to learn these skills:</h4><ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
+    should_button_be_visible = new_offset < len(full_skills_list)
+    return learning_plan_html, new_offset, gr.Button(visible=should_button_be_visible)
 def on_reset():
     """Return the values that restore every reset-button output to its start state.

     The tuple is positional: each entry is labelled with the component it
     feeds, following the order of the ``outputs`` list given to
     ``reset_btn.click``.
     """
     return (
         "",                           # dream_text
         3,                            # topk_slider
         "",                           # skills_text
         pd.DataFrame(),               # df_output
         None,                         # initial_matches_state
         gr.Dropdown(visible=False),   # job_selector
         gr.Accordion(visible=False),  # details_accordion
         "Status: Ready.",             # status_text
         "",                           # job_details_markdown
         "",                           # duties_markdown
         "",                           # qualifications_markdown
         "",                           # description_markdown
         gr.Markdown(visible=False),   # spelling_alert
         gr.Row(visible=False),        # spelling_row
         [],                           # missing_skills_state
         0,                            # skills_offset_state
         gr.Button(visible=False),     # load_more_btn
     )
 # --- Run Initialization ---
 print("Starting application initialization...")
 initialization_status = initialize_data_and_model()
 print(initialization_status)
+# --- Gradio Interface Definition ---
 with gr.Blocks(theme=gr.themes.Soft()) as ui:
     gr.Markdown("# Hybrid Career Planner & Skill Gap Analyzer")
     initial_matches_state = gr.State()
+    missing_skills_state = gr.State([])
+    skills_offset_state = gr.State(0)
     with gr.Row():
         with gr.Column(scale=3):
             dream_text = gr.Textbox(label='Your Dream Job Description', lines=3, placeholder="e.g., 'A role in a tech startup focused on machine learning...'")
             with gr.Accordion("Optional: Add Your Skills to Re-rank Results", open=False):
+                with gr.Row():
+                    skills_text = gr.Textbox(label='Your Skills (comma-separated)', placeholder="e.g., Python, data analysis", scale=3)
+                    rerank_btn = gr.Button("Re-rank", variant="secondary", scale=1)
         with gr.Column(scale=1):
             topk_slider = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Number of Matches")
             search_btn = gr.Button("Find Matches", variant="primary")
     with gr.Accordion("Job Details & Learning Plan", open=False, visible=False) as details_accordion:
         job_details_markdown = gr.Markdown()
         with gr.Tabs():
             with gr.TabItem("Duties"):
                 duties_markdown = gr.Markdown()
                 description_markdown = gr.Markdown()
         learning_plan_output = gr.HTML(label="Learning Plan")
+        load_more_btn = gr.Button("Load More Skills", visible=False)
     # --- Event Handlers ---
     search_btn.click(
         fn=find_matches_and_rank_with_check,
         inputs=[dream_text, topk_slider, skills_text],
     )
     reset_btn.click(
         fn=on_reset,
+        outputs=[dream_text, topk_slider, skills_text, df_output, initial_matches_state, job_selector, details_accordion, status_text, job_details_markdown, duties_markdown, qualifications_markdown, description_markdown, spelling_alert, spelling_row, missing_skills_state, skills_offset_state, load_more_btn],
         queue=False
     )
     rerank_btn.click(
     job_selector.change(
         fn=on_select_job,
         inputs=[job_selector, skills_text],
+        outputs=[job_details_markdown, duties_markdown, qualifications_markdown, description_markdown, learning_plan_output, details_accordion, missing_skills_state, skills_offset_state, load_more_btn]
+    )
+    load_more_btn.click(
+        fn=load_more_skills,
+        inputs=[missing_skills_state, skills_offset_state],
+        outputs=[learning_plan_output, skills_offset_state, load_more_btn]
     )
 ui.launch()