its-zion-18 committed on
Commit
9257d69
·
verified ·
1 Parent(s): fc8bc3e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -59
app.py CHANGED
@@ -12,23 +12,14 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
12
  from nltk.stem import PorterStemmer
13
  import gradio as gr
14
 
15
- # --- Download necessary NLTK data ---
16
- try:
17
- nltk.data.find('corpora/words')
18
- except LookupError:
19
- nltk.download('words', quiet=True)
20
- try:
21
- nltk.data.find('corpora/stopwords')
22
- except LookupError:
23
- nltk.download('stopwords', quiet=True)
24
- try:
25
- nltk.data.find('taggers/averaged_perceptron_tagger')
26
- except LookupError:
27
- nltk.download('averaged_perceptron_tagger', quiet=True)
28
- try:
29
- nltk.data.find('tokenizers/punkt')
30
- except LookupError:
31
- nltk.download('punkt', quiet=True)
32
 
33
  STOPWORDS = set(stopwords.words('english'))
34
  stemmer = PorterStemmer()
@@ -41,7 +32,7 @@ combined_job_embeddings = None
41
  original_job_title_embeddings = None
42
  LLM_PIPELINE = None
43
  LLM_MODEL_NAME = "microsoft/phi-2"
44
- FINETUNED_MODEL_ID = "its-zion-18/projfinetuned"
45
  KNOWN_WORDS = set()
46
 
47
  # --- CORE NLP & HELPER FUNCTIONS ---
@@ -212,37 +203,39 @@ def _course_links_for(skill: str) -> str:
212
 
213
  # --- GRADIO INTERFACE FUNCTIONS ---
214
 
215
- ### --- FIX #1A: The `get_job_matches` function now returns 5 items, including the initial results (`emb_matches`) for the state ---
216
  def get_job_matches(dream_job: str, top_n: int, skills_text: str):
217
  status = "Searching using hybrid model..."
218
  expanded_desc = llm_expand_query(dream_job)
219
- emb_matches = find_job_matches(dream_job, expanded_desc, top_k=50)
220
  user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
221
-
222
  if user_skills:
223
  display_df = score_jobs_by_skills(user_skills, emb_matches)
224
- status = f"Found and **re-ranked** results by your {len(user_skills)} skills."
225
  else:
226
  display_df = emb_matches
227
- status = f"Found {len(emb_matches)} top matches using semantic search."
228
 
229
  display_df = display_df.head(top_n)
 
 
 
 
 
 
230
  table_to_show = display_df[['job_title', 'company', 'Similarity Score']]
231
  if 'Skill Match Score' in display_df.columns:
232
  table_to_show['Skill Match Score'] = display_df['Skill Match Score']
233
-
234
  dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
235
  dropdown_value = dropdown_options[0][1] if dropdown_options else None
236
-
237
  return status, emb_matches, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True), gr.Accordion(visible=True)
238
 
239
  def rerank_current_results(initial_matches_df, skills_text, top_n):
240
  if initial_matches_df is None or pd.DataFrame(initial_matches_df).empty:
241
  return "Please find matches first before re-ranking.", pd.DataFrame(), gr.Dropdown(visible=False)
242
-
243
- # Ensure we are working with a DataFrame
244
  initial_matches_df = pd.DataFrame(initial_matches_df)
245
-
246
  user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
247
  if not user_skills:
248
  status = "Skills cleared. Showing original semantic search results."
@@ -258,7 +251,6 @@ def rerank_current_results(initial_matches_df, skills_text, top_n):
258
  dropdown_value = dropdown_options[0][1] if dropdown_options else None
259
  return status, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True)
260
 
261
- ### --- FIX #1B: These wrapper functions now handle the 5 return values correctly ---
262
  def find_matches_and_rank_with_check(dream_job: str, top_n: int, skills_text: str):
263
  if not dream_job:
264
  return "Please describe your dream job first.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(""), gr.Row(visible=False)
@@ -267,7 +259,7 @@ def find_matches_and_rank_with_check(dream_job: str, top_n: int, skills_text: st
267
  word_list_html = ", ".join([f"<b><span style='color: #F87171;'>{w}</span></b>" for w in unrecognized_words])
268
  alert_message = f"<b><span style='color: #F87171;'>⚠️ Possible Spelling Error:</span></b> Unrecognized: {word_list_html}."
269
  return "Status: Awaiting confirmation.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(alert_message, visible=True), gr.Row(visible=True)
270
-
271
  status, emb_matches, table_to_show, dropdown, details_accordion = get_job_matches(dream_job, top_n, skills_text)
272
  return status, emb_matches, table_to_show, dropdown, details_accordion, gr.Markdown(visible=False), gr.Row(visible=False)
273
 
@@ -276,55 +268,85 @@ def find_matches_and_rank_anyway(dream_job: str, top_n: int, skills_text: str):
276
  return status, emb_matches, table_to_show, dropdown, details_accordion, gr.Markdown(visible=False), gr.Row(visible=False)
277
 
278
  def on_select_job(job_id, skills_text):
279
- if job_id is None: return "", "", "", "", "", gr.Accordion(visible=False)
 
 
280
  row = original_df.loc[job_id]
281
  title, company = str(row.get("job_title", "")), str(row.get("company", ""))
282
  job_details_markdown = f"### {title} β€” {company}"
283
  duties, qualifications, description = str(row.get('Duties', '')), str(row.get('qualifications', '')), str(row.get('Description', ''))
 
284
  user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
 
285
 
286
- if not user_skills:
287
- learning_plan_html = "<p><i>Enter your skills and click 'Re-rank' to see a personalized learning plan.</i></p>"
288
- else:
289
- job_skills = row.get("Skills", [])
290
- matched_skills = [s for s in job_skills if any(_skill_match(ut, s) for ut in user_skills)]
291
- missing_skills = [s for s in job_skills if s not in matched_skills]
292
- score_val = len(matched_skills) / len(job_skills) if len(job_skills) > 0 else 0.0
293
- headline = "<b>Great fit!</b>" if score_val >= 0.8 else "<b>Good progress!</b>" if score_val >= 0.5 else "<b>Solid starting point.</b>"
 
 
 
 
294
  job_details_markdown += f"\n**Your skill match:** {score_val:.1%}"
 
 
 
 
 
295
 
296
- if not missing_skills:
297
- learning_plan_html = "<h4 style='color:green;'>πŸŽ‰ You have all the required skills!</h4>"
298
- else:
299
- learning_plan_html = f"<h4>{headline} Focus on these skills to improve your match:</h4>"
300
- items_html = [f"<li><b>{ms}</b><br>β€’ Learn: {_course_links_for(ms)}</li>" for ms in sorted(missing_skills, key=lambda x: x.lower())[:5]]
301
- learning_plan_html += f"<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
302
-
303
- return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
 
305
  def on_reset():
306
- # Now returns an extra `None` for the new state component
307
- return ("", 3, "", pd.DataFrame(), None, gr.Dropdown(visible=False), gr.Accordion(visible=False), "", "", "", "", "", gr.Markdown(visible=False), gr.Row(visible=False))
308
 
309
  # --- Run Initialization ---
310
  print("Starting application initialization...")
311
  initialization_status = initialize_data_and_model()
312
  print(initialization_status)
313
 
314
- # --- Gradio Interface Definition (Exact layout you provided) ---
315
  with gr.Blocks(theme=gr.themes.Soft()) as ui:
316
  gr.Markdown("# Hybrid Career Planner & Skill Gap Analyzer")
317
 
318
- ### --- FIX #1C: A State component is defined to hold the initial results ---
319
  initial_matches_state = gr.State()
 
 
320
 
321
  with gr.Row():
322
  with gr.Column(scale=3):
323
  dream_text = gr.Textbox(label='Your Dream Job Description', lines=3, placeholder="e.g., 'A role in a tech startup focused on machine learning...'")
324
  with gr.Accordion("Optional: Add Your Skills to Re-rank Results", open=False):
325
- with gr.Row():
326
- skills_text = gr.Textbox(label='Your Skills (comma-separated)', placeholder="e.g., Python, data analysis", scale=3)
327
- rerank_btn = gr.Button("Re-rank", variant="secondary", scale=1)
328
  with gr.Column(scale=1):
329
  topk_slider = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Number of Matches")
330
  search_btn = gr.Button("Find Matches", variant="primary")
@@ -342,7 +364,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as ui:
342
  with gr.Accordion("Job Details & Learning Plan", open=False, visible=False) as details_accordion:
343
  job_details_markdown = gr.Markdown()
344
 
345
- ### --- FIX #2: The Tabs are now placed before the Learning Plan ---
346
  with gr.Tabs():
347
  with gr.TabItem("Duties"):
348
  duties_markdown = gr.Markdown()
@@ -352,10 +373,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as ui:
352
  description_markdown = gr.Markdown()
353
 
354
  learning_plan_output = gr.HTML(label="Learning Plan")
 
355
 
356
  # --- Event Handlers ---
357
-
358
- ### --- FIX #1D: The search button outputs now include `initial_matches_state` to fix the re-rank button ---
359
  search_btn.click(
360
  fn=find_matches_and_rank_with_check,
361
  inputs=[dream_text, topk_slider, skills_text],
@@ -372,7 +392,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as ui:
372
  )
373
  reset_btn.click(
374
  fn=on_reset,
375
- outputs=[dream_text, topk_slider, skills_text, df_output, initial_matches_state, job_selector, details_accordion, status_text, job_details_markdown, duties_markdown, qualifications_markdown, description_markdown, spelling_alert, spelling_row],
376
  queue=False
377
  )
378
  rerank_btn.click(
@@ -383,7 +403,12 @@ with gr.Blocks(theme=gr.themes.Soft()) as ui:
383
  job_selector.change(
384
  fn=on_select_job,
385
  inputs=[job_selector, skills_text],
386
- outputs=[job_details_markdown, duties_markdown, qualifications_markdown, description_markdown, learning_plan_output, details_accordion]
 
 
 
 
 
387
  )
388
 
389
  ui.launch()
 
12
  from nltk.stem import PorterStemmer
13
  import gradio as gr
14
 
# --- NLTK resource bootstrap ---
# nltk.data.find() looks a resource up as "<category>/<package>", while
# nltk.download() takes the bare package name, so each package is listed
# together with the category it is looked up under. Any resource that is
# not found locally is downloaded.
for category, package in (
    ('corpora', 'words'),
    ('corpora', 'stopwords'),
    ('taggers', 'averaged_perceptron_tagger'),
    ('tokenizers', 'punkt'),
):
    try:
        nltk.data.find(f'{category}/{package}')
    except LookupError:
        nltk.download(package)
 
 
 
 
 
 
 
 
 
23
 
24
  STOPWORDS = set(stopwords.words('english'))
25
  stemmer = PorterStemmer()
 
32
  original_job_title_embeddings = None
33
  LLM_PIPELINE = None
34
  LLM_MODEL_NAME = "microsoft/phi-2"
35
+ FINETUNED_MODEL_ID = "its-zion-18/projfinetuned"
36
  KNOWN_WORDS = set()
37
 
38
  # --- CORE NLP & HELPER FUNCTIONS ---
 
203
 
204
  # --- GRADIO INTERFACE FUNCTIONS ---
205
 
 
def get_job_matches(dream_job: str, top_n: int, skills_text: str):
    """Find jobs for a dream-job description and build the UI payload.

    The description is expanded with ``llm_expand_query`` and passed to
    ``find_job_matches`` (top 50). When ``skills_text`` contains at least one
    usable skill, the matches are re-scored with ``score_jobs_by_skills``;
    otherwise the embedding matches are shown as they are. Only the first
    ``top_n`` rows are displayed.

    Args:
        dream_job: Free-text description of the desired job.
        top_n: Number of rows to display.
        skills_text: Comma-separated skills; may be empty.

    Returns:
        A 5-tuple ``(status, emb_matches, table_to_show, dropdown, accordion)``
        where ``emb_matches`` is the full, un-truncated match set (kept so a
        later re-rank can start from it) and ``table_to_show`` is the
        truncated table for display.
    """
    expanded_desc = llm_expand_query(dream_job)
    emb_matches = find_job_matches(dream_job, expanded_desc, top_k=50)

    # Normalise each token once and drop the ones that normalise to nothing.
    normalised = (_norm_skill_token(s) for s in skills_text.split(','))
    user_skills = [token for token in normalised if token]

    if user_skills:
        ranked_df = score_jobs_by_skills(user_skills, emb_matches)
    else:
        ranked_df = emb_matches

    # NOTE(review): the slider may deliver a float (e.g. 3.0), which
    # DataFrame.head() rejects - coerce defensively; confirm against Gradio.
    display_df = ranked_df.head(int(top_n))

    if user_skills:
        status = f"Found and **re-ranked** results by your {len(user_skills)} skills. Displaying top {len(display_df)}."
    else:
        status = f"Found {len(display_df)} top matches using semantic search."

    # Choose the columns first and copy, instead of assigning a new column
    # onto a slice of display_df (which triggers pandas' chained-assignment
    # warning and may write to a temporary).
    columns = ['job_title', 'company', 'Similarity Score']
    if 'Skill Match Score' in display_df.columns:
        columns.append('Skill Match Score')
    table_to_show = display_df[columns].copy()

    # iterrows() yields the index label (the job id used for later lookups),
    # not the position, so the visible rank has to come from enumerate().
    dropdown_options = [
        (f"{rank}. {row['job_title']} - {row['company']}", job_id)
        for rank, (job_id, row) in enumerate(display_df.iterrows(), start=1)
    ]
    dropdown_value = dropdown_options[0][1] if dropdown_options else None

    return (
        status,
        emb_matches,
        table_to_show,
        gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True),
        gr.Accordion(visible=True),
    )
232
 
233
  def rerank_current_results(initial_matches_df, skills_text, top_n):
234
  if initial_matches_df is None or pd.DataFrame(initial_matches_df).empty:
235
  return "Please find matches first before re-ranking.", pd.DataFrame(), gr.Dropdown(visible=False)
236
+
 
237
  initial_matches_df = pd.DataFrame(initial_matches_df)
238
+
239
  user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
240
  if not user_skills:
241
  status = "Skills cleared. Showing original semantic search results."
 
251
  dropdown_value = dropdown_options[0][1] if dropdown_options else None
252
  return status, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True)
253
 
 
254
  def find_matches_and_rank_with_check(dream_job: str, top_n: int, skills_text: str):
255
  if not dream_job:
256
  return "Please describe your dream job first.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(""), gr.Row(visible=False)
 
259
  word_list_html = ", ".join([f"<b><span style='color: #F87171;'>{w}</span></b>" for w in unrecognized_words])
260
  alert_message = f"<b><span style='color: #F87171;'>⚠️ Possible Spelling Error:</span></b> Unrecognized: {word_list_html}."
261
  return "Status: Awaiting confirmation.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(alert_message, visible=True), gr.Row(visible=True)
262
+
263
  status, emb_matches, table_to_show, dropdown, details_accordion = get_job_matches(dream_job, top_n, skills_text)
264
  return status, emb_matches, table_to_show, dropdown, details_accordion, gr.Markdown(visible=False), gr.Row(visible=False)
265
 
 
268
  return status, emb_matches, table_to_show, dropdown, details_accordion, gr.Markdown(visible=False), gr.Row(visible=False)
269
 
def on_select_job(job_id, skills_text):
    """Build the details panel and learning plan for the selected job.

    Args:
        job_id: Index label of the job in ``original_df``; ``None`` when
            nothing is selected.
        skills_text: Comma-separated user skills; may be empty.

    Returns:
        A 9-tuple ``(job_details_markdown, duties, qualifications,
        description, learning_plan_html, accordion, missing_skills, offset,
        load_more_button)``. ``missing_skills`` and ``offset`` feed the
        "load more" state; they are only populated when the user supplied no
        skills and the job is missing more than can be shown at once.
    """
    if job_id is None:
        return "", "", "", "", "", gr.Accordion(visible=False), [], 0, gr.Button(visible=False)

    row = original_df.loc[job_id]
    title, company = str(row.get("job_title", "")), str(row.get("company", ""))
    job_details_markdown = f"### {title} — {company}"
    duties = str(row.get('Duties', ''))
    qualifications = str(row.get('qualifications', ''))
    description = str(row.get('Description', ''))

    # Treat a missing skills value as "no skills" instead of crashing.
    normalised = (_norm_skill_token(s) for s in (skills_text or "").split(','))
    user_skills = [token for token in normalised if token]
    job_skills = row.get("Skills", [])

    if not job_skills:
        learning_plan_html = "<p><i>No specific skills were extracted for this job.</i></p>"
        return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)

    # Job skills that none of the user's skills match, in case-insensitive
    # alphabetical order. With no user skills this is every job skill.
    all_missing_skills = sorted(
        (s for s in job_skills if not any(_skill_match(ut, s) for ut in user_skills)),
        key=lambda x: x.lower(),
    )

    if not all_missing_skills:
        learning_plan_html = "<h4 style='color:green;'>🎉 You have all the required skills!</h4>"
        return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)

    # Both branches below show the same first page of missing skills.
    skills_to_display = all_missing_skills[:5]
    items_html = ''.join(
        f"<li><b>{ms}</b><br>• Learn: {_course_links_for(ms)}</li>" for ms in skills_to_display
    )
    skills_list_html = f"<ul style='list-style-type: none; padding-left: 0;'>{items_html}</ul>"

    if user_skills:
        score_val = (len(job_skills) - len(all_missing_skills)) / len(job_skills)
        job_details_markdown += f"\n**Your skill match:** {score_val:.1%}"
        headline = "<b>Great fit!</b>" if score_val >= 0.8 else "<b>Good progress!</b>" if score_val >= 0.5 else "<b>Solid starting point.</b>"
        learning_plan_html = f"<h4>{headline} Focus on these skills to improve your match:</h4>{skills_list_html}"
        # Paging is not offered here: the "load more" state is left empty
        # and the button stays hidden.
        return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)

    learning_plan_html = f"<h4>To be a good fit for this role, you'll need to learn these skills:</h4>{skills_list_html}"
    return (
        job_details_markdown,
        duties,
        qualifications,
        description,
        learning_plan_html,
        gr.Accordion(visible=True),
        all_missing_skills,
        len(skills_to_display),
        gr.Button(visible=len(all_missing_skills) > 5),
    )
314
+
def load_more_skills(full_skills_list, current_offset):
    """Re-render the learning plan with one more page of missing skills.

    Args:
        full_skills_list: Every missing skill for the selected job, in
            display order.
        current_offset: How many of those skills are currently shown.

    Returns:
        A 3-tuple ``(learning_plan_html, new_offset, load_more_button)``; the
        button is hidden once every skill is displayed.
    """
    SKILLS_INCREMENT = 5

    # Tolerate state that was never populated.
    full_skills_list = full_skills_list or []
    new_offset = (current_offset or 0) + SKILLS_INCREMENT
    skills_to_display = full_skills_list[:new_offset]

    items_html = ''.join(
        f"<li><b>{ms}</b><br>• Learn: {_course_links_for(ms)}</li>" for ms in skills_to_display
    )
    learning_plan_html = f"<h4>To be a good fit for this role, you'll need to learn these skills:</h4><ul style='list-style-type: none; padding-left: 0;'>{items_html}</ul>"

    should_button_be_visible = new_offset < len(full_skills_list)

    return learning_plan_html, new_offset, gr.Button(visible=should_button_be_visible)
326
 
def on_reset():
    """Return the values that put every reset-wired output back to its start state.

    The tuple is positional: each entry is labelled with the component it
    feeds in the ``outputs`` list of ``reset_btn.click``.
    """
    return (
        "",                          # dream_text
        3,                           # topk_slider
        "",                          # skills_text
        pd.DataFrame(),              # df_output
        None,                        # initial_matches_state
        gr.Dropdown(visible=False),  # job_selector
        gr.Accordion(visible=False), # details_accordion
        "Status: Ready.",            # status_text
        "",                          # job_details_markdown
        "",                          # duties_markdown
        "",                          # qualifications_markdown
        "",                          # description_markdown
        gr.Markdown(visible=False),  # spelling_alert
        gr.Row(visible=False),       # spelling_row
        [],                          # missing_skills_state
        0,                           # skills_offset_state
        gr.Button(visible=False),    # load_more_btn
    )
 
329
 
330
  # --- Run Initialization ---
331
  print("Starting application initialization...")
332
  initialization_status = initialize_data_and_model()
333
  print(initialization_status)
334
 
335
+ # --- Gradio Interface Definition ---
336
  with gr.Blocks(theme=gr.themes.Soft()) as ui:
337
  gr.Markdown("# Hybrid Career Planner & Skill Gap Analyzer")
338
 
 
339
  initial_matches_state = gr.State()
340
+ missing_skills_state = gr.State([])
341
+ skills_offset_state = gr.State(0)
342
 
343
  with gr.Row():
344
  with gr.Column(scale=3):
345
  dream_text = gr.Textbox(label='Your Dream Job Description', lines=3, placeholder="e.g., 'A role in a tech startup focused on machine learning...'")
346
  with gr.Accordion("Optional: Add Your Skills to Re-rank Results", open=False):
347
+ with gr.Row():
348
+ skills_text = gr.Textbox(label='Your Skills (comma-separated)', placeholder="e.g., Python, data analysis", scale=3)
349
+ rerank_btn = gr.Button("Re-rank", variant="secondary", scale=1)
350
  with gr.Column(scale=1):
351
  topk_slider = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Number of Matches")
352
  search_btn = gr.Button("Find Matches", variant="primary")
 
364
  with gr.Accordion("Job Details & Learning Plan", open=False, visible=False) as details_accordion:
365
  job_details_markdown = gr.Markdown()
366
 
 
367
  with gr.Tabs():
368
  with gr.TabItem("Duties"):
369
  duties_markdown = gr.Markdown()
 
373
  description_markdown = gr.Markdown()
374
 
375
  learning_plan_output = gr.HTML(label="Learning Plan")
376
+ load_more_btn = gr.Button("Load More Skills", visible=False)
377
 
378
  # --- Event Handlers ---
 
 
379
  search_btn.click(
380
  fn=find_matches_and_rank_with_check,
381
  inputs=[dream_text, topk_slider, skills_text],
 
392
  )
393
  reset_btn.click(
394
  fn=on_reset,
395
+ outputs=[dream_text, topk_slider, skills_text, df_output, initial_matches_state, job_selector, details_accordion, status_text, job_details_markdown, duties_markdown, qualifications_markdown, description_markdown, spelling_alert, spelling_row, missing_skills_state, skills_offset_state, load_more_btn],
396
  queue=False
397
  )
398
  rerank_btn.click(
 
403
  job_selector.change(
404
  fn=on_select_job,
405
  inputs=[job_selector, skills_text],
406
+ outputs=[job_details_markdown, duties_markdown, qualifications_markdown, description_markdown, learning_plan_output, details_accordion, missing_skills_state, skills_offset_state, load_more_btn]
407
+ )
408
+ load_more_btn.click(
409
+ fn=load_more_skills,
410
+ inputs=[missing_skills_state, skills_offset_state],
411
+ outputs=[learning_plan_output, skills_offset_state, load_more_btn]
412
  )
413
 
414
  ui.launch()