its-zion-18 committed on
Commit
ed9ab6a
·
verified ·
1 Parent(s): 9257d69

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +303 -173
app.py CHANGED
@@ -6,24 +6,49 @@ import re
6
  import nltk
7
  from nltk.corpus import words, stopwords
8
  import urllib.parse as _url
9
- from sklearn.feature_extraction.text import TfidfVectorizer
10
- from sklearn.metrics.pairwise import cosine_similarity
11
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
12
  from nltk.stem import PorterStemmer
13
  import gradio as gr
 
 
14
 
15
- # --- CORRECTED: Download necessary NLTK data ---
16
- # This revised block is more direct and ensures all packages are downloaded.
 
17
  for package in ['words', 'stopwords', 'averaged_perceptron_tagger', 'punkt']:
18
  try:
19
  nltk.data.find(f'corpora/{package}' if package in ['words', 'stopwords'] else f'taggers/{package}' if package == 'averaged_perceptron_tagger' else f'tokenizers/{package}')
20
  except LookupError:
21
  nltk.download(package)
22
- # ------------------------------------------------
23
 
24
  STOPWORDS = set(stopwords.words('english'))
25
  stemmer = PorterStemmer()
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  # --- GLOBAL STATE & DATA ---
28
  original_df = None
29
  combined_df = None
@@ -43,22 +68,6 @@ def _norm_skill_token(s: str) -> str:
43
  s = re.sub(r'\s+', ' ', s)
44
  return s
45
 
46
def _skill_match(token1: str, token2: str, threshold: float = 0.9) -> bool:
    """Decide whether two skill strings refer to the same skill.

    Both tokens are normalised with ``_norm_skill_token`` and then compared in
    two stages: exact/substring equality first, and — only when both tokens
    are longer than two characters — TF-IDF cosine similarity against
    ``threshold``.

    Args:
        token1: First skill string (e.g. a skill typed by the user).
        token2: Second skill string (e.g. a skill extracted from a job).
        threshold: Minimum cosine similarity counted as a match.

    Returns:
        True when the tokens match by either stage, otherwise False.
    """
    t1 = _norm_skill_token(token1)
    t2 = _norm_skill_token(token2)

    # An empty string is a substring of every string, so without this guard a
    # token that normalises to "" would "match" every skill it is compared to.
    if not t1 or not t2:
        return False

    if t1 == t2 or t1 in t2 or t2 in t1:
        return True

    # Very short tokens are only ever matched by the substring stage above.
    if len(t1) <= 2 or len(t2) <= 2:
        return False

    try:
        # Fit and transform in one pass; the vocabulary is just these two strings.
        vectors = TfidfVectorizer().fit_transform([t1, t2])
        similarity = cosine_similarity(vectors)[0, 1]
    except Exception:
        # Best effort, as before: a failed similarity computation means "no
        # match". Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        return False

    return bool(similarity >= threshold)
61
-
62
  def build_known_vocabulary(df: pd.DataFrame):
63
  global KNOWN_WORDS
64
  english_words = set(w.lower() for w in words.words())
@@ -83,9 +92,7 @@ def initialize_llm_client():
83
  model_llm = AutoModelForCausalLM.from_pretrained(
84
  LLM_MODEL_NAME, torch_dtype="auto", device_map="auto", trust_remote_code=True
85
  )
86
- LLM_PIPELINE = pipeline(
87
- "text-generation", model=model_llm, tokenizer=tokenizer, max_new_tokens=100, do_sample=True, temperature=0.7
88
- )
89
  return True
90
  except Exception as e:
91
  print(f"🚨 ERROR initializing local LLM: {e}")
@@ -129,64 +136,173 @@ def find_job_matches(original_user_query: str, expanded_user_query: str, top_k:
129
  final_results_df = final_results_df.set_index('job_id', drop=False).rename(columns={'job_id': 'Job ID'})
130
  return final_results_df
131
 
132
- def score_jobs_by_skills(user_tokens: list[str], df_to_rank: pd.DataFrame) -> pd.DataFrame:
133
- if df_to_rank is None or df_to_rank.empty: return pd.DataFrame()
 
 
134
  ranked_df = df_to_rank.copy()
135
- if 'Skills' not in ranked_df.columns: return ranked_df.sort_values(by='Similarity Score', ascending=False)
136
- def calculate_match(row, user_tokens):
137
- job_skills = row.get('Skills', [])
138
- if not isinstance(job_skills, list): return [], 0, 0.0
139
- matched_skills = [s for s in job_skills if any(_skill_match(ut, s) for ut in user_tokens)]
140
- total_required_count = len(job_skills)
141
- match_score = len(matched_skills) / total_required_count if total_required_count > 0 else 0.0
142
- return matched_skills, len(matched_skills), match_score
143
- results = ranked_df.apply(lambda row: calculate_match(row, user_tokens), axis=1, result_type='expand')
144
- ranked_df[['Skill Matches', 'Skill Match Count', 'Skill Match Score']] = results
145
- ranked_df = ranked_df.sort_values(by=['Skill Match Score', 'Similarity Score'], ascending=[False, False]).reset_index(drop=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  return ranked_df.set_index('Job ID', drop=False).rename_axis(None)
147
 
148
  def initialize_data_and_model():
149
  global original_df, combined_df, model, combined_job_embeddings, original_job_title_embeddings
 
 
150
  print("--- Initializing LLM Client ---")
151
- if not initialize_llm_client(): print("Warning: LLM Client failed to initialize.")
152
- print("--- Loading Datasets ---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  ds = datasets.load_dataset("its-zion-18/Jobs-tabular-dataset")
154
- original_df = ds["original"].to_pandas()
155
  augmented_df = ds["augmented"].to_pandas()
156
- original_df['job_id'] = original_df.index
157
  max_id = len(original_df) - 1
158
  augmented_df['job_id'] = augmented_df.index.map(lambda i: min(i // 20, max_id))
159
- def create_full_text(row):
160
- return " ".join([str(s) for s in [row.get("Job title"), row.get("Company"), row.get("Duties"), row.get("qualifications"), row.get("Description")]])
161
- original_df["full_text"] = original_df.apply(create_full_text, axis=1)
162
  augmented_df["full_text"] = augmented_df.apply(create_full_text, axis=1)
 
163
  combined_df = pd.concat([original_df.copy(), augmented_df.copy()], ignore_index=True)
164
  original_df = original_df.rename(columns={'Job title': 'job_title', 'Company': 'company'})
165
- def extract_skills_from_text(text):
166
- if not isinstance(text, str): return []
167
- grammar = "NP: {<JJ.?>*<NN.?>+}"
168
- chunk_parser = nltk.RegexpParser(grammar)
169
- tokens = nltk.word_tokenize(text.lower())
170
- tagged_tokens = nltk.pos_tag(tokens)
171
- chunked_text = chunk_parser.parse(tagged_tokens)
172
- skills = []
173
- for subtree in chunked_text.subtrees():
174
- if subtree.label() == 'NP':
175
- phrase = " ".join(word for word, tag in subtree.leaves())
176
- junk_phrases = {'demonstrated experience', 'experience', 'related field', 'college/university level', 'equivalent foreign degree', 'cacrep standards', 'students', 'learning experience', 'ability', 'process', 'accreditation', 'human development', 'social welfare', 'sociology', 'pre-service teachers', 'abilities', 'books', 'certifications', 'college', 'level', 'licenses', 'years', 'form', 'knowledge', 'skills'}
177
- if phrase not in junk_phrases and _norm_skill_token(phrase) and phrase not in STOPWORDS:
178
- skills.append(_norm_skill_token(phrase))
179
- keywords = {'teaching', 'training', 'leadership', 'management', 'data management', 'budget development', 'report'}
180
- for keyword in keywords:
181
- if re.search(r'\b' + re.escape(keyword) + r'\b', text.lower()) and _norm_skill_token(keyword) not in skills:
182
- skills.append(_norm_skill_token(keyword))
183
- stemmed_skills = {}
184
- for skill in skills:
185
- stemmed_phrase = ' '.join([stemmer.stem(word) for word in skill.split()])
186
- if stemmed_phrase not in stemmed_skills:
187
- stemmed_skills[stemmed_phrase] = skill
188
- return list(stemmed_skills.values())
189
- original_df['Skills'] = original_df['qualifications'].apply(extract_skills_from_text)
190
  print("--- Loading Fine-Tuned Sentence Transformer Model ---")
191
  model = SentenceTransformer(FINETUNED_MODEL_ID)
192
  print("--- Encoding Embeddings ---")
@@ -201,145 +317,186 @@ def _course_links_for(skill: str) -> str:
201
  links = [("Coursera", f"https://www.coursera.org/search?query={q}"), ("edX", f"https://www.edx.org/search?q={q}"), ("Udemy", f"https://www.udemy.com/courses/search/?q={q}"), ("YouTube", f"https://www.youtube.com/results?search_query={q}+tutorial")]
202
  return " β€’ ".join([f'<a href="{u}" target="_blank" style="color: #007bff;">{name}</a>' for name, u in links])
203
 
204
- # --- GRADIO INTERFACE FUNCTIONS ---
205
-
206
  def get_job_matches(dream_job: str, top_n: int, skills_text: str):
207
  status = "Searching using hybrid model..."
208
  expanded_desc = llm_expand_query(dream_job)
209
  emb_matches = find_job_matches(dream_job, expanded_desc, top_k=50)
210
  user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
211
-
212
- if user_skills:
213
- display_df = score_jobs_by_skills(user_skills, emb_matches)
214
- else:
215
- display_df = emb_matches
216
-
217
- display_df = display_df.head(top_n)
218
-
219
  if user_skills:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  status = f"Found and **re-ranked** results by your {len(user_skills)} skills. Displaying top {len(display_df)}."
221
  else:
 
222
  status = f"Found {len(display_df)} top matches using semantic search."
223
-
224
- table_to_show = display_df[['job_title', 'company', 'Similarity Score']]
225
- if 'Skill Match Score' in display_df.columns:
226
- table_to_show['Skill Match Score'] = display_df['Skill Match Score']
227
-
 
 
 
 
 
 
228
  dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
229
  dropdown_value = dropdown_options[0][1] if dropdown_options else None
230
-
231
- return status, emb_matches, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True), gr.Accordion(visible=True)
 
232
 
233
  def rerank_current_results(initial_matches_df, skills_text, top_n):
234
  if initial_matches_df is None or pd.DataFrame(initial_matches_df).empty:
235
- return "Please find matches first before re-ranking.", pd.DataFrame(), gr.Dropdown(visible=False)
236
-
237
  initial_matches_df = pd.DataFrame(initial_matches_df)
238
-
239
  user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
 
 
 
 
 
240
  if not user_skills:
241
  status = "Skills cleared. Showing original semantic search results."
242
  display_df = initial_matches_df.head(top_n)
243
  table_to_show = display_df[['job_title', 'company', 'Similarity Score']]
 
 
244
  else:
245
  ranked_df = score_jobs_by_skills(user_skills, initial_matches_df)
246
  status = f"Results **re-ranked** based on your {len(user_skills)} skills."
247
  display_df = ranked_df.head(top_n)
248
- table_to_show = display_df[['job_title', 'company', 'Similarity Score', 'Skill Match Score']]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
 
250
  dropdown_options = [(f"{i+1}. {row['job_title']} - {row['company']}", row.name) for i, row in display_df.iterrows()]
251
  dropdown_value = dropdown_options[0][1] if dropdown_options else None
252
- return status, table_to_show, gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True)
 
 
253
 
254
  def find_matches_and_rank_with_check(dream_job: str, top_n: int, skills_text: str):
255
  if not dream_job:
256
- return "Please describe your dream job first.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(""), gr.Row(visible=False)
 
257
  unrecognized_words = check_spelling_in_query(dream_job)
258
  if unrecognized_words:
259
  word_list_html = ", ".join([f"<b><span style='color: #F87171;'>{w}</span></b>" for w in unrecognized_words])
260
  alert_message = f"<b><span style='color: #F87171;'>⚠️ Possible Spelling Error:</span></b> Unrecognized: {word_list_html}."
261
- return "Status: Awaiting confirmation.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(alert_message, visible=True), gr.Row(visible=True)
262
-
263
- status, emb_matches, table_to_show, dropdown, details_accordion = get_job_matches(dream_job, top_n, skills_text)
264
- return status, emb_matches, table_to_show, dropdown, details_accordion, gr.Markdown(visible=False), gr.Row(visible=False)
 
265
 
266
  def find_matches_and_rank_anyway(dream_job: str, top_n: int, skills_text: str):
267
- status, emb_matches, table_to_show, dropdown, details_accordion = get_job_matches(dream_job, top_n, skills_text)
268
- return status, emb_matches, table_to_show, dropdown, details_accordion, gr.Markdown(visible=False), gr.Row(visible=False)
269
 
270
  def on_select_job(job_id, skills_text):
271
- if job_id is None:
272
- return "", "", "", "", "", gr.Accordion(visible=False), [], 0, gr.Button(visible=False)
273
-
274
  row = original_df.loc[job_id]
275
  title, company = str(row.get("job_title", "")), str(row.get("company", ""))
276
  job_details_markdown = f"### {title} β€” {company}"
277
  duties, qualifications, description = str(row.get('Duties', '')), str(row.get('qualifications', '')), str(row.get('Description', ''))
278
-
279
  user_skills = [_norm_skill_token(s) for s in skills_text.split(',') if _norm_skill_token(s)]
280
  job_skills = row.get("Skills", [])
281
-
282
  if not job_skills:
283
- learning_plan_html = "<p><i>No specific skills were extracted for this job.</i></p>"
284
  return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)
285
 
286
- all_missing_skills = sorted([s for s in job_skills if not any(_skill_match(ut, s) for ut in user_skills)], key=lambda x: x.lower())
287
-
288
- if not all_missing_skills:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  learning_plan_html = "<h4 style='color:green;'>πŸŽ‰ You have all the required skills!</h4>"
 
290
  return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)
291
-
292
  if user_skills:
293
- score_val = (len(job_skills) - len(all_missing_skills)) / len(job_skills)
294
  job_details_markdown += f"\n**Your skill match:** {score_val:.1%}"
295
  headline = "<b>Great fit!</b>" if score_val >= 0.8 else "<b>Good progress!</b>" if score_val >= 0.5 else "<b>Solid starting point.</b>"
296
  learning_plan_html = f"<h4>{headline} Focus on these skills to improve your match:</h4>"
297
- skills_to_display = all_missing_skills[:5]
298
  items_html = [f"<li><b>{ms}</b><br>β€’ Learn: {_course_links_for(ms)}</li>" for ms in skills_to_display]
299
  learning_plan_html += f"<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
300
-
301
  return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), [], 0, gr.Button(visible=False)
302
-
303
  else:
304
  headline = "<h4>To be a good fit for this role, you'll need to learn these skills:</h4>"
305
- skills_to_display = all_missing_skills[:5]
306
  items_html = [f"<li><b>{ms}</b><br>β€’ Learn: {_course_links_for(ms)}</li>" for ms in skills_to_display]
307
  learning_plan_html = f"{headline}<ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
308
-
309
- full_skill_list_for_state = all_missing_skills
310
  new_offset = len(skills_to_display)
311
- should_button_be_visible = len(all_missing_skills) > 5
312
-
313
  return job_details_markdown, duties, qualifications, description, learning_plan_html, gr.Accordion(visible=True), full_skill_list_for_state, new_offset, gr.Button(visible=should_button_be_visible)
314
 
315
  def load_more_skills(full_skills_list, current_offset):
316
  SKILLS_INCREMENT = 5
317
  new_offset = current_offset + SKILLS_INCREMENT
318
  skills_to_display = full_skills_list[:new_offset]
319
-
320
  items_html = [f"<li><b>{ms}</b><br>β€’ Learn: {_course_links_for(ms)}</li>" for ms in skills_to_display]
321
  learning_plan_html = f"<h4>To be a good fit for this role, you'll need to learn these skills:</h4><ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"
322
-
323
  should_button_be_visible = new_offset < len(full_skills_list)
324
-
325
  return learning_plan_html, new_offset, gr.Button(visible=should_button_be_visible)
326
 
327
  def on_reset():
328
- return ("", 3, "", pd.DataFrame(), None, gr.Dropdown(visible=False), gr.Accordion(visible=False), "Status: Ready.", "", "", "", "", gr.Markdown(visible=False), gr.Row(visible=False), [], 0, gr.Button(visible=False))
 
329
 
330
- # --- Run Initialization ---
331
  print("Starting application initialization...")
332
  initialization_status = initialize_data_and_model()
333
  print(initialization_status)
334
 
335
- # --- Gradio Interface Definition ---
336
  with gr.Blocks(theme=gr.themes.Soft()) as ui:
337
  gr.Markdown("# Hybrid Career Planner & Skill Gap Analyzer")
338
-
339
  initial_matches_state = gr.State()
340
  missing_skills_state = gr.State([])
341
  skills_offset_state = gr.State(0)
342
-
343
  with gr.Row():
344
  with gr.Column(scale=3):
345
  dream_text = gr.Textbox(label='Your Dream Job Description', lines=3, placeholder="e.g., 'A role in a tech startup focused on machine learning...'")
@@ -351,64 +508,37 @@ with gr.Blocks(theme=gr.themes.Soft()) as ui:
351
  topk_slider = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Number of Matches")
352
  search_btn = gr.Button("Find Matches", variant="primary")
353
  reset_btn = gr.Button("Reset All")
354
-
355
  status_text = gr.Markdown("Status: Ready.")
356
  spelling_alert = gr.Markdown(visible=False)
357
  with gr.Row(visible=False) as spelling_row:
358
  search_anyway_btn = gr.Button("Search Anyway", variant="secondary")
359
  retype_btn = gr.Button("Let Me Fix It", variant="stop")
360
-
361
- df_output = gr.DataFrame(label="Job Matches", interactive=False)
362
- job_selector = gr.Dropdown(label="Select a job to see more details & learning plan:", visible=False)
363
 
 
 
 
 
 
 
 
364
  with gr.Accordion("Job Details & Learning Plan", open=False, visible=False) as details_accordion:
365
  job_details_markdown = gr.Markdown()
366
-
367
  with gr.Tabs():
368
- with gr.TabItem("Duties"):
369
- duties_markdown = gr.Markdown()
370
- with gr.TabItem("Qualifications"):
371
- qualifications_markdown = gr.Markdown()
372
- with gr.TabItem("Full Description"):
373
- description_markdown = gr.Markdown()
374
-
375
  learning_plan_output = gr.HTML(label="Learning Plan")
376
  load_more_btn = gr.Button("Load More Skills", visible=False)
377
 
378
- # --- Event Handlers ---
379
- search_btn.click(
380
- fn=find_matches_and_rank_with_check,
381
- inputs=[dream_text, topk_slider, skills_text],
382
- outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row]
383
- )
384
- search_anyway_btn.click(
385
- fn=find_matches_and_rank_anyway,
386
- inputs=[dream_text, topk_slider, skills_text],
387
- outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row]
388
- )
389
- retype_btn.click(
390
- lambda: ("Status: Ready for you to retype.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(visible=False), gr.Row(visible=False)),
391
- outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row]
392
- )
393
- reset_btn.click(
394
- fn=on_reset,
395
- outputs=[dream_text, topk_slider, skills_text, df_output, initial_matches_state, job_selector, details_accordion, status_text, job_details_markdown, duties_markdown, qualifications_markdown, description_markdown, spelling_alert, spelling_row, missing_skills_state, skills_offset_state, load_more_btn],
396
- queue=False
397
- )
398
- rerank_btn.click(
399
- fn=rerank_current_results,
400
- inputs=[initial_matches_state, skills_text, topk_slider],
401
- outputs=[status_text, df_output, job_selector]
402
- )
403
- job_selector.change(
404
- fn=on_select_job,
405
- inputs=[job_selector, skills_text],
406
- outputs=[job_details_markdown, duties_markdown, qualifications_markdown, description_markdown, learning_plan_output, details_accordion, missing_skills_state, skills_offset_state, load_more_btn]
407
- )
408
- load_more_btn.click(
409
- fn=load_more_skills,
410
- inputs=[missing_skills_state, skills_offset_state],
411
- outputs=[learning_plan_output, skills_offset_state, load_more_btn]
412
- )
413
 
414
- ui.launch()
 
6
  import nltk
7
  from nltk.corpus import words, stopwords
8
  import urllib.parse as _url
 
 
9
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
10
  from nltk.stem import PorterStemmer
11
  import gradio as gr
12
+ import os
13
+ from tqdm import tqdm
14
 
15
+ tqdm.pandas()
16
+
17
# --- NLTK Data Download ---
# Each entry pairs the name passed to nltk.download() with the resource path
# that nltk.data.find() is asked for; a package is downloaded only when that
# lookup raises LookupError.
for package, resource_path in (
    ('words', 'corpora/words'),
    ('stopwords', 'corpora/stopwords'),
    ('averaged_perceptron_tagger', 'taggers/averaged_perceptron_tagger'),
    ('punkt', 'tokenizers/punkt'),
):
    try:
        nltk.data.find(resource_path)
    except LookupError:
        nltk.download(package)
 
23
 
24
  STOPWORDS = set(stopwords.words('english'))
25
  stemmer = PorterStemmer()
26
 
27
+ # --- Expanded Skill Whitelist ---
28
+ SKILL_WHITELIST = {
29
+ # Technical & Data
30
+ 'python', 'java', 'c++', 'javascript', 'typescript', 'sql', 'nosql', 'html', 'css', 'react', 'angular', 'vue',
31
+ 'nodejs', 'django', 'flask', 'fastapi', 'spring boot', 'ruby on rails', 'php', 'swift', 'kotlin', 'dart', 'flutter',
32
+ 'machine learning', 'deep learning', 'tensorflow', 'pytorch', 'keras', 'scikit-learn', 'pandas', 'numpy', 'matplotlib',
33
+ 'natural language processing', 'nlp', 'computer vision', 'data analysis', 'data science', 'data engineering',
34
+ 'big data', 'spark', 'hadoop', 'kafka', 'data visualization', 'tableau', 'power bi', 'd3.js', 'statistics', 'analytics',
35
+ 'aws', 'azure', 'google cloud', 'gcp', 'docker', 'kubernetes', 'terraform', 'ansible', 'ci/cd', 'jenkins',
36
+ 'git', 'github', 'devops', 'linux', 'unix', 'shell scripting', 'powershell', 'cybersecurity', 'penetration testing',
37
+ 'network security', 'cryptography', 'blockchain', 'c#', '.net', 'sql server', 'mysql', 'postgresql', 'mongodb', 'redis',
38
+ 'elasticsearch', 'api design', 'rest apis', 'graphql', 'microservices', 'serverless', 'system design', 'saas',
39
+ # Business & Consulting
40
+ 'agile', 'scrum', 'project management', 'product management', 'consulting', 'client management', 'business development',
41
+ 'strategy', 'stakeholder management', 'risk management', 'compliance', 'aml', 'kyc', 'reinsurance', 'finance',
42
+ 'financial modeling', 'financial analysis', 'due diligence', 'sourcing', 'procurement', 'negotiation', 'supply chain',
43
+ 'business analysis', 'business intelligence', 'presentations', 'public speaking', 'time management', 'critical thinking',
44
+ 'design thinking', 'innovation', 'adaptability', 'supervisory', 'pmp', 'cpsm', 'cips', 'microsoft office', 'communication',
45
+ 'organizational skills',
46
+ # Soft & Other
47
+ 'leadership', 'stakeholder communication', 'client communication', 'teamwork', 'collaboration', 'problem solving',
48
+ 'ui/ux design', 'figma', 'sketch', 'adobe xd', 'graphic design', 'autocad', 'solidworks', 'sales', 'marketing',
49
+ 'seo', 'sem', 'content writing', 'customer support', 'technical writing', 'sap', 'oracle', 'budgeting', 'mentoring', 'supervising'
50
+ }
51
+
52
  # --- GLOBAL STATE & DATA ---
53
  original_df = None
54
  combined_df = None
 
68
  s = re.sub(r'\s+', ' ', s)
69
  return s
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  def build_known_vocabulary(df: pd.DataFrame):
72
  global KNOWN_WORDS
73
  english_words = set(w.lower() for w in words.words())
 
92
  model_llm = AutoModelForCausalLM.from_pretrained(
93
  LLM_MODEL_NAME, torch_dtype="auto", device_map="auto", trust_remote_code=True
94
  )
95
+ LLM_PIPELINE = pipeline("text-generation", model=model_llm, tokenizer=tokenizer)
 
 
96
  return True
97
  except Exception as e:
98
  print(f"🚨 ERROR initializing local LLM: {e}")
 
136
  final_results_df = final_results_df.set_index('job_id', drop=False).rename(columns={'job_id': 'Job ID'})
137
  return final_results_df
138
 
139
def score_jobs_by_skills(user_skills: list[str], df_to_rank: pd.DataFrame) -> pd.DataFrame:
    """Re-rank job matches by blending a skill-match score into the similarity score.

    For every distinct skill found in the ``Skills`` column, the best cosine
    similarity against any of ``user_skills`` is computed from embeddings
    produced by the module-level ``model``. A job's ``Skill Match Score`` is
    the mean of those best similarities over its skills, scaled down by
    ``min(1, n_skills / 5)`` so jobs listing fewer than five skills cannot reach
    a full score. ``Final Score`` is ``0.8 * Similarity Score + 0.2 * Skill
    Match Score``.

    Args:
        user_skills: Normalised skill strings supplied by the user.
        df_to_rank: Candidate jobs; expected to carry ``Similarity Score``,
            ``Job ID`` and (optionally) ``Skills`` columns.

    Returns:
        * An empty DataFrame when ``df_to_rank`` is None.
        * The input ordered by ``Similarity Score`` (no score columns added)
          when the frame is empty, ``user_skills`` is empty, or there is no
          ``Skills`` column.
        * The frame with ``Skill Match Score`` = 0 and ``Final Score`` =
          ``Similarity Score`` (original order and index) when no job lists
          any skill.
        * Otherwise the frame sorted by ``Final Score`` descending and indexed
          by ``Job ID``.
    """
    if df_to_rank is None:
        return pd.DataFrame()

    def _by_similarity(frame: pd.DataFrame) -> pd.DataFrame:
        # An empty frame may have no columns at all; sorting on a missing
        # column would raise KeyError instead of returning "nothing to rank".
        if 'Similarity Score' not in frame.columns:
            return frame.copy()
        return frame.sort_values(by='Similarity Score', ascending=False)

    if df_to_rank.empty or not user_skills:
        return _by_similarity(df_to_rank)

    ranked_df = df_to_rank.copy()
    if 'Skills' not in ranked_df.columns:
        return _by_similarity(ranked_df)

    def _as_skill_list(value) -> list:
        # Skills cells may be Python lists or array-likes (list columns come
        # back from parquet as NumPy arrays, whose truth value is ambiguous).
        # Anything else (None, NaN, scalars) counts as "no skills".
        if isinstance(value, (list, tuple)):
            return list(value)
        if hasattr(value, 'tolist'):
            converted = value.tolist()
            return converted if isinstance(converted, list) else []
        return []

    job_skill_lists = [_as_skill_list(cell) for cell in ranked_df['Skills']]
    all_job_skills = sorted({skill for skills in job_skill_lists for skill in skills})

    if not all_job_skills:
        ranked_df['Skill Match Score'] = 0.0
        ranked_df['Final Score'] = ranked_df['Similarity Score']
        return ranked_df

    user_skill_embeddings = model.encode(user_skills, convert_to_tensor=True)
    job_skill_embeddings = model.encode(all_job_skills, convert_to_tensor=True)
    # Rows are user skills, columns are job skills.
    similarity_matrix = util.cos_sim(user_skill_embeddings, job_skill_embeddings)

    # Best user-skill similarity for each job skill, computed once for the
    # whole frame instead of a list.index() scan per skill per row.
    best_similarity = dict(zip(all_job_skills, similarity_matrix.max(dim=0).values.tolist()))

    def _confidence_adjusted_score(job_skills: list) -> float:
        if not job_skills:
            return 0.0
        total_required = len(job_skills)
        avg_score = sum(best_similarity.get(skill, 0.0) for skill in job_skills) / total_required
        # Jobs with fewer than five listed skills are discounted proportionally.
        skill_count_factor = min(1.0, total_required / 5.0)
        return avg_score * skill_count_factor

    ranked_df['Skill Match Score'] = [_confidence_adjusted_score(skills) for skills in job_skill_lists]
    ranked_df['Final Score'] = (0.8 * ranked_df['Similarity Score']) + (0.2 * ranked_df['Skill Match Score'])

    ranked_df = ranked_df.sort_values(by='Final Score', ascending=False).reset_index(drop=True)
    return ranked_df.set_index('Job ID', drop=False).rename_axis(None)
183
 
184
  def initialize_data_and_model():
185
  global original_df, combined_df, model, combined_job_embeddings, original_job_title_embeddings
186
+ PROCESSED_DATA_PATH = "processed_jobs_with_skills.parquet"
187
+
188
  print("--- Initializing LLM Client ---")
189
+ if not initialize_llm_client(): print("Warning: LLM Client failed to initialize. Will use NLTK only for skills.")
190
+
191
+ if os.path.exists(PROCESSED_DATA_PATH):
192
+ print(f"--- Loading pre-processed data from {PROCESSED_DATA_PATH} ---")
193
+ original_df = pd.read_parquet(PROCESSED_DATA_PATH)
194
+ else:
195
+ print("--- No pre-processed data found. Starting one-time processing... ---")
196
+ ds = datasets.load_dataset("its-zion-18/Jobs-tabular-dataset")
197
+ original_df = ds["original"].to_pandas()
198
+
199
+ def extract_skills_llm(text: str) -> list[str]:
200
+ if not isinstance(text, str) or len(text.strip()) < 20 or not LLM_PIPELINE: return []
201
+ prompt = f"""
202
+ Instruct: You are an expert technical recruiter. Extract the key skills from the job description text. List technical and soft skills as a comma-separated string.
203
+ [Example 1]
204
+ Text: "Requires 3+ years of experience in cloud infrastructure. Must be proficient in AWS, particularly EC2 and S3. Experience with Terraform for IaC is a plus."
205
+ Extracted Skills: cloud infrastructure, aws, ec2, s3, terraform, infrastructure as code
206
+ [Example 2]
207
+ Text: "Seeking a team lead with strong project management abilities. Must communicate effectively with stakeholders and manage timelines using Agile methodologies like Scrum."
208
+ Extracted Skills: project management, leadership, stakeholder communication, agile, scrum
209
+ [Actual Task]
210
+ Text: "{text}"
211
+ Extracted Skills:
212
+ """
213
+ try:
214
+ response = LLM_PIPELINE(prompt, max_new_tokens=150, do_sample=False, temperature=0.1)
215
+ generated_text = response[0]['generated_text']
216
+ skills_part = generated_text.split("Extracted Skills:")[-1].strip()
217
+ skills = [skill.strip() for skill in skills_part.split(',') if skill.strip()]
218
+ return list(dict.fromkeys(s.lower() for s in skills))
219
+ except Exception: return []
220
+
221
+ def extract_skills_nltk(text: str) -> list[str]:
222
+ if not isinstance(text, str): return []
223
+ text_lower = text.lower()
224
+ grammar = "NP: {<JJ.*>*<NN.*>+}"
225
+ chunk_parser = nltk.RegexpParser(grammar)
226
+ tokens = nltk.word_tokenize(text_lower)
227
+ tagged_tokens = nltk.pos_tag(tokens)
228
+ chunked_text = chunk_parser.parse(tagged_tokens)
229
+ potential_skills = set()
230
+ for subtree in chunked_text.subtrees():
231
+ if subtree.label() == 'NP':
232
+ phrase = " ".join(word for word, tag in subtree.leaves())
233
+ if _norm_skill_token(phrase) in SKILL_WHITELIST:
234
+ potential_skills.add(_norm_skill_token(phrase))
235
+ return sorted(list(potential_skills))
236
+
237
+ def extract_skills_direct_scan(text: str) -> list[str]:
238
+ if not isinstance(text, str): return []
239
+ found_skills = set()
240
+ for skill in SKILL_WHITELIST:
241
+ if re.search(r'\b' + re.escape(skill) + r'\b', text, re.IGNORECASE):
242
+ found_skills.add(skill)
243
+ return list(found_skills)
244
+
245
+ def expand_skills_with_llm(job_title: str, existing_skills: list) -> list:
246
+ if not LLM_PIPELINE or not job_title: return []
247
+
248
+ skills_to_add = 6 - len(existing_skills)
249
+ prompt = f"""
250
+ Instruct: A job has the title "{job_title}" and requires the skills: {', '.join(existing_skills)}.
251
+ Based on this, what are {skills_to_add} additional, closely related skills typically required for such a role?
252
+ List only the new skills, separated by commas. Do not repeat skills from the original list.
253
+
254
+ Additional Skills:
255
+ """
256
+ try:
257
+ response = LLM_PIPELINE(prompt, max_new_tokens=50, do_sample=True, temperature=0.5)
258
+ generated_text = response[0]['generated_text']
259
+ skills_part = generated_text.split("Additional Skills:")[-1].strip()
260
+ new_skills = [skill.strip().lower() for skill in skills_part.split(',') if skill.strip()]
261
+ return new_skills
262
+ except Exception:
263
+ return []
264
+
265
+ def extract_skills_hybrid(row) -> list[str]:
266
+ text = row['text_for_skills']
267
+ job_title = row.get('Job title', '') # Use original Job title for context
268
+
269
+ llm_skills = extract_skills_llm(text)
270
+ nltk_skills = extract_skills_nltk(text)
271
+ direct_skills = extract_skills_direct_scan(text)
272
+ combined_skills = set(llm_skills) | set(nltk_skills) | set(direct_skills)
273
+
274
+ # If the combined list is still too short, expand it
275
+ if len(combined_skills) < 6:
276
+ expanded_skills = expand_skills_with_llm(job_title, list(combined_skills))
277
+ combined_skills.update(expanded_skills)
278
+
279
+ return sorted(list(combined_skills))
280
+
281
+ def create_text_for_skills(row):
282
+ return " ".join([str(s) for s in [row.get("Job title"), row.get("Duties"), row.get("qualifications"), row.get("Description")] if pd.notna(s)])
283
+
284
+ original_df["text_for_skills"] = original_df.apply(create_text_for_skills, axis=1)
285
+ print("--- Extracting skills with HYBRID ACCURACY model. Please wait... ---")
286
+ # Apply the hybrid function row-wise to include job title context
287
+ original_df['Skills'] = original_df.progress_apply(extract_skills_hybrid, axis=1)
288
+ original_df = original_df.drop(columns=['text_for_skills'])
289
+
290
+ print(f"--- Saving processed data to {PROCESSED_DATA_PATH} for faster future startups ---")
291
+ original_df.to_parquet(PROCESSED_DATA_PATH)
292
+
293
+ original_df['job_id'] = original_df.index
294
+ def create_full_text(row): return " ".join([str(s) for s in [row.get("Job title"), row.get("Company"), row.get("Duties"), row.get("qualifications"), row.get("Description")]])
295
+ original_df["full_text"] = original_df.apply(create_full_text, axis=1)
296
+
297
  ds = datasets.load_dataset("its-zion-18/Jobs-tabular-dataset")
 
298
  augmented_df = ds["augmented"].to_pandas()
 
299
  max_id = len(original_df) - 1
300
  augmented_df['job_id'] = augmented_df.index.map(lambda i: min(i // 20, max_id))
 
 
 
301
  augmented_df["full_text"] = augmented_df.apply(create_full_text, axis=1)
302
+
303
  combined_df = pd.concat([original_df.copy(), augmented_df.copy()], ignore_index=True)
304
  original_df = original_df.rename(columns={'Job title': 'job_title', 'Company': 'company'})
305
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
  print("--- Loading Fine-Tuned Sentence Transformer Model ---")
307
  model = SentenceTransformer(FINETUNED_MODEL_ID)
308
  print("--- Encoding Embeddings ---")
 
317
  links = [("Coursera", f"https://www.coursera.org/search?query={q}"), ("edX", f"https://www.edx.org/search?q={q}"), ("Udemy", f"https://www.udemy.com/courses/search/?q={q}"), ("YouTube", f"https://www.youtube.com/results?search_query={q}+tutorial")]
318
  return " β€’ ".join([f'<a href="{u}" target="_blank" style="color: #007bff;">{name}</a>' for name, u in links])
319
 
 
 
320
def get_job_matches(dream_job: str, top_n: int, skills_text: str):
    """Search for jobs matching ``dream_job`` and build the search-result UI outputs.

    The query is expanded with ``llm_expand_query`` and passed to
    ``find_job_matches`` (top 50 candidates).  When the user supplied skills,
    the candidates are re-scored with ``score_jobs_by_skills`` and a second
    "recommendations" table with the five best skill matches is produced.

    Args:
        dream_job: Free-text description of the desired job.
        top_n: Number of rows to show in the main results table.
        skills_text: Comma-separated list of the user's skills (may be empty).

    Returns:
        A 7-tuple ``(status, emb_matches, table_to_show, dropdown,
        details_accordion, recommendations_table, recommendations_accordion)``
        in the order expected by the callers that forward it to Gradio.
    """
    # Sliders may deliver floats; DataFrame.head() needs an integer.
    top_n = int(top_n)

    expanded_desc = llm_expand_query(dream_job)
    emb_matches = find_job_matches(dream_job, expanded_desc, top_k=50)

    # Normalise each token once and drop the ones that normalise to "".
    normalized_tokens = (_norm_skill_token(s) for s in (skills_text or "").split(','))
    user_skills = [token for token in normalized_tokens if token]

    # Recommendations stay empty/hidden unless skills were provided.
    recommendations_table = pd.DataFrame()
    recommendations_visible = False

    if user_skills:
        scored_df = score_jobs_by_skills(user_skills, emb_matches)

        # Top 5 jobs ranked purely by skill match, independent of top_n.
        skill_sorted_df = scored_df.sort_values(by='Skill Match Score', ascending=False).head(5)
        if not skill_sorted_df.empty:
            recs = skill_sorted_df[['job_title', 'company', 'Skill Match Score', 'Final Score']].copy()
            recs = recs.rename(columns={'Final Score': 'Overall Score'})
            for column in ('Skill Match Score', 'Overall Score'):
                recs[column] = recs[column].map('{:.2%}'.format)
            recommendations_table = recs
            recommendations_visible = True

        display_df = scored_df.head(top_n)
        status = f"Found and **re-ranked** results by your {len(user_skills)} skills. Displaying top {len(display_df)}."
    else:
        display_df = emb_matches.head(top_n)
        status = f"Found {len(display_df)} top matches using semantic search."

    if 'Final Score' in display_df.columns:
        table_to_show = display_df[['job_title', 'company', 'Final Score', 'Skill Match Score']]
        table_to_show = table_to_show.rename(columns={'Final Score': 'Overall Score'})
        table_to_show['Skill Match Score'] = table_to_show['Skill Match Score'].map('{:.2%}'.format)
        table_to_show['Overall Score'] = table_to_show['Overall Score'].map('{:.2%}'.format)
    else:
        table_to_show = display_df[['job_title', 'company', 'Similarity Score']]
        table_to_show = table_to_show.rename(columns={'Similarity Score': 'Overall Score'})
        table_to_show['Overall Score'] = table_to_show['Overall Score'].map('{:.2%}'.format)

    # iterrows() yields the index *label*, which is not the display position
    # once rows have been re-sorted; number the entries by rank and keep the
    # label as the dropdown value.
    dropdown_options = [
        (f"{rank}. {row['job_title']} - {row['company']}", row_label)
        for rank, (row_label, row) in enumerate(display_df.iterrows(), start=1)
    ]
    dropdown_value = dropdown_options[0][1] if dropdown_options else None

    return (
        status,
        emb_matches,
        table_to_show,
        gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True),
        gr.Accordion(visible=True),
        recommendations_table,
        gr.Accordion(visible=recommendations_visible),
    )
365
 
366
def rerank_current_results(initial_matches_df, skills_text, top_n):
    """Re-rank the stored search results against the current skills text.

    Args:
        initial_matches_df: The matches kept from the last search; ``None`` or
            empty when no search has been run yet.
        skills_text: Comma-separated list of the user's skills.  When it
            contains no usable token the original similarity ranking is shown.
        top_n: Number of rows to show in the main results table.

    Returns:
        A 5-tuple ``(status, table_to_show, dropdown, recommendations_table,
        recommendations_accordion)``.
    """
    if initial_matches_df is None or pd.DataFrame(initial_matches_df).empty:
        return (
            "Please find matches first before re-ranking.",
            pd.DataFrame(),
            gr.Dropdown(visible=False),
            pd.DataFrame(),
            gr.Accordion(visible=False),
        )

    # Sliders may deliver floats; DataFrame.head() needs an integer.
    top_n = int(top_n)
    initial_matches_df = pd.DataFrame(initial_matches_df)

    # Normalise each token once and drop the ones that normalise to "".
    normalized_tokens = (_norm_skill_token(s) for s in (skills_text or "").split(','))
    user_skills = [token for token in normalized_tokens if token]

    # Recommendations stay empty/hidden unless skills were provided.
    recommendations_table = pd.DataFrame()
    recommendations_visible = False

    if not user_skills:
        status = "Skills cleared. Showing original semantic search results."
        display_df = initial_matches_df.head(top_n)
        table_to_show = display_df[['job_title', 'company', 'Similarity Score']]
        table_to_show = table_to_show.rename(columns={'Similarity Score': 'Overall Score'})
        table_to_show['Overall Score'] = table_to_show['Overall Score'].map('{:.2%}'.format)
    else:
        ranked_df = score_jobs_by_skills(user_skills, initial_matches_df)
        status = f"Results **re-ranked** based on your {len(user_skills)} skills."
        display_df = ranked_df.head(top_n)

        # Top 5 jobs ranked purely by skill match, independent of top_n.
        skill_sorted_df = ranked_df.sort_values(by='Skill Match Score', ascending=False).head(5)
        if not skill_sorted_df.empty:
            recs = skill_sorted_df[['job_title', 'company', 'Skill Match Score', 'Final Score']].copy()
            recs = recs.rename(columns={'Final Score': 'Overall Score'})
            for column in ('Skill Match Score', 'Overall Score'):
                recs[column] = recs[column].map('{:.2%}'.format)
            recommendations_table = recs
            recommendations_visible = True

        table_to_show = display_df[['job_title', 'company', 'Final Score', 'Skill Match Score']]
        table_to_show = table_to_show.rename(columns={'Final Score': 'Overall Score'})
        table_to_show['Skill Match Score'] = table_to_show['Skill Match Score'].map('{:.2%}'.format)
        table_to_show['Overall Score'] = table_to_show['Overall Score'].map('{:.2%}'.format)

    # iterrows() yields the index *label*, which is not the display position
    # after re-ranking; number the entries by rank and keep the label as the
    # dropdown value.
    dropdown_options = [
        (f"{rank}. {row['job_title']} - {row['company']}", row_label)
        for rank, (row_label, row) in enumerate(display_df.iterrows(), start=1)
    ]
    dropdown_value = dropdown_options[0][1] if dropdown_options else None

    return (
        status,
        table_to_show,
        gr.Dropdown(choices=dropdown_options, value=dropdown_value, visible=True),
        recommendations_table,
        gr.Accordion(visible=recommendations_visible),
    )
408
 
409
def find_matches_and_rank_with_check(dream_job: str, top_n: int, skills_text: str):
    """Validate the query, then search; ask for confirmation on suspicious words.

    Args:
        dream_job: Free-text description of the desired job.
        top_n: Number of rows to show in the main results table.
        skills_text: Comma-separated list of the user's skills.

    Returns:
        A 9-tuple ``(status, emb_matches, table_to_show, dropdown,
        details_accordion, spelling_alert, spelling_row,
        recommendations_table, recommendations_accordion)``.  When the query
        is blank, or ``check_spelling_in_query`` reports unrecognized words,
        the result slots are cleared/hidden and no search is run.
    """

    def _no_results(status, alert, show_confirmation_row):
        # Shared "nothing to show" payload: empty tables, hidden widgets.
        return (
            status,
            None,
            pd.DataFrame(),
            gr.Dropdown(visible=False),
            gr.Accordion(visible=False),
            alert,
            gr.Row(visible=show_confirmation_row),
            pd.DataFrame(),
            gr.Accordion(visible=False),
        )

    # A whitespace-only query carries no information either, so treat it like
    # an empty one; also hide any spelling alert left over from a prior search.
    if not dream_job or not dream_job.strip():
        return _no_results(
            "Please describe your dream job first.",
            gr.Markdown("", visible=False),
            False,
        )

    unrecognized_words = check_spelling_in_query(dream_job)
    if unrecognized_words:
        word_list_html = ", ".join([f"<b><span style='color: #F87171;'>{w}</span></b>" for w in unrecognized_words])
        alert_message = f"<b><span style='color: #F87171;'>⚠️ Possible Spelling Error:</span></b> Unrecognized: {word_list_html}."
        return _no_results(
            "Status: Awaiting confirmation.",
            gr.Markdown(alert_message, visible=True),
            True,
        )

    status, emb_matches, table_to_show, dropdown, details_accordion, recommendations_table, recommendations_accordion = get_job_matches(dream_job, top_n, skills_text)
    return (
        status,
        emb_matches,
        table_to_show,
        dropdown,
        details_accordion,
        gr.Markdown(visible=False),
        gr.Row(visible=False),
        recommendations_table,
        recommendations_accordion,
    )
422
 
423
def find_matches_and_rank_anyway(dream_job: str, top_n: int, skills_text: str):
    """Run the search without the spelling check and hide the spelling prompt.

    Forwards the seven outputs of ``get_job_matches`` and inserts a hidden
    spelling alert and a hidden confirmation row after the fifth one, giving
    the same 9-slot layout as ``find_matches_and_rank_with_check``.
    """
    (
        status,
        emb_matches,
        table_to_show,
        dropdown,
        details_accordion,
        recs_table,
        recs_accordion,
    ) = get_job_matches(dream_job, top_n, skills_text)

    search_outputs = (status, emb_matches, table_to_show, dropdown, details_accordion)
    hidden_spelling_widgets = (gr.Markdown(visible=False), gr.Row(visible=False))
    recommendation_outputs = (recs_table, recs_accordion)
    return search_outputs + hidden_spelling_widgets + recommendation_outputs
426
 
427
def on_select_job(job_id, skills_text):
    """Build the details panel and learning plan for the selected job.

    Args:
        job_id: Index label of the job in ``original_df`` (``None`` when the
            dropdown has no selection).
        skills_text: Comma-separated list of the user's skills (may be empty).

    Returns:
        A 9-tuple ``(job_details_markdown, duties, qualifications,
        description, learning_plan_html, details_accordion,
        full_skill_list_state, skills_offset, load_more_button)``.
        The skill-list state and "load more" button are only populated when
        the user supplied no skills; with skills, at most the first five
        missing skills are shown.
    """
    # Similarity above this counts a job skill as already covered by the user.
    skill_match_threshold = 0.58
    # Score from which the user is considered to have every required skill.
    full_match_score = 0.98
    # Number of skills rendered in the initial learning plan.
    initial_page_size = 5

    if job_id is None:
        return "", "", "", "", "", gr.Accordion(visible=False), [], 0, gr.Button(visible=False)

    row = original_df.loc[job_id]
    title, company = str(row.get("job_title", "")), str(row.get("company", ""))
    job_details_markdown = f"### {title} — {company}"
    duties = str(row.get('Duties', ''))
    qualifications = str(row.get('qualifications', ''))
    description = str(row.get('Description', ''))

    # Normalise each token once and drop the ones that normalise to "".
    normalized_tokens = (_norm_skill_token(s) for s in (skills_text or "").split(','))
    user_skills = [token for token in normalized_tokens if token]

    # NOTE(review): if the Skills column is reloaded from the parquet cache it
    # may arrive as a numpy array, whose truth value is ambiguous, or as NaN
    # for a missing cell.  Normalise to a plain list before testing it.
    raw_skills = row.get("Skills", [])
    if raw_skills is None or isinstance(raw_skills, float):
        job_skills = []
    else:
        job_skills = list(raw_skills)

    def _panel(plan_html, skills_state, offset, show_load_more):
        # Every branch returns the same nine slots; only the plan differs.
        return (
            job_details_markdown,
            duties,
            qualifications,
            description,
            plan_html,
            gr.Accordion(visible=True),
            skills_state,
            offset,
            gr.Button(visible=show_load_more),
        )

    def _plan_items(skills):
        # One list entry per skill with its course-search links.
        return "".join(
            f"<li><b>{ms}</b><br>• Learn: {_course_links_for(ms)}</li>" for ms in skills
        )

    if not job_skills:
        return _panel("<p><i>No specific skills could be extracted for this job.</i></p>", [], 0, False)

    if not user_skills:
        # No user skills: show the first page of required skills and keep the
        # full sorted list in state so "Load More Skills" can page through it.
        full_skill_list_for_state = sorted(job_skills)
        skills_to_display = full_skill_list_for_state[:initial_page_size]
        headline = "<h4>To be a good fit for this role, you'll need to learn these skills:</h4>"
        learning_plan_html = f"{headline}<ul style='list-style-type: none; padding-left: 0;'>{_plan_items(skills_to_display)}</ul>"
        return _panel(
            learning_plan_html,
            full_skill_list_for_state,
            len(skills_to_display),
            len(full_skill_list_for_state) > initial_page_size,
        )

    user_skill_embeddings = model.encode(user_skills, convert_to_tensor=True)
    job_skill_embeddings = model.encode(job_skills, convert_to_tensor=True)
    similarity_matrix = util.cos_sim(user_skill_embeddings, job_skill_embeddings)

    # Best user-skill similarity for each job skill, averaged over job skills
    # (job_skills is non-empty here, so the division is safe).
    sum_of_max_similarities = torch.sum(torch.max(similarity_matrix, dim=0).values)
    avg_score = (sum_of_max_similarities / len(job_skills)).item()

    # Jobs listing fewer than five skills get a proportionally reduced score.
    skill_count_factor = min(1.0, len(job_skills) / 5.0)
    score_val = avg_score * skill_count_factor

    matched_job_skills_mask = torch.any(similarity_matrix > skill_match_threshold, dim=0)
    all_missing_skills = [
        skill
        for skill, is_matched in zip(job_skills, matched_job_skills_mask.tolist())
        if not is_matched
    ]

    job_details_markdown += f"\n**Your skill match:** {score_val:.1%}"

    # Also congratulate when nothing is missing: otherwise the plan would be a
    # "focus on these skills" heading followed by an empty list.
    if score_val >= full_match_score or not all_missing_skills:
        return _panel("<h4 style='color:green;'>🎉 You have all the required skills!</h4>", [], 0, False)

    headline = "<b>Great fit!</b>" if score_val >= 0.8 else "<b>Good progress!</b>" if score_val >= 0.5 else "<b>Solid starting point.</b>"
    skills_to_display = sorted(all_missing_skills)[:initial_page_size]
    learning_plan_html = f"<h4>{headline} Focus on these skills to improve your match:</h4>"
    learning_plan_html += f"<ul style='list-style-type: none; padding-left: 0;'>{_plan_items(skills_to_display)}</ul>"
    return _panel(learning_plan_html, [], 0, False)
477
 
478
def load_more_skills(full_skills_list, current_offset, increment=5):
    """Render the learning plan with one more page of skills.

    Args:
        full_skills_list: Complete list of skills to page through; ``None``
            is treated as an empty list.
        current_offset: Number of skills currently displayed.
        increment: How many additional skills to reveal per call (default 5,
            the page size previously hard-coded here).

    Returns:
        A 3-tuple ``(learning_plan_html, new_offset, load_more_button)``; the
        button is hidden once every skill is displayed.
    """
    skills = list(full_skills_list) if full_skills_list is not None else []

    # Clamp so the stored offset never points past the end of the list.
    new_offset = min((current_offset or 0) + increment, len(skills))
    skills_to_display = skills[:new_offset]

    items_html = [f"<li><b>{ms}</b><br>• Learn: {_course_links_for(ms)}</li>" for ms in skills_to_display]
    learning_plan_html = f"<h4>To be a good fit for this role, you'll need to learn these skills:</h4><ul style='list-style-type: none; padding-left: 0;'>{''.join(items_html)}</ul>"

    should_button_be_visible = new_offset < len(skills)
    return learning_plan_html, new_offset, gr.Button(visible=should_button_be_visible)
486
 
487
def on_reset():
    """Return the default value for every output wired to the reset button.

    The 19 values are grouped below by the part of the UI they restore; their
    order matches the ``outputs`` list of the reset button's click handler.
    """
    blank = ""

    # Dream-job text, match-count slider, skills text.
    search_inputs = (blank, 3, blank)
    # Results table, stored matches, job selector, details accordion, status.
    search_results = (
        pd.DataFrame(),
        None,
        gr.Dropdown(visible=False),
        gr.Accordion(visible=False),
        "Status: Ready.",
    )
    # Job details header, duties, qualifications, full description.
    job_details = (blank, blank, blank, blank)
    # Spelling alert and its confirmation row.
    spelling_prompt = (gr.Markdown(visible=False), gr.Row(visible=False))
    # Skill-list state, paging offset, "load more" button.
    learning_plan_paging = ([], 0, gr.Button(visible=False))
    # Recommendations table and its accordion.
    recommendations = (pd.DataFrame(), gr.Accordion(visible=False))

    return (
        search_inputs
        + search_results
        + job_details
        + spelling_prompt
        + learning_plan_paging
        + recommendations
    )
490
 
 
491
# Run the one-time data/model initialisation at import time, before the UI is
# built, and echo whatever it returns to the log.
# NOTE(review): presumably this populates the module-level globals the UI
# callbacks read (original_df, model, ...) — confirm against
# initialize_data_and_model.
print("Starting application initialization...")
initialization_status = initialize_data_and_model()
print(initialization_status)
494
 
 
495
  with gr.Blocks(theme=gr.themes.Soft()) as ui:
496
  gr.Markdown("# Hybrid Career Planner & Skill Gap Analyzer")
 
497
  initial_matches_state = gr.State()
498
  missing_skills_state = gr.State([])
499
  skills_offset_state = gr.State(0)
 
500
  with gr.Row():
501
  with gr.Column(scale=3):
502
  dream_text = gr.Textbox(label='Your Dream Job Description', lines=3, placeholder="e.g., 'A role in a tech startup focused on machine learning...'")
 
508
  topk_slider = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Number of Matches")
509
  search_btn = gr.Button("Find Matches", variant="primary")
510
  reset_btn = gr.Button("Reset All")
 
511
  status_text = gr.Markdown("Status: Ready.")
512
  spelling_alert = gr.Markdown(visible=False)
513
  with gr.Row(visible=False) as spelling_row:
514
  search_anyway_btn = gr.Button("Search Anyway", variant="secondary")
515
  retype_btn = gr.Button("Let Me Fix It", variant="stop")
 
 
 
516
 
517
+ df_output = gr.DataFrame(label="Job Matches (Sorted by Overall Relevance)", interactive=False)
518
+
519
+ # --- NEW: Added the recommendations section ---
520
+ with gr.Accordion("✨ Based on your current skills and career interest consider these jobs...", open=True, visible=False) as recommendations_accordion:
521
+ recommendations_df_output = gr.DataFrame(label="Top Skill Matches", interactive=False)
522
+
523
+ job_selector = gr.Dropdown(label="Select a job to see more details & learning plan:", visible=False)
524
  with gr.Accordion("Job Details & Learning Plan", open=False, visible=False) as details_accordion:
525
  job_details_markdown = gr.Markdown()
 
526
  with gr.Tabs():
527
+ with gr.TabItem("Duties"): duties_markdown = gr.Markdown()
528
+ with gr.TabItem("Qualifications"): qualifications_markdown = gr.Markdown()
529
+ with gr.TabItem("Full Description"): description_markdown = gr.Markdown()
 
 
 
 
530
  learning_plan_output = gr.HTML(label="Learning Plan")
531
  load_more_btn = gr.Button("Load More Skills", visible=False)
532
 
533
+ # --- MODIFIED: Added new outputs to the click events ---
534
+ search_btn.click(fn=find_matches_and_rank_with_check, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])
535
+ search_anyway_btn.click(fn=find_matches_and_rank_anyway, inputs=[dream_text, topk_slider, skills_text], outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])
536
+ retype_btn.click(lambda: ("Status: Ready for you to retype.", None, pd.DataFrame(), gr.Dropdown(visible=False), gr.Accordion(visible=False), gr.Markdown(visible=False), gr.Row(visible=False), pd.DataFrame(), gr.Accordion(visible=False)), outputs=[status_text, initial_matches_state, df_output, job_selector, details_accordion, spelling_alert, spelling_row, recommendations_df_output, recommendations_accordion])
537
+ reset_btn.click(fn=on_reset, outputs=[dream_text, topk_slider, skills_text, df_output, initial_matches_state, job_selector, details_accordion, status_text, job_details_markdown, duties_markdown, qualifications_markdown, description_markdown, spelling_alert, spelling_row, missing_skills_state, skills_offset_state, load_more_btn, recommendations_df_output, recommendations_accordion], queue=False)
538
+ rerank_btn.click(fn=rerank_current_results, inputs=[initial_matches_state, skills_text, topk_slider], outputs=[status_text, df_output, job_selector, recommendations_df_output, recommendations_accordion])
539
+
540
+ job_selector.change(fn=on_select_job, inputs=[job_selector, skills_text], outputs=[job_details_markdown, duties_markdown, qualifications_markdown, description_markdown, learning_plan_output, details_accordion, missing_skills_state, skills_offset_state, load_more_btn])
541
+ load_more_btn.click(fn=load_more_skills, inputs=[missing_skills_state, skills_offset_state], outputs=[learning_plan_output, skills_offset_state, load_more_btn])
542
+
543
+ ui.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
544