import gradio as gr import torch from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline import re # ============================================================================ # CONFIGURATION # ============================================================================ MODEL_NAME = "armaniii/Qwen3-4B-populism" # Update with your HuggingFace username # Prompts from helpers.py SYSTEM_PROMPT = ('You are a helpful AI assistant with expertise in identifying populism in public discourse.\n\n' 'Populism can be defined as an anti-elite discourse in the name of the "people". ' 'In other words, populism emphasizes the idea of the common "people" and ' 'often positions this group in opposition to a perceived elite group.\n\n' 'There are two core elements in identifying populism: ' '(i) anti-elitism, i.e., negative invocations of "elites", and ' '(ii) people-centrism, i.e., positive invocations of the "people".\n\n') INSTRUCTION = ('You must classify each sentence in one of the following categories:\n\n' '(a) No populism.\n' '(b) Anti-elitism, i.e., negative invocations of "elites".\n' '(c) People-centrism, i.e., positive invocations of the "People".\n' '(d) Both people-centrism and anti-elitism populism.\n\n') QUERY = 'Which is the most relevant category for the sentence: "{}"?' RESPONSE_START = 'I would categorize this sentence as (' # Label mappings LABEL_MAP = { 'a': 'đĩ No Populism', 'b': 'đĄ Anti-Elitism', 'c': 'đĸ People-Centrism', 'd': 'đ´ Both (Anti-Elitism + People-Centrism)' } LABEL_DESCRIPTIONS = { 'a': 'The sentence does not contain populist rhetoric.', 'b': 'The sentence contains negative references to elites or elite groups.', 'c': 'The sentence contains positive references to "the people" or common citizens.', 'd': 'The sentence combines both anti-elite rhetoric and people-centric language.' } # ============================================================================ # MODEL LOADING # ============================================================================ print("Loading model and tokenizer...") # Detect device device = "cuda" if torch.cuda.is_available() else "cpu" print(f"Using device: {device}") # Determine dtype based on device if device == "cuda": # Use float16 for GPU (more compatible than bfloat16) torch_dtype = torch.float16 else: # Use float32 for CPU torch_dtype = torch.float32 # Load tokenizer tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) # Load model (it's quantized, so bitsandbytes will handle it) model = AutoModelForCausalLM.from_pretrained( MODEL_NAME, device_map="auto", low_cpu_mem_usage=True, trust_remote_code=True ) # Create pipeline # Don't pass device since we used device_map="auto" text_pipeline = pipeline( "text-generation", model=model, tokenizer=tokenizer ) print("Model loaded successfully!") # ============================================================================ # INFERENCE FUNCTION # ============================================================================ def extract_label(response_text): """Extract the label (a, b, c, or d) from model response.""" # Look for patterns like "(a)", "(b)", "(c)", "(d)" match = re.search(r'\(([abcd])\)', response_text.lower()) if match: return match.group(1) # Fallback: look for the label words text_lower = response_text.lower() if 'both' in text_lower or 'anti-elitism and people-centrism' in text_lower: return 'd' elif 'anti-elitism' in text_lower or 'anti-elite' in text_lower: return 'b' elif 'people-centrism' in text_lower or 'people-centric' in text_lower: return 'c' elif 'no populism' in text_lower: return 'a' return 'a' # Default to no populism if unclear def classify_sentence(sentence, progress=gr.Progress()): """Classify a sentence for populist rhetoric.""" print(f"\n{'='*80}") print(f"CLASSIFY REQUEST: {sentence[:100]}...") print(f"{'='*80}") if not sentence or sentence.strip() == "": status_msg = "â ī¸ Please enter a sentence to classify." yield status_msg, "", "" return # End generator try: # Step 1: Building prompt progress(0.1, desc="đ¨ Building prompt...") print("Step 1: Building prompt...") status_1 = "âŗ **Step 1/4:** Building prompt..." yield status_1, "", "" conversation = [ {"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content": INSTRUCTION + '\n\n' + QUERY.format(sentence)} ] prompt = tokenizer.apply_chat_template( conversation=conversation, tokenize=False, add_generation_prompt=True, enable_thinking=False # Disable thinking to match audit_llms.py behavior ) print(f"Prompt length: {len(prompt)} chars") # Step 2: Running model inference progress(0.3, desc="đ¤ Running model inference...") print("Step 2: Running model inference...") status_2 = "âŗ **Step 2/4:** Running model inference... (this may take 10-30 seconds)" yield status_2, "", "" responses = text_pipeline( prompt, do_sample=False, num_return_sequences=1, return_full_text=False, max_new_tokens=256, eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.eos_token_id, ) # Step 3: Processing response progress(0.8, desc="âī¸ Processing results...") print("Step 3: Processing results...") status_3 = "âŗ **Step 3/4:** Processing results..." yield status_3, "", "" response_text = RESPONSE_START + responses[0]['generated_text'] print(f"Response: {response_text[:200]}...") # Extract label label = extract_label(response_text) label_name = LABEL_MAP.get(label, 'â Unknown') label_desc = LABEL_DESCRIPTIONS.get(label, '') print(f"Extracted label: {label} -> {label_name}") # Step 4: Formatting output progress(0.95, desc="đ Formatting output...") print("Step 4: Formatting output...") # Determine background color based on label color_map = { 'a': {'bg': '#e3f2fd', 'text': '#0d47a1', 'border': '#1976d2', 'name': 'No Populism'}, 'b': {'bg': '#fff9c4', 'text': '#e65100', 'border': '#f57f17', 'name': 'Anti-Elitism'}, 'c': {'bg': '#e8f5e9', 'text': '#1b5e20', 'border': '#388e3c', 'name': 'People-Centrism'}, 'd': {'bg': '#ffebee', 'text': '#b71c1c', 'border': '#d32f2f', 'name': 'Both'} } colors = color_map.get(label, color_map['a']) # Result card HTML with DARK text on light backgrounds result_html = f"""
""" # Reasoning markdown with VISIBLE text reasoning_md = f""" ### Model's Detailed Reasoning {response_text} --- **Note:** This is the model's explanation for its classification decision. The reasoning shows how the model analyzed the sentence based on the populism detection criteria. """ progress(1.0, desc="â Complete!") print(f"Classification complete! Label: {label_name}") print(f"{'='*80}\n") # Final result with success status - YIELD 3 outputs final_status = f"â **Classification Complete!** Result: {label_name}" yield final_status, result_html, reasoning_md except Exception as e: error_html = f"""An error occurred during classification. Please try again.
Error details: {str(e)}
If the problem persists, please check the model configuration.