"""Gradio demo: classify a sentence for populist rhetoric with a fine-tuned causal LM.

The module loads the tokenizer/model at import time, exposes
``extract_label`` and ``classify_sentence``, and builds the ``demo`` Blocks app.
"""

import html
import re
import traceback

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# ============================================================================
# CONFIGURATION
# ============================================================================

MODEL_NAME = "armaniii/Qwen3-4B-populism"  # Update with your HuggingFace username

# Prompts from helpers.py
SYSTEM_PROMPT = (
    'You are a helpful AI assistant with expertise in identifying populism in public discourse.\n\n'
    'Populism can be defined as an anti-elite discourse in the name of the "people". '
    'In other words, populism emphasizes the idea of the common "people" and '
    'often positions this group in opposition to a perceived elite group.\n\n'
    'There are two core elements in identifying populism: '
    '(i) anti-elitism, i.e., negative invocations of "elites", and '
    '(ii) people-centrism, i.e., positive invocations of the "people".\n\n'
)

INSTRUCTION = (
    'You must classify each sentence in one of the following categories:\n\n'
    '(a) No populism.\n'
    '(b) Anti-elitism, i.e., negative invocations of "elites".\n'
    '(c) People-centrism, i.e., positive invocations of the "People".\n'
    '(d) Both people-centrism and anti-elitism populism.\n\n'
)

QUERY = 'Which is the most relevant category for the sentence: "{}"?'
RESPONSE_START = 'I would categorize this sentence as ('

# Label mappings (the first whitespace-separated token is used as the card icon)
LABEL_MAP = {
    'a': '🔵 No Populism',
    'b': '🟡 Anti-Elitism',
    'c': '🟢 People-Centrism',
    'd': '🔴 Both (Anti-Elitism + People-Centrism)',
}

LABEL_DESCRIPTIONS = {
    'a': 'The sentence does not contain populist rhetoric.',
    'b': 'The sentence contains negative references to elites or elite groups.',
    'c': 'The sentence contains positive references to "the people" or common citizens.',
    'd': 'The sentence combines both anti-elite rhetoric and people-centric language.',
}

# Per-label card palette: dark text on a light background.
CARD_COLORS = {
    'a': {'bg': '#e3f2fd', 'text': '#0d47a1', 'border': '#1976d2', 'name': 'No Populism'},
    'b': {'bg': '#fff9c4', 'text': '#e65100', 'border': '#f57f17', 'name': 'Anti-Elitism'},
    'c': {'bg': '#e8f5e9', 'text': '#1b5e20', 'border': '#388e3c', 'name': 'People-Centrism'},
    'd': {'bg': '#ffebee', 'text': '#b71c1c', 'border': '#d32f2f', 'name': 'Both'},
}

# Matches an explicit "(a)" .. "(d)" marker in lower-cased model output.
_LABEL_PATTERN = re.compile(r'\(([abcd])\)')

# Keyword fallbacks, checked in order; the first group with a hit wins.
_KEYWORD_FALLBACKS = (
    (('both', 'anti-elitism and people-centrism'), 'd'),
    (('anti-elitism', 'anti-elite'), 'b'),
    (('people-centrism', 'people-centric'), 'c'),
    (('no populism',), 'a'),
)

# ============================================================================
# MODEL LOADING
# ============================================================================

print("Loading model and tokenizer...")

# Detect device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# float16 on GPU (more compatible than bfloat16), float32 on CPU.
# NOTE(review): this value is not passed to from_pretrained below, so the
# checkpoint's own configuration decides the dtype -- confirm whether it
# should be wired in as `torch_dtype=torch_dtype`.
torch_dtype = torch.float16 if device == "cuda" else torch.float32

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Load model (it's quantized, so bitsandbytes will handle it).
# NOTE(review): trust_remote_code=True allows code shipped in the model
# repository to run locally; keep it only if the checkpoint requires it.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)

# Create pipeline.
# Don't pass device since we used device_map="auto".
text_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

print("Model loaded successfully!")

# ============================================================================
# INFERENCE FUNCTION
# ============================================================================


def extract_label(response_text):
    """Extract the label (a, b, c, or d) from a model response.

    An explicit ``(a)``..``(d)`` marker takes precedence. Otherwise the text
    is scanned for label keywords in the order d, b, c, a. Returns ``'a'``
    (no populism) when nothing matches, including for empty or ``None`` input.
    """
    text_lower = (response_text or "").lower()

    match = _LABEL_PATTERN.search(text_lower)
    if match:
        return match.group(1)

    for keywords, label in _KEYWORD_FALLBACKS:
        if any(keyword in text_lower for keyword in keywords):
            return label

    return 'a'  # Default to no populism if unclear


def _build_prompt(sentence):
    """Render the chat-template prompt that asks the model to classify *sentence*."""
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": INSTRUCTION + '\n\n' + QUERY.format(sentence)},
    ]
    return tokenizer.apply_chat_template(
        conversation=conversation,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,  # Disable thinking to match audit_llms.py behavior
    )


def _render_result_card(label, label_name, label_desc):
    """Return the HTML result card for *label*, styled from ``CARD_COLORS``.

    NOTE(review): the tag structure is a reconstruction; the text content and
    the palette come from the constants defined in this file.
    """
    palette = CARD_COLORS.get(label, CARD_COLORS['a'])
    bg = palette['bg']
    fg = palette['text']
    border = palette['border']
    icon = html.escape(label_name.split()[0])
    title = html.escape(palette['name'])
    description = html.escape(label_desc)
    return f"""
<div style="background-color: {bg}; color: {fg}; border: 2px solid {border}; border-radius: 12px; padding: 20px; margin: 10px 0;">
  <h2 style="color: {fg}; margin: 0 0 10px 0;">🎯 Classification Result</h2>
  <div style="font-size: 48px; text-align: center;">{icon}</div>
  <h3 style="color: {fg}; text-align: center; margin: 10px 0;">{title}</h3>
  <p style="color: {fg}; margin: 0;"><strong>Description:</strong> {description}</p>
</div>
"""


def _render_error_card(error):
    """Return the HTML error card for *error*; the message is HTML-escaped."""
    details = html.escape(str(error))
    return f"""
<div style="background-color: #ffebee; color: #b71c1c; border: 2px solid #d32f2f; border-radius: 12px; padding: 20px; margin: 10px 0;">
  <h2 style="color: #b71c1c; margin: 0 0 10px 0;">⚠️ Error</h2>
  <p style="color: #b71c1c;">An error occurred during classification. Please try again.</p>
  <p style="color: #b71c1c;"><strong>Error details:</strong> {details}</p>
  <p style="color: #b71c1c; margin: 0;">If the problem persists, please check the model configuration.</p>
</div>
"""


def classify_sentence(sentence, progress=gr.Progress()):
    """Classify a sentence for populist rhetoric.

    Generator used as a Gradio event handler. Every yield is a 3-tuple
    ``(status_markdown, result_html, reasoning_markdown)``: intermediate
    yields carry only a status message, the last one carries the result
    (or an error card if anything raised).
    """
    # Validate before touching the value so None/blank input cannot raise.
    if not sentence or not sentence.strip():
        yield "⚠️ Please enter a sentence to classify.", "", ""
        return  # End generator

    print(f"\n{'='*80}")
    print(f"CLASSIFY REQUEST: {sentence[:100]}...")
    print(f"{'='*80}")

    try:
        # Step 1: Building prompt
        progress(0.1, desc="🔨 Building prompt...")
        print("Step 1: Building prompt...")
        yield "⏳ **Step 1/4:** Building prompt...", "", ""

        prompt = _build_prompt(sentence)
        print(f"Prompt length: {len(prompt)} chars")

        # Step 2: Running model inference
        progress(0.3, desc="🤖 Running model inference...")
        print("Step 2: Running model inference...")
        yield (
            "⏳ **Step 2/4:** Running model inference... (this may take 10-30 seconds)",
            "",
            "",
        )

        responses = text_pipeline(
            prompt,
            do_sample=False,
            num_return_sequences=1,
            return_full_text=False,
            max_new_tokens=256,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

        # Step 3: Processing response
        progress(0.8, desc="⚙️ Processing results...")
        print("Step 3: Processing results...")
        yield "⏳ **Step 3/4:** Processing results...", "", ""

        generated = responses[0]['generated_text']
        # The prompt is not prefilled with RESPONSE_START, so the model may
        # produce that lead-in itself; only prepend it when it is missing.
        if generated.lstrip().startswith(RESPONSE_START):
            response_text = generated.lstrip()
        else:
            response_text = RESPONSE_START + generated
        print(f"Response: {response_text[:200]}...")

        # Extract label
        label = extract_label(response_text)
        label_name = LABEL_MAP.get(label, '❓ Unknown')
        label_desc = LABEL_DESCRIPTIONS.get(label, '')
        print(f"Extracted label: {label} -> {label_name}")

        # Step 4: Formatting output
        progress(0.95, desc="📝 Formatting output...")
        print("Step 4: Formatting output...")

        result_html = _render_result_card(label, label_name, label_desc)

        # Built by concatenation (not an indented literal) so multi-line
        # model output cannot disturb the Markdown layout.
        reasoning_md = (
            "\n### Model's Detailed Reasoning\n\n"
            f"{response_text}\n\n"
            "---\n\n"
            "**Note:** This is the model's explanation for its classification decision.\n"
            "The reasoning shows how the model analyzed the sentence based on the "
            "populism detection criteria.\n"
        )

        progress(1.0, desc="✅ Complete!")
        print(f"Classification complete! Label: {label_name}")
        print(f"{'='*80}\n")

        # Final result with success status - YIELD 3 outputs
        yield f"✅ **Classification Complete!** Result: {label_name}", result_html, reasoning_md

    except Exception as e:  # UI boundary: report the failure instead of crashing the app
        print(f"Error in classify_sentence: {e}")
        traceback.print_exc()
        yield (
            f"❌ **Error:** {str(e)}",
            _render_error_card(e),
            "No reasoning available due to error.",
        )


# ============================================================================
# GRADIO INTERFACE
# ============================================================================

# Example sentences
examples = [
    ["We need to stand up against the corrupt establishment that has betrayed the American people."],
    ["The policy aims to improve economic growth through targeted investments."],
    ["It's time for ordinary Americans to take back control from the political elites."],
    ["We must fight for the rights and interests of everyday working families."],
    ["The meeting will be held next Tuesday at 3 PM."],
    ["The elite media doesn't care about regular folks like us."],
    ["Together, we the people can build a better future for our children."],
]

# Custom CSS
custom_css = """
#component-0 {
    max-width: 1000px;
    margin: auto;
    padding: 20px;
}

/* Status box styling */
#status_box {
    padding: 15px !important;
    border-radius: 10px !important;
    margin: 15px 0 !important;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    color: white !important;
    font-weight: 500 !important;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1) !important;
    min-height: 50px !important;
    text-align: center !important;
}

/* Result box container */
#output_box {
    padding: 0 !important;
    margin-top: 20px !important;
}

/* Make sure HTML renders properly */
#output_box .prose {
    max-width: 100% !important;
}

footer {
    display: none !important;
}
"""

# Build interface
with gr.Blocks(css=custom_css, title="Populism Detector") as demo:
    gr.Markdown("""
# 🎯 Populism Detection in Political Discourse

This tool uses a fine-tuned language model to detect populist rhetoric in sentences.
Populism is characterized by anti-elite discourse in the name of "the people."

### About the Categories

- **🔵 No Populism**: The sentence does not contain populist elements
- **🟡 Anti-Elitism**: Negative references to elites or powerful groups
- **🟢 People-Centrism**: Positive references to "the people" or ordinary citizens
- **🔴 Both**: Combines anti-elite rhetoric with people-centric language

---
""")

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="Enter a sentence to analyze",
                placeholder="Type or paste a sentence here...",
                lines=3,
            )
            submit_btn = gr.Button("🔍 Classify Sentence", variant="primary", size="lg")

    # Status indicator
    status_box = gr.Markdown(
        value="",
        label="Status",
        visible=True,
        elem_id="status_box",
    )

    # Classification result card (always visible)
    result_card = gr.HTML(
        value="",
        label="Classification Result",
    )

    # Model reasoning (collapsible)
    with gr.Accordion("📖 Show Model Reasoning", open=False) as reasoning_accordion:
        reasoning_text = gr.Markdown(
            value="",
            label="",
        )

    gr.Markdown("### 📝 Try these examples:")
    gr.Examples(
        examples=examples,
        inputs=input_text,
        outputs=[status_box, result_card, reasoning_text],
        fn=classify_sentence,
        cache_examples=False,
    )

    # Button click handler with loading state
    submit_btn.click(
        fn=classify_sentence,
        inputs=input_text,
        outputs=[status_box, result_card, reasoning_text],
        api_name="classify",
        show_progress="full",  # Show full progress bar with percentage
    )

    # Enter key handler
    input_text.submit(
        fn=classify_sentence,
        inputs=input_text,
        outputs=[status_box, result_card, reasoning_text],
        show_progress="full",
    )

    gr.Markdown("""
---

### ℹ️ About This Model

This model is fine-tuned on political speeches to identify populist rhetoric.
It analyzes sentence-level discourse and classifies based on two key dimensions:

1. **Anti-Elitism**: Language that criticizes or opposes elite groups
2. **People-Centrism**: Language that emphasizes or champions "the people"

**Note**: This tool is for research and educational purposes.
Results should be interpreted in context and not used as definitive political classifications.

Built with [Qwen3-4B](https://huggingface.co/Qwen/Qwen3-4B) fine-tuned on populist speech data.
""")

# Launch the app
if __name__ == "__main__":
    # Enable queue for progress tracking
    demo.queue()
    demo.launch()