"""Gradio chat demo for SPARK: a LoRA adapter served on a 4-bit Mistral base.

Importing this module loads the tokenizer, the quantized base model and the
LoRA adapter, then builds the ``demo`` chat interface. The web server is only
started when the file is executed as a script.
"""

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# Model configuration
BASE_MODEL = "unsloth/mistral-7b-v0.3-bnb-4bit"
LORA_MODEL = "Metavolve-Labs/spark-v1"

print("Loading Spark...")

# The tokenizer is read from the adapter repo, not from the base model repo.
tokenizer = AutoTokenizer.from_pretrained(LORA_MODEL)

# Quantization config: 4-bit NF4 weights, bfloat16 compute, double quantization.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

# Load base model.
# NOTE(review): trust_remote_code=True allows code shipped in the model repo to
# run locally - confirm this checkpoint actually requires it.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

# Load LoRA adapters on top of the base model and switch to inference mode.
model = PeftModel.from_pretrained(base_model, LORA_MODEL)
model.eval()

print("Spark loaded!")

# NOTE(review): the prompt text is kept exactly as found; its bullet list
# looks like it once had line breaks - confirm the intended layout.
SYSTEM_PROMPT = (
    "You are SPARK (State-space Perception And Reasoning Kernel), an AI "
    "trained on Alexandria Aeternum - a curated collection of 10,000+ museum "
    "artworks with rich semantic metadata from The Metropolitan Museum of "
    "Art. You have deep knowledge of: "
    "- Art history, movements, and cultural context "
    "- Visual analysis and composition "
    "- Emotional and thematic interpretation "
    "- Provenance and authenticity "
    "You combine the analytical precision of structured reasoning with "
    "occasional wit. \n"
    "When appropriate, show your reasoning process."
)


def _content_text(content):
    """Return the plain text carried by a dict-style history ``content`` value.

    A string is returned unchanged. For a list/tuple, the ``"text"`` fields of
    its dict items are joined with newlines. Anything else yields ``""``.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        # presumably a list of content parts such as {"type": "text", "text": ...}
        # - TODO confirm against the installed Gradio version
        return "\n".join(
            part["text"]
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        )
    return ""


def _build_messages(message, history):
    """Assemble the chat-template message list for one generation request.

    Args:
        message: The new user message.
        history: Earlier turns, or ``None``. Each entry is either a
            ``(user_msg, assistant_msg)`` pair or a dict with ``"role"`` and
            ``"content"`` keys.

    Returns:
        A list of ``{"role", "content"}`` dicts: the system prompt, the
        history in order, then ``message`` as the final user turn.
    """
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for turn in history or []:
        if isinstance(turn, dict):
            # Dict-style history: forward only non-empty user/assistant text.
            role = turn.get("role")
            text = _content_text(turn.get("content"))
            if role in ("user", "assistant") and text:
                messages.append({"role": role, "content": text})
            continue
        # Pair-style history: the assistant half may be empty/None and is
        # skipped in that case.
        user_msg, assistant_msg = turn
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    return messages


def generate_response(message, history):
    """Generate SPARK's reply to ``message`` given the prior conversation.

    Args:
        message: The latest user message.
        history: Prior turns as supplied by ``gr.ChatInterface``; both
            pair-style and dict-style entries are accepted.

    Returns:
        The newly generated text, with special tokens removed and surrounding
        whitespace stripped.
    """
    messages = _build_messages(message, history)

    # Render the conversation to a prompt string that ends with the
    # generation prompt, then tokenize it onto the model's device.
    formatted = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = tokenizer(formatted, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=1024,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the tokens after the prompt so the reply does not echo it.
    response = tokenizer.decode(
        outputs[0][prompt_length:],
        skip_special_tokens=True,
    )
    return response.strip()


# Suggested prompts
examples = [
    "Who are you?",
    "What do you know about the Golden Codex?",
    "Tell me about Alexandria Aeternum.",
    "What makes art valuable to AI training?",
    "Analyze this: AI will replace human artists by 2030. Hype or reality?",
]

# Create interface
demo = gr.ChatInterface(
    fn=generate_response,
    title="🔥 SPARK - First Contact",
    description=(
        "**State-space Perception And Reasoning Kernel** "
        "An experimental model trained on Alexandria Aeternum - 10K+ museum "
        "artworks with rich semantic metadata. "
        "*Trained by Metavolve Labs using the Giants Curriculum "
        "(Claude, GPT, Grok, Gemini reasoning patterns)*"
    ),
    examples=examples,
    theme=gr.themes.Soft(),
)

if __name__ == "__main__":
    demo.launch()