# nova-ai/cynical-generator.py
"""
Nova AI - Continuous Data Generator with Gemini
Generates training data using the Gemini API and feeds it to Nova AI continuously.
"""
import json
import random
import asyncio
import os
from datetime import datetime
from typing import Optional
import httpx

# Try to import google.generativeai (optional dependency)
try:
    import google.generativeai as genai
    GEMINI_AVAILABLE = True
except ImportError:
    GEMINI_AVAILABLE = False
    print("Warning: google-generativeai not installed. Run: pip install google-generativeai")


class ContinuousDataGenerator:
    """Generates training data using Gemini and feeds it to Nova AI."""

    def __init__(
        self,
        gemini_api_key: Optional[str] = None,
        nova_api_url: str = "http://localhost:7860",
        output_file: str = "generated_data.jsonl"
    ):
        self.nova_api_url = nova_api_url
        self.output_file = output_file
        self.gemini_model = None
        self.total_generated = 0
        self.total_fed = 0

        # Get API key from parameter or environment
        api_key = gemini_api_key or os.environ.get("GEMINI_API_KEY")

        # Initialize Gemini if a key is available
        if api_key and GEMINI_AVAILABLE:
            genai.configure(api_key=api_key)
            self.gemini_model = genai.GenerativeModel('gemini-2.0-flash')
            print("✓ Gemini API initialized")
        else:
            print("⚠ Gemini not configured - using local generation only")

    # Topic categories for diverse data generation
    TOPICS = [
        "human nature", "trust", "power", "success", "relationships",
        "work", "money", "society", "politics", "technology",
        "emotions", "intelligence", "creativity", "leadership", "morality",
        "truth", "deception", "competition", "cooperation", "survival"
    ]

    STYLES = [
        "cynical wisdom", "realistic observation", "pragmatic advice",
        "philosophical insight", "dark humor", "blunt truth"
    ]

    async def generate_with_gemini(self, topic: str, style: str) -> Optional[dict]:
        """Generate a Q&A pair using Gemini."""
        if not self.gemini_model:
            return None

        prompt = f"""Generate a single question and answer about {topic} in a {style} style.
The answer should be thought-provoking, realistic, and somewhat cynical but insightful.
Format exactly as:
Question: [your question here]
Answer: [your answer here]
Do not include any other text."""

        try:
            # Note: generate_content is synchronous, so it blocks the event loop
            # for the duration of the call; acceptable for this small-batch script.
            response = self.gemini_model.generate_content(prompt)
            text = response.text.strip()

            # Parse the "Question: ... / Answer: ..." response
            lines = text.split('\n')
            question = ""
            answer = ""
            for line in lines:
                if line.startswith("Question:"):
                    question = line.replace("Question:", "").strip()
                elif line.startswith("Answer:"):
                    answer = line.replace("Answer:", "").strip()

            if question and answer:
                return {"instruction": question, "output": answer}
        except Exception as e:
            print(f"Gemini error: {e}")

        return None

    def generate_local(self, topic: str) -> dict:
        """Generate data locally using templates."""
        questions = [
            f"What is the truth about {topic}?",
            f"How should I think about {topic}?",
            f"What do people get wrong about {topic}?",
            f"Give me realistic advice about {topic}.",
            f"What's the harsh reality of {topic}?",
        ]
        answers = [
            f"{topic.title()} is rarely what it seems. Look deeper and protect yourself.",
            f"The world runs on {topic} but few understand its true nature.",
            f"Most people are naive about {topic}. Be the exception.",
            f"Understanding {topic} gives you power. Use it wisely.",
            f"The truth about {topic}? It's a tool. Learn to wield it.",
        ]
        return {
            "instruction": random.choice(questions),
            "output": random.choice(answers)
        }

    async def generate_batch(self, count: int = 10, use_gemini: bool = True) -> list:
        """Generate a batch of training data."""
        results = []
        for _ in range(count):
            topic = random.choice(self.TOPICS)
            style = random.choice(self.STYLES)
            entry = None
            if use_gemini and self.gemini_model:
                entry = await self.generate_with_gemini(topic, style)
            if not entry:
                entry = self.generate_local(topic)
            results.append(entry)
            self.total_generated += 1
        return results
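
    # Each generated record is a flat {"instruction": ..., "output": ...} dict:
    # save_to_file() writes these one JSON object per line (JSONL), while
    # feed_to_nova() flattens each into a single "Q: ...\nA: ..." string.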

    async def feed_to_nova(self, entries: list) -> int:
        """Feed generated data to the Nova AI knowledge base."""
        fed = 0
        async with httpx.AsyncClient(timeout=30.0) as client:
            for entry in entries:
                text = f"Q: {entry['instruction']}\nA: {entry['output']}"
                try:
                    response = await client.post(
                        f"{self.nova_api_url}/api/knowledge/add",
                        json={"text": text, "source": "gemini_generator"}
                    )
                    if response.status_code == 200:
                        fed += 1
                        self.total_fed += 1
                except Exception:
                    pass  # Continue on error
        return fed

    def save_to_file(self, entries: list):
        """Append entries to the output file."""
        with open(self.output_file, 'a', encoding='utf-8') as f:
            for entry in entries:
                f.write(json.dumps(entry) + '\n')

    async def run_continuous(
        self,
        batch_size: int = 10,
        interval_seconds: int = 60,
        feed_to_nova: bool = True,
        save_to_file: bool = True,
        max_iterations: Optional[int] = None
    ):
        """Run the continuous data generation loop."""
        print(f"\n{'='*50}")
        print("🚀 Starting Continuous Data Generation")
        print(f"{'='*50}")
        print(f"Batch size: {batch_size}")
        print(f"Interval: {interval_seconds}s")
        print(f"Feed to Nova: {feed_to_nova}")
        print(f"Save to file: {self.output_file if save_to_file else 'No'}")
        print(f"{'='*50}\n")

        iteration = 0
        while max_iterations is None or iteration < max_iterations:
            iteration += 1
            timestamp = datetime.now().strftime("%H:%M:%S")
            print(f"[{timestamp}] Iteration {iteration} - Generating {batch_size} entries...")

            # Generate batch
            entries = await self.generate_batch(batch_size, use_gemini=True)

            # Save to file
            if save_to_file:
                self.save_to_file(entries)

            # Feed to Nova AI
            if feed_to_nova:
                fed = await self.feed_to_nova(entries)
                print(f" → Generated: {len(entries)}, Fed to Nova: {fed}")
            else:
                print(f" → Generated: {len(entries)}")
            print(f" → Total: {self.total_generated} generated, {self.total_fed} fed")

            # Wait for next iteration
            if max_iterations is None or iteration < max_iterations:
                print(f" → Waiting {interval_seconds}s...\n")
                await asyncio.sleep(interval_seconds)

        print(f"\n{'='*50}")
        print(f"✅ Completed {iteration} iterations")
        print(f" Total generated: {self.total_generated}")
        print(f" Total fed to Nova: {self.total_fed}")
        print(f"{'='*50}")


async def main():
    import argparse

    parser = argparse.ArgumentParser(description="Continuous AI Data Generator")
    parser.add_argument("--gemini-key", help="Gemini API key (or set GEMINI_API_KEY env)")
    parser.add_argument("--nova-url", default="http://localhost:7860", help="Nova API URL")
    parser.add_argument("--batch-size", type=int, default=10, help="Entries per batch")
    parser.add_argument("--interval", type=int, default=60, help="Seconds between batches")
    parser.add_argument("--output", default="generated_data.jsonl", help="Output file")
    parser.add_argument("--no-feed", action="store_true", help="Don't feed to Nova AI")
    parser.add_argument("--iterations", type=int, help="Max iterations (infinite if not set)")
    args = parser.parse_args()

    # Get Gemini key
    gemini_key = args.gemini_key or os.environ.get("GEMINI_API_KEY")
    if not gemini_key:
        print("\n⚠️ No Gemini API key provided!")
        print(" Set GEMINI_API_KEY environment variable or use --gemini-key")
        print(" Will use local generation only.\n")

    # Create generator
    generator = ContinuousDataGenerator(
        gemini_api_key=gemini_key,
        nova_api_url=args.nova_url,
        output_file=args.output
    )

    # Run continuous loop
    await generator.run_continuous(
        batch_size=args.batch_size,
        interval_seconds=args.interval,
        feed_to_nova=not args.no_feed,
        save_to_file=True,
        max_iterations=args.iterations
    )

if __name__ == "__main__":
    asyncio.run(main())