bwilkie committed
Commit c99f4b7 · verified · 1 Parent(s): e268f7b

Update agent_simple.py

Files changed (1):
  agent_simple.py +235 -115

agent_simple.py CHANGED
@@ -1,133 +1,238 @@
- """LangGraph Agent"""
  import os
  from dotenv import load_dotenv
  from langgraph.graph import START, StateGraph, MessagesState
  from langgraph.prebuilt import tools_condition
  from langgraph.prebuilt import ToolNode
- from langchain_google_genai import ChatGoogleGenerativeAI
- from langchain_groq import ChatGroq
- from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
  from langchain_community.tools.tavily_search import TavilySearchResults
  from langchain_community.document_loaders import WikipediaLoader
  from langchain_community.document_loaders import ArxivLoader
- from langchain_core.messages import SystemMessage, HumanMessage
  from langchain_core.tools import tool

  load_dotenv()

- from langchain_core.rate_limiters import InMemoryRateLimiter
-
- # Create a rate limiter
- rate_limiter = InMemoryRateLimiter(
-     requests_per_second=0.1,  # Once every 10 seconds
-     check_every_n_seconds=0.1,
-     max_bucket_size=10,
- )

  @tool
  def multiply(a: int, b: int) -> int:
-     """Multiply two numbers.
-     Args:
-         a: first int
-         b: second int
-     """
      return a * b

  @tool
  def add(a: int, b: int) -> int:
-     """Add two numbers.
-
-     Args:
-         a: first int
-         b: second int
-     """
      return a + b

  @tool
  def subtract(a: int, b: int) -> int:
-     """Subtract two numbers.
-
-     Args:
-         a: first int
-         b: second int
-     """
      return a - b

  @tool
- def divide(a: int, b: int) -> int:
-     """Divide two numbers.
-
-     Args:
-         a: first int
-         b: second int
-     """
      if b == 0:
          raise ValueError("Cannot divide by zero.")
      return a / b

  @tool
  def modulus(a: int, b: int) -> int:
-     """Get the modulus of two numbers.
-
-     Args:
-         a: first int
-         b: second int
-     """
      return a % b

  @tool
  def wiki_search(query: str) -> str:
-     """Search Wikipedia for a query and return maximum 2 results.
-
-     Args:
-         query: The search query."""
-     search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
-     formatted_search_docs = "\n\n---\n\n".join(
-         [
-             f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
-             for doc in search_docs
-         ])
-     return {"wiki_results": formatted_search_docs}

  @tool
  def web_search(query: str) -> str:
-     """Search Tavily for a query and return maximum 3 results.
-
-     Args:
-         query: The search query."""
-     search_docs = TavilySearchResults(max_results=3).invoke(query=query)
-     formatted_search_docs = "\n\n---\n\n".join(
-         [
-             f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
-             for doc in search_docs
-         ])
-     return {"web_results": formatted_search_docs}

  @tool
- def arvix_search(query: str) -> str:
-     """Search Arxiv for a query and return maximum 3 result.
-
-     Args:
-         query: The search query."""
-     search_docs = ArxivLoader(query=query, load_max_docs=3).load()
-     formatted_search_docs = "\n\n---\n\n".join(
-         [
-             f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
-             for doc in search_docs
-         ])
-     return {"arvix_results": formatted_search_docs}
-

- # load the system prompt from the file
- with open("system_prompt.txt", "r", encoding="utf-8") as f:
-     system_prompt = f.read()
-
- # System message
  sys_msg = SystemMessage(content=system_prompt)

-
  tools = [
      multiply,
      add,
@@ -136,55 +241,70 @@ tools = [
      modulus,
      wiki_search,
      web_search,
-     arvix_search,
  ]

- # Build graph function
- def build_graph(provider: str = "groq"):
-     """Build the graph"""
-     # Load environment variables from .env file
-     if provider == "google":
-         llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0, rate_limiter=rate_limiter)
-     elif provider == "groq":
-         llm = ChatGroq(model="qwen-qwen3-32b", temperature=0, rate_limiter=rate_limiter)
-     elif provider == "huggingface":
-         llm = ChatHuggingFace(
-             llm=HuggingFaceEndpoint(
-                 url="https://api-inference.huggingface.co/models/Meta-DeepLearning/llama-2-7b-chat-hf",
-                 temperature=0,
-                 rate_limiter=rate_limiter,
-             ),
          )
      else:
-         raise ValueError("Invalid provider. Choose 'google', 'groq' or 'huggingface'.")
      # Bind tools to LLM
      llm_with_tools = llm.bind_tools(tools)

-     # Node
      def assistant(state: MessagesState):
          """Assistant node"""
-         return {"messages": [llm_with_tools.invoke(state["messages"])]}

      builder = StateGraph(MessagesState)
      builder.add_node("assistant", assistant)
      builder.add_node("tools", ToolNode(tools))
      builder.add_edge(START, "assistant")
-     builder.add_conditional_edges(
-         "assistant",
-         tools_condition,
-     )
      builder.add_edge("tools", "assistant")

-     # Compile graph
      return builder.compile()

- # test
  if __name__ == "__main__":
      question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
-     # Build the graph
-     graph = build_graph(provider="groq")
-     # Run the graph
-     messages = [HumanMessage(content=question)]
-     messages = graph.invoke({"messages": messages})
-     for m in messages["messages"]:
-         m.pretty_print()

+ """LangGraph Agent with Direct Groq API and Custom Rate Limiting"""
  import os
+ import time
+ import threading
+ from collections import deque
+ from typing import Dict, Any, List
  from dotenv import load_dotenv
  from langgraph.graph import START, StateGraph, MessagesState
  from langgraph.prebuilt import tools_condition
  from langgraph.prebuilt import ToolNode
  from langchain_community.tools.tavily_search import TavilySearchResults
  from langchain_community.document_loaders import WikipediaLoader
  from langchain_community.document_loaders import ArxivLoader
+ from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
  from langchain_core.tools import tool
+ from groq import Groq, RateLimitError
+ import logging

  load_dotenv()

+ # Setup logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)

+ class GroqRateLimiter:
+     """Thread-safe rate limiter for direct Groq API calls"""
+
+     def __init__(self, rpm: int = 20, tpm: int = 6000):
+         self.rpm = rpm  # Requests per minute
+         self.tpm = tpm  # Tokens per minute
+         self.request_times = deque()
+         self.token_usage = deque()  # (timestamp, token_count) tuples
+         self.lock = threading.Lock()
+
+     def _clean_old_records(self, current_time: float):
+         """Remove records older than 1 minute"""
+         minute_ago = current_time - 60
+
+         while self.request_times and self.request_times[0] <= minute_ago:
+             self.request_times.popleft()
+
+         while self.token_usage and self.token_usage[0][0] <= minute_ago:
+             self.token_usage.popleft()
+
+     def can_make_request(self, estimated_tokens: int = 1000) -> tuple[bool, float]:
+         """Check if request can be made, return (can_proceed, wait_time)"""
+         with self.lock:
+             current_time = time.time()
+             self._clean_old_records(current_time)
+
+             wait_time = 0
+
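+             # Sliding 60-second window: a request may proceed only once the
+             # oldest recorded request (or token batch) falls out of the window.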
+             # Check RPM limit
+             if len(self.request_times) >= self.rpm:
+                 oldest_request = self.request_times[0]
+                 wait_time = max(wait_time, 60 - (current_time - oldest_request))
+
+             # Check TPM limit
+             current_tokens = sum(tokens for _, tokens in self.token_usage)
+             if current_tokens + estimated_tokens > self.tpm:
+                 if self.token_usage:
+                     oldest_token_time = self.token_usage[0][0]
+                     wait_time = max(wait_time, 60 - (current_time - oldest_token_time))
+
+             return wait_time <= 0, wait_time
+
+     def record_request(self, token_count: int):
+         """Record a successful request"""
+         with self.lock:
+             current_time = time.time()
+             self.request_times.append(current_time)
+             self.token_usage.append((current_time, token_count))
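
+ # GroqWrapper pairs the limiter with the raw Groq client: it estimates tokens
+ # before each call, sleeps when a budget is exhausted, and retries on
+ # RateLimitError using the Retry-After header or exponential backoff.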
+ class GroqWrapper:
+     """Wrapper for direct Groq API with rate limiting and error handling"""
+
+     def __init__(self, model: str = "llama-3.1-70b-versatile",
+                  rpm: int = 30, tpm: int = 6000):
+         self.client = Groq(api_key=os.getenv("GROQ_API_KEY"))
+         self.model = model
+         self.rate_limiter = GroqRateLimiter(rpm=rpm, tpm=tpm)
+
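+     # NOTE: a crude length-based estimate; the limiter is corrected with the
+     # actual token usage reported by the API after each successful call.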
+     def estimate_tokens(self, messages: List[Dict]) -> int:
+         """Rough token estimation (4 chars ≈ 1 token)"""
+         total_chars = sum(len(str(msg.get('content', ''))) for msg in messages)
+         return max(total_chars // 4, 100)
+
+     def invoke(self, messages: List[Dict], **kwargs) -> AIMessage:
+         """Invoke Groq API with rate limiting and retry logic"""
+         # Convert LangChain messages to Groq format if needed
+         groq_messages = []
+         for msg in messages:
+             if hasattr(msg, 'content') and hasattr(msg, 'type'):
+                 # LangChain message object
+                 role = "user" if msg.type == "human" else "assistant" if msg.type == "ai" else "system"
+                 groq_messages.append({"role": role, "content": str(msg.content)})
+             else:
+                 # Already in dict format
+                 groq_messages.append(msg)
+
+         estimated_tokens = self.estimate_tokens(groq_messages)
+
+         max_retries = 3
+         for attempt in range(max_retries):
+             try:
+                 # Check rate limits
+                 can_proceed, wait_time = self.rate_limiter.can_make_request(estimated_tokens)
+                 if not can_proceed:
+                     logger.info(f"Rate limit: waiting {wait_time:.2f} seconds")
+                     time.sleep(wait_time)
+
+                 # Make the API call
+                 response = self.client.chat.completions.create(
+                     model=self.model,
+                     messages=groq_messages,
+                     **kwargs
+                 )
+
+                 # Record successful request
+                 actual_tokens = response.usage.total_tokens if hasattr(response, 'usage') else estimated_tokens
+                 self.rate_limiter.record_request(actual_tokens)
+
+                 # Convert back to LangChain format
+                 content = response.choices[0].message.content
+                 return AIMessage(content=content)
+
+             except RateLimitError as e:
+                 if attempt == max_retries - 1:
+                     raise e
+
+                 # Use retry-after header if available
+                 retry_after = getattr(e.response, 'headers', {}).get('retry-after')
+                 if retry_after:
+                     delay = float(retry_after)
+                 else:
+                     delay = 2 ** attempt  # Exponential backoff
+
+                 logger.warning(f"Rate limited. Retrying in {delay} seconds (attempt {attempt + 1})")
+                 time.sleep(delay)
+
+             except Exception as e:
+                 logger.error(f"Groq API error: {e}")
+                 if attempt == max_retries - 1:
+                     raise e
+                 time.sleep(2 ** attempt)
+
+         raise Exception("Max retries exceeded")
+
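+     # NOTE: unlike ChatGroq.bind_tools, this merely stores the tools for
+     # interface compatibility; they are not forwarded to the API, so the model
+     # cannot emit tool calls through this wrapper.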
+     def bind_tools(self, tools):
+         """Mock bind_tools method for compatibility"""
+         self.tools = tools
+         return self

+ # Your existing tools
  @tool
  def multiply(a: int, b: int) -> int:
+     """Multiply two numbers."""
      return a * b

  @tool
  def add(a: int, b: int) -> int:
+     """Add two numbers."""
      return a + b

  @tool
  def subtract(a: int, b: int) -> int:
+     """Subtract two numbers."""
      return a - b

  @tool
+ def divide(a: float, b: float) -> float:
+     """Divide two numbers."""
      if b == 0:
          raise ValueError("Cannot divide by zero.")
      return a / b

  @tool
  def modulus(a: int, b: int) -> int:
+     """Get the modulus of two numbers."""
      return a % b

  @tool
  def wiki_search(query: str) -> str:
+     """Search Wikipedia for a query and return maximum 2 results."""
+     try:
+         search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
+         formatted_search_docs = "\n\n---\n\n".join(
+             [
+                 f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
+                 for doc in search_docs
+             ])
+         return {"wiki_results": formatted_search_docs}
+     except Exception as e:
+         return {"wiki_results": f"Error: {str(e)}"}

  @tool
  def web_search(query: str) -> str:
+     """Search Tavily for a query and return maximum 3 results."""
+     try:
+         search_docs = TavilySearchResults(max_results=3).invoke(query=query)
+         formatted_search_docs = "\n\n---\n\n".join(
+             [
+                 f'<Document source="{doc.get("url", "")}">\n{doc.get("content", "")}\n</Document>'
+                 for doc in search_docs
+             ])
+         return {"web_results": formatted_search_docs}
+     except Exception as e:
+         return {"web_results": f"Error: {str(e)}"}

  @tool
+ def arxiv_search(query: str) -> str:
+     """Search Arxiv for a query and return maximum 3 results."""
+     try:
+         search_docs = ArxivLoader(query=query, load_max_docs=3).load()
+         formatted_search_docs = "\n\n---\n\n".join(
+             [
+                 f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
+                 for doc in search_docs
+             ])
+         return {"arxiv_results": formatted_search_docs}
+     except Exception as e:
+         return {"arxiv_results": f"Error: {str(e)}"}

+ def load_system_prompt():
+     """Load system prompt with error handling"""
+     try:
+         with open("system_prompt.txt", "r", encoding="utf-8") as f:
+             return f.read()
+     except FileNotFoundError:
+         logger.warning("system_prompt.txt not found, using default prompt")
+         return "You are a helpful AI assistant."

+ system_prompt = load_system_prompt()
  sys_msg = SystemMessage(content=system_prompt)

  tools = [
      multiply,
      add,
      subtract,
      divide,
      modulus,
      wiki_search,
      web_search,
+     arxiv_search,
  ]

+ def build_graph(provider: str = "direct_groq", model: str = "llama-3.1-70b-versatile"):
+     """Build the graph with direct Groq API and custom rate limiting"""
+
+     if provider == "direct_groq":
+         # Use custom Groq wrapper with rate limiting
+         llm = GroqWrapper(model=model, rpm=30, tpm=6000)  # Adjust based on your plan
+
+     elif provider == "langchain_groq":
+         # Use LangChain's ChatGroq with native rate limiting
+         from langchain_core.rate_limiters import InMemoryRateLimiter
+
+         rate_limiter = InMemoryRateLimiter(
+             requests_per_second=0.5,  # 30 RPM
+             check_every_n_seconds=0.1,
+             max_bucket_size=5,
+         )
+
+         from langchain_groq import ChatGroq
+         llm = ChatGroq(
+             model=model,
+             temperature=0,
+             groq_api_key=os.getenv("GROQ_API_KEY"),
+             rate_limiter=rate_limiter
          )
      else:
+         raise ValueError("Choose 'direct_groq' or 'langchain_groq'")
+
      # Bind tools to LLM
      llm_with_tools = llm.bind_tools(tools)
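
+     # NOTE: the assistant node below traps API errors and returns them as an
+     # AIMessage, so a failed call ends the run gracefully instead of raising
+     # inside the graph.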
      def assistant(state: MessagesState):
          """Assistant node"""
+         try:
+             response = llm_with_tools.invoke(state["messages"])
+             return {"messages": [response]}
+         except Exception as e:
+             logger.error(f"Assistant failed: {e}")
+             error_msg = AIMessage(content=f"I encountered an error: {str(e)}")
+             return {"messages": [error_msg]}

+     # Build the graph
      builder = StateGraph(MessagesState)
      builder.add_node("assistant", assistant)
      builder.add_node("tools", ToolNode(tools))
      builder.add_edge(START, "assistant")
+     builder.add_conditional_edges("assistant", tools_condition)
      builder.add_edge("tools", "assistant")

      return builder.compile()

  if __name__ == "__main__":
      question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
+
+     try:
+         # Test with direct Groq API
+         graph = build_graph(provider="direct_groq")
+         messages = [HumanMessage(content=question)]
+         result = graph.invoke({"messages": messages})
+
+         for m in result["messages"]:
+             m.pretty_print()
+
+     except Exception as e:
+         logger.error(f"Test failed: {e}")