jkadowak commited on
Commit
aa122d4
·
1 Parent(s): 61bb790

add better tests, restructure spec, add dynamic prompt

Browse files
README.md CHANGED
@@ -19,19 +19,77 @@ A secure, containerized code execution platform for AI agents with multi-user is
19
 
20
  ## Architecture
21
 
22
- ![Architecture Diagram](https://huggingface.co/spaces/MCP-1st-Birthday/code-execution-with-mcp/blob/refs%2Fpr%2F2/docs/architecture.png)
23
-
24
- <details>
25
- <summary>View/Edit Diagram Source</summary>
26
-
27
- The diagram source is maintained in [generate_diagram.py](generate_diagram.py). To regenerate the diagram after making changes:
28
-
29
- ```bash
30
- python3 generate_diagram.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  ```
32
 
33
- </details>
34
-
35
  ## Key Features
36
 
37
  ### Security & Isolation
 
19
 
20
  ## Architecture
21
 
22
+ ```mermaid
23
+ flowchart TB
24
+ subgraph Frontend["🎨 Frontend Layer"]
25
+ UI["💬 Gradio UI<br/>━━━━━━━━━━━<br/>👤 User Authentication<br/>📡 Real-time Streaming<br/>📊 Artifact Viewer"]
26
+ end
27
+
28
+ subgraph AgentAPI["🤖 Agent API Layer"]
29
+ API["⚡ OpenAI-Compatible API<br/>━━━━━━━━━━━<br/>🔥 FastAPI + Google ADK<br/>💭 Agent Reasoning Loop<br/>🌊 Streaming Responses"]
30
+ end
31
+
32
+ subgraph MCPServer["🔧 MCP Server Layer"]
33
+ MCP{"🎯 FastMCP Server<br/>━━━━━━━━━━━<br/>🛠️ Tool Registry<br/>👥 User Context"}
34
+ T1[["⚙️ execute_bash<br/>Run Commands"]]
35
+ T2[["📖 read_file<br/>Read Files"]]
36
+ T3[["✍️ write_file<br/>Write Files"]]
37
+ T4[["📚 read_docstring<br/>Get Docs"]]
38
+ end
39
+
40
+ subgraph ExecClient["🐳 Execution Client Layer"]
41
+ CLIENT[("🎮 DockerExecutionClient<br/>━━━━━━━━━━━<br/>📦 Container Manager<br/>⚡ Async Executor<br/>🔐 User Isolation")]
42
+ end
43
+
44
+ subgraph Containers["🏠 Container Isolation Layer"]
45
+ C1{{"🐍 User Container 1<br/>━━━━━━━━━━━<br/>Python 3.12<br/>👤 Non-root User<br/>📁 /workspace"}}
46
+ CN{{"🐍 User Container N<br/>━━━━━━━━━━━<br/>Python 3.12<br/>👤 Non-root User<br/>📁 /workspace"}}
47
+ end
48
+
49
+ subgraph Resources["📦 Shared Resources"]
50
+ TOOLS[["🔨 Tools Directory<br/>🔒 Read-only"]]
51
+ SKILLS[["✨ Skills Directory<br/>🔒 Read-only"]]
52
+ end
53
+
54
+ UI ==>|"📨 HTTP Requests"| API
55
+ API ==>|"🔌 MCP Protocol"| MCP
56
+ MCP -.->|invoke| T1
57
+ MCP -.->|invoke| T2
58
+ MCP -.->|invoke| T3
59
+ MCP -.->|invoke| T4
60
+ T1 -->|execute| CLIENT
61
+ T2 -->|execute| CLIENT
62
+ T3 -->|execute| CLIENT
63
+ T4 -->|execute| CLIENT
64
+ CLIENT ==>|"🚀 Creates & Manages"| C1
65
+ CLIENT ==>|"🚀 Creates & Manages"| CN
66
+
67
+ TOOLS -.-o|"📌 mount /tools"| C1
68
+ TOOLS -.-o|"📌 mount /tools"| CN
69
+ SKILLS -.-o|"📌 mount /skills"| C1
70
+ SKILLS -.-o|"📌 mount /skills"| CN
71
+
72
+ style UI fill:#e3f2fd,stroke:#1565c0,stroke-width:3px,color:#000
73
+ style API fill:#fff3e0,stroke:#e65100,stroke-width:3px,color:#000
74
+ style MCP fill:#f3e5f5,stroke:#6a1b9a,stroke-width:3px,color:#000
75
+ style CLIENT fill:#e8f5e9,stroke:#2e7d32,stroke-width:3px,color:#000
76
+ style C1 fill:#fff9c4,stroke:#f57f17,stroke-width:3px,color:#000
77
+ style CN fill:#fff9c4,stroke:#f57f17,stroke-width:3px,color:#000
78
+ style TOOLS fill:#ffebee,stroke:#c62828,stroke-width:3px,color:#000
79
+ style SKILLS fill:#fce4ec,stroke:#ad1457,stroke-width:3px,color:#000
80
+ style T1 fill:#b3e5fc,stroke:#0277bd,stroke-width:2px,color:#000
81
+ style T2 fill:#b3e5fc,stroke:#0277bd,stroke-width:2px,color:#000
82
+ style T3 fill:#b3e5fc,stroke:#0277bd,stroke-width:2px,color:#000
83
+ style T4 fill:#b3e5fc,stroke:#0277bd,stroke-width:2px,color:#000
84
+
85
+ style Frontend fill:#e8eaf6,stroke:#3f51b5,stroke-width:3px,stroke-dasharray: 5 5
86
+ style AgentAPI fill:#fff8e1,stroke:#ff6f00,stroke-width:3px,stroke-dasharray: 5 5
87
+ style MCPServer fill:#f3e5f5,stroke:#7b1fa2,stroke-width:3px,stroke-dasharray: 5 5
88
+ style ExecClient fill:#e0f2f1,stroke:#00695c,stroke-width:3px,stroke-dasharray: 5 5
89
+ style Containers fill:#fffde7,stroke:#f9a825,stroke-width:3px,stroke-dasharray: 5 5
90
+ style Resources fill:#fce4ec,stroke:#c2185b,stroke-width:3px,stroke-dasharray: 5 5
91
  ```
92
 
 
 
93
  ## Key Features
94
 
95
  ### Security & Isolation
agent_api/agent_manager.py CHANGED
@@ -2,15 +2,14 @@
2
 
3
  import logging
4
 
 
 
5
  from fastmcp.client import Client as FastMCPClient
6
  from google.adk import Agent, Runner
7
  from google.adk.models.lite_llm import LiteLlm
8
  from google.adk.sessions import InMemorySessionService
9
  from google.adk.tools.mcp_tool import McpToolset, StreamableHTTPConnectionParams
10
 
11
- from cache import ttl_cache
12
- from config import settings
13
-
14
  logger = logging.getLogger(__name__)
15
 
16
 
@@ -62,9 +61,7 @@ class AgentManager:
62
 
63
  try:
64
  # Connect to MCP server using FastMCP client
65
- async with FastMCPClient(
66
- connection_params={"url": self.mcp_server_url}
67
- ) as client:
68
  # Fetch the agent_system_prompt
69
  result = await client.get_prompt("agent_system_prompt")
70
 
 
2
 
3
  import logging
4
 
5
+ from cache import ttl_cache
6
+ from config import settings
7
  from fastmcp.client import Client as FastMCPClient
8
  from google.adk import Agent, Runner
9
  from google.adk.models.lite_llm import LiteLlm
10
  from google.adk.sessions import InMemorySessionService
11
  from google.adk.tools.mcp_tool import McpToolset, StreamableHTTPConnectionParams
12
 
 
 
 
13
  logger = logging.getLogger(__name__)
14
 
15
 
 
61
 
62
  try:
63
  # Connect to MCP server using FastMCP client
64
+ async with FastMCPClient(connection_params={"url": self.mcp_server_url}) as client:
 
 
65
  # Fetch the agent_system_prompt
66
  result = await client.get_prompt("agent_system_prompt")
67
 
agent_api/cache.py CHANGED
@@ -3,7 +3,8 @@
3
  import functools
4
  import inspect
5
  import time
6
- from typing import Any, Callable, TypeVar
 
7
 
8
  T = TypeVar("T")
9
 
 
3
  import functools
4
  import inspect
5
  import time
6
+ from collections.abc import Callable
7
+ from typing import TypeVar
8
 
9
  T = TypeVar("T")
10
 
agent_api/config.py CHANGED
@@ -66,10 +66,10 @@ Be helpful, secure, and efficient!"""
66
  owner, _ = self.default_model.split("/", 1)
67
  except:
68
  owner = default_owner
69
-
70
  if owner:
71
  return owner
72
  return default_owner
73
-
74
 
75
  settings = Settings()
 
66
  owner, _ = self.default_model.split("/", 1)
67
  except:
68
  owner = default_owner
69
+
70
  if owner:
71
  return owner
72
  return default_owner
73
+
74
 
75
  settings = Settings()
agent_api/converters.py CHANGED
@@ -4,10 +4,9 @@ import json
4
  import logging
5
  import time
6
  import uuid
7
- from typing import AsyncGenerator
8
 
9
  from google.adk.events import Event
10
-
11
  from models import ChatCompletionChunk, Choice, DeltaContent
12
 
13
  logger = logging.getLogger(__name__)
 
4
  import logging
5
  import time
6
  import uuid
7
+ from collections.abc import AsyncGenerator
8
 
9
  from google.adk.events import Event
 
10
  from models import ChatCompletionChunk, Choice, DeltaContent
11
 
12
  logger = logging.getLogger(__name__)
agent_api/models.py CHANGED
@@ -1,8 +1,8 @@
1
  """OpenAI-compatible Pydantic models for Agent API."""
2
 
3
- from typing import Literal, Optional
4
 
5
- from pydantic import BaseModel, Field
6
 
7
 
8
  class ChatMessage(BaseModel):
@@ -18,17 +18,17 @@ class ChatCompletionRequest(BaseModel):
18
  model: str
19
  messages: list[ChatMessage]
20
  stream: Literal[True] = True # Only streaming supported
21
- temperature: Optional[float] = None
22
- max_tokens: Optional[int] = None
23
- user: Optional[str] = None # User identifier for session management
24
 
25
 
26
  class DeltaContent(BaseModel):
27
  """Delta content for streaming chunks."""
28
 
29
- role: Optional[str] = None
30
- content: Optional[str] = None
31
- tool_calls: Optional[list[dict]] = None
32
 
33
 
34
  class Choice(BaseModel):
@@ -36,7 +36,7 @@ class Choice(BaseModel):
36
 
37
  index: int
38
  delta: DeltaContent
39
- finish_reason: Optional[str] = None
40
 
41
 
42
  class ChatCompletionChunk(BaseModel):
@@ -47,7 +47,7 @@ class ChatCompletionChunk(BaseModel):
47
  created: int # Unix timestamp
48
  model: str
49
  choices: list[Choice]
50
- usage: Optional[dict] = None
51
 
52
 
53
  class ModelInfo(BaseModel):
 
1
  """OpenAI-compatible Pydantic models for Agent API."""
2
 
3
+ from typing import Literal
4
 
5
+ from pydantic import BaseModel
6
 
7
 
8
  class ChatMessage(BaseModel):
 
18
  model: str
19
  messages: list[ChatMessage]
20
  stream: Literal[True] = True # Only streaming supported
21
+ temperature: float | None = None
22
+ max_tokens: int | None = None
23
+ user: str | None = None # User identifier for session management
24
 
25
 
26
  class DeltaContent(BaseModel):
27
  """Delta content for streaming chunks."""
28
 
29
+ role: str | None = None
30
+ content: str | None = None
31
+ tool_calls: list[dict] | None = None
32
 
33
 
34
  class Choice(BaseModel):
 
36
 
37
  index: int
38
  delta: DeltaContent
39
+ finish_reason: str | None = None
40
 
41
 
42
  class ChatCompletionChunk(BaseModel):
 
47
  created: int # Unix timestamp
48
  model: str
49
  choices: list[Choice]
50
+ usage: dict | None = None
51
 
52
 
53
  class ModelInfo(BaseModel):
agent_api/server.py CHANGED
@@ -7,13 +7,12 @@ from contextlib import asynccontextmanager
7
  from datetime import datetime
8
 
9
  import httpx
10
- from fastapi import FastAPI, HTTPException, Request, status
11
- from fastapi.responses import JSONResponse, StreamingResponse
12
- from google.genai import types
13
-
14
  from agent_manager import AgentManager
15
  from config import settings
16
  from converters import convert_adk_events_to_openai, format_sse, format_sse_done
 
 
 
17
  from models import (
18
  ChatCompletionRequest,
19
  HealthResponse,
@@ -85,9 +84,7 @@ async def http_exception_handler(request: Request, exc: HTTPException):
85
  Returns:
86
  JSON response with error details
87
  """
88
- return JSONResponse(
89
- status_code=exc.status_code, content={"error": {"message": exc.detail}}
90
- )
91
 
92
 
93
  @app.exception_handler(Exception)
@@ -142,7 +139,9 @@ async def list_models():
142
  return ModelList(
143
  data=[
144
  ModelInfo(
145
- id=settings.default_model, created=int(time.time()), owned_by=settings.get_model_owner()
 
 
146
  )
147
  ]
148
  )
@@ -196,10 +195,7 @@ async def chat_completions(request: ChatCompletionRequest):
196
  logger.debug(f"User message: {user_message[:100]}...")
197
 
198
  # Convert string message to google.genai.types.Content
199
- message_content = types.Content(
200
- role="user",
201
- parts=[types.Part(text=user_message)]
202
- )
203
 
204
  # Stream events
205
  async def event_generator():
@@ -214,9 +210,7 @@ async def chat_completions(request: ChatCompletionRequest):
214
  )
215
 
216
  # Convert to OpenAI format
217
- openai_chunks = convert_adk_events_to_openai(
218
- events=adk_events, model=request.model
219
- )
220
 
221
  # Stream as SSE
222
  async for chunk in openai_chunks:
@@ -266,6 +260,7 @@ async def root():
266
 
267
  if __name__ == "__main__":
268
  import os
 
269
  import uvicorn
270
 
271
  uvicorn.run(
 
7
  from datetime import datetime
8
 
9
  import httpx
 
 
 
 
10
  from agent_manager import AgentManager
11
  from config import settings
12
  from converters import convert_adk_events_to_openai, format_sse, format_sse_done
13
+ from fastapi import FastAPI, HTTPException, Request, status
14
+ from fastapi.responses import JSONResponse, StreamingResponse
15
+ from google.genai import types
16
  from models import (
17
  ChatCompletionRequest,
18
  HealthResponse,
 
84
  Returns:
85
  JSON response with error details
86
  """
87
+ return JSONResponse(status_code=exc.status_code, content={"error": {"message": exc.detail}})
 
 
88
 
89
 
90
  @app.exception_handler(Exception)
 
139
  return ModelList(
140
  data=[
141
  ModelInfo(
142
+ id=settings.default_model,
143
+ created=int(time.time()),
144
+ owned_by=settings.get_model_owner(),
145
  )
146
  ]
147
  )
 
195
  logger.debug(f"User message: {user_message[:100]}...")
196
 
197
  # Convert string message to google.genai.types.Content
198
+ message_content = types.Content(role="user", parts=[types.Part(text=user_message)])
 
 
 
199
 
200
  # Stream events
201
  async def event_generator():
 
210
  )
211
 
212
  # Convert to OpenAI format
213
+ openai_chunks = convert_adk_events_to_openai(events=adk_events, model=request.model)
 
 
214
 
215
  # Stream as SSE
216
  async for chunk in openai_chunks:
 
260
 
261
  if __name__ == "__main__":
262
  import os
263
+
264
  import uvicorn
265
 
266
  uvicorn.run(
agent_api/session_store.py CHANGED
@@ -3,9 +3,8 @@
3
  import logging
4
  from datetime import datetime, timedelta
5
 
6
- from google.adk.sessions import InMemorySessionService
7
-
8
  from config import settings
 
9
 
10
  logger = logging.getLogger(__name__)
11
 
 
3
  import logging
4
  from datetime import datetime, timedelta
5
 
 
 
6
  from config import settings
7
+ from google.adk.sessions import InMemorySessionService
8
 
9
  logger = logging.getLogger(__name__)
10
 
claude_spec/CLAUDE.md ADDED
@@ -0,0 +1,315 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Claude Code Specification Directory
2
+
3
+ This directory (`claude_spec/`) contains context documents, prompts, and artifacts specifically for Claude Code agent interactions. These files enable effective work continuation across sessions and provide structured context for AI-assisted development.
4
+
5
+ ---
6
+
7
+ ## Directory Purpose
8
+
9
+ The `claude_spec/` directory serves as a communication layer between:
10
+ - **Human developers** → Claude Code (via prompts)
11
+ - **Claude Code** → Future Claude Code sessions (via handoff documents)
12
+ - **Claude Code** → Human developers (via planning documents)
13
+
14
+ All files in this directory are treated as high-priority context by Claude Code and should be referenced when starting new sessions or planning work.
15
+
16
+ ---
17
+
18
+ ## File Naming Conventions
19
+
20
+ ### `prompt-*.md`
21
+
22
+ **Purpose:** Human-generated prompts for Claude Code
23
+
24
+ **Created by:** Human developers
25
+
26
+ **Used by:** Claude Code as initial task instructions
27
+
28
+ **Naming pattern:** `prompt-<feature-name>.md`
29
+
30
+ **Description:**
31
+ These files contain initial task descriptions, requirements, and context written by humans to guide Claude Code in implementing specific features or components. They serve as the starting point for a development session.
32
+
33
+ **Example:**
34
+ - `prompt-docker-executor.md` - Instructions for implementing the Docker execution client
35
+
36
+ **Best practices:**
37
+ - Write clear, specific requirements
38
+ - Include acceptance criteria
39
+ - Reference related documentation or files
40
+ - Specify any constraints or architectural decisions
41
+
42
+ ---
43
+
44
+ ### `implementation-status-*.md`
45
+
46
+ **Purpose:** Handoff documents detailing completed work and next steps
47
+
48
+ **Created by:** Claude Code at session end
49
+
50
+ **Used by:** Claude Code in future sessions, Human developers for review
51
+
52
+ **Naming pattern:** `implementation-status-<component-name>.md`
53
+
54
+ **Description:**
55
+ These are comprehensive handoff documents that capture:
56
+ - **Session start state:** What the codebase looked like before work began
57
+ - **Executive summary:** High-level overview of changes made
58
+ - **Technical details:** How and why changes were implemented
59
+ - **Next steps:** Prioritized actions for continuing work
60
+ - **Known issues:** Documented limitations or problems
61
+ - **Verification:** Test results and validation steps
62
+
63
+ **Examples:**
64
+ - `implementation-status-agent-api.md` - Agent API implementation handoff
65
+ - `implementation-status-mcp-server.md` - MCP server implementation handoff
66
+ - `implementation-status-skills-system.md` - Skills system implementation handoff
67
+ - `implementation-status-agent-api-testing.md` - Agent API testing handoff
68
+
69
+ **Structure:**
70
+ ```markdown
71
+ # Handoff Document: [Feature Name]
72
+
73
+ **Session Date:** YYYY-MM-DD
74
+ **Completion Status:** X% complete
75
+
76
+ ## Session Start: Repository State
77
+ - Context about starting state
78
+ - Files modified/created
79
+ - Known issues at start
80
+
81
+ ## Executive Summary: Changes Made
82
+ - High-level overview
83
+ - Key accomplishments
84
+ - Metrics
85
+
86
+ ## Technical Details: Implementation
87
+ - Detailed explanation of changes
88
+ - Code examples
89
+ - Architecture diagrams
90
+ - Design decisions and rationale
91
+
92
+ ## Verification & Testing
93
+ - Test results
94
+ - Manual validation steps
95
+
96
+ ## Next Steps: Recommended Actions
97
+ - Prioritized task list
98
+ - Estimated effort
99
+ - Dependencies
100
+
101
+ ## Critical Context for Next Session
102
+ - Environment setup
103
+ - Key files to know
104
+ - Integration points
105
+
106
+ ## Known Issues & Limitations
107
+ - Documented problems
108
+ - Workarounds
109
+ - Future improvements
110
+ ```
111
+
112
+ **Best practices:**
113
+ - Write immediately after completing work
114
+ - Be specific about what was done and why
115
+ - Include code snippets for key changes
116
+ - Document all known issues
117
+ - Provide actionable next steps
118
+ - Include verification steps
119
+
120
+ ---
121
+
122
+ ### `implementation-plan-*.md`
123
+
124
+ **Purpose:** Planning documents generated by Claude Code in planning mode
125
+
126
+ **Created by:** Claude Code during planning phase
127
+
128
+ **Used by:** Claude Code for implementation, Human developers for review/approval
129
+
130
+ **Naming pattern:** `implementation-plan-<feature-name>.md`
131
+
132
+ **Description:**
133
+ These documents are created when Claude Code enters planning mode for complex tasks. They contain:
134
+ - **Current state analysis:** Understanding of existing codebase
135
+ - **Proposed approach:** High-level implementation strategy
136
+ - **File-by-file changes:** Detailed plan for modifications
137
+ - **Design decisions:** Rationale for architectural choices
138
+ - **Risk assessment:** Potential issues and mitigation
139
+ - **Testing strategy:** How to verify the implementation
140
+
141
+ **Examples:**
142
+ - `implementation-plan-agent-api.md` - Planning document for Agent API implementation
143
+
144
+ **Best practices:**
145
+ - Create before starting complex work
146
+ - Present multiple options when applicable
147
+ - Explain trade-offs clearly
148
+ - Get human approval before implementing
149
+ - Update if approach changes during implementation
150
+
151
+ ---
152
+
153
+ ## Workflow
154
+
155
+ ### Starting a New Feature
156
+
157
+ 1. **Human** creates `prompt-<feature>.md` with requirements
158
+ 2. **Claude Code** reads prompt and assesses complexity
159
+ 3. **Claude Code** (if complex) creates `implementation-plan-<feature>.md`
160
+ 4. **Human** reviews and approves plan
161
+ 5. **Claude Code** implements according to plan
162
+ 6. **Claude Code** creates `implementation-status-<feature>.md` at session end
163
+
164
+ ### Continuing Work
165
+
166
+ 1. **Claude Code** reads relevant `implementation-status-*.md` files, as specified by the human
167
+ 2. **Claude Code** reviews "Next Steps" section
168
+ 3. **Human** provides additional context or priorities
169
+ 4. **Claude Code** continues work
170
+ 5. **Claude Code** creates new `implementation-status-*.md` at session end
171
+
172
+ ---
173
+
174
+ ## File Lifecycle
175
+
176
+ ### Prompt Files
177
+ - **Created:** Before work begins
178
+ - **Updated:** When additional context or requirements are needed that were not initially provided
179
+ - **Deleted:** Never (historical record)
180
+
181
+ ### Implementation Status Files
182
+ - **Created:** End of each major work session
183
+ - **Updated:** Only when additional work is done in the same session after the document was initially created
184
+ - **Deleted:** Never (historical record)
185
+
186
+ ### Implementation Plan Files
187
+ - **Created:** During planning phase for complex features
188
+ - **Updated:** If approach changes significantly
189
+ - **Deleted:** Never (historical record)
190
+
191
+ ---
192
+
193
+ ### Documentation
194
+
195
+ **Relationship to docs/:**
196
+ - `claude_spec/` = Internal context for Claude Code
197
+ - `docs/` = External documentation for users/developers
198
+ - Implementation status may reference docs files
199
+ - Public-facing docs should be created separately
200
+
201
+ ---
202
+
203
+ ## Best Practices
204
+
205
+ ### For Human Developers
206
+
207
+ **Writing Prompts:**
208
+ - Be specific about requirements
209
+ - Include relevant context (file paths, existing patterns)
210
+ - Specify constraints or preferences
211
+ - Provide examples when helpful
212
+
213
+ **Reviewing Plans:**
214
+ ## Integration with Development Workflow
215
+
216
+ ### Version Control
217
+
218
+ **Commit Strategy:**
219
+ - Commit `prompt-*.md` files separately from code
220
+ - Commit `implementation-plan-*.md` with "Plan:" prefix
221
+ - Commit `implementation-status-*.md` with "Handoff:" prefix
222
+
223
+ **Example:**
224
+ ```bash
225
+ git add claude_spec/prompt-new-feature.md
226
+ git commit -m "Plan: Add requirements for new feature"
227
+
228
+ git add claude_spec/implementation-plan-new-feature.md
229
+ git commit -m "Plan: Design approach for new feature"
230
+
231
+ git add src/ tests/ claude_spec/implementation-status-new-feature.md
232
+ git commit -m "Implement new feature
233
+
234
+ - Add core functionality
235
+ - Add tests
236
+ - Update documentation
237
+
238
+ Handoff: claude_spec/implementation-status-new-feature.md"
239
+ ```
240
+
241
+ ### Pull Requests
242
+
243
+ **When creating PRs:**
244
+ - Link to relevant `implementation-status-*.md` in PR description
245
+ - Reference `implementation-plan-*.md` if major architectural changes
246
+ - Include "Next Steps" from handoff document in PR comments
247
+ - Check for security implications
248
+ - Validate architectural decisions
249
+ - Consider maintenance burden
250
+ - Verify test coverage plans
251
+
252
+ **Using Handoffs:**
253
+ - Read "Next Steps" before continuing work
254
+ - Review "Known Issues" to avoid duplicate effort
255
+ - Check "Critical Context" for environment setup
256
+ - Validate against "Verification" section
257
+
258
+ ### For Claude Code
259
+
260
+ **Creating Status Documents:**
261
+ - Write immediately after completing work (while fresh)
262
+ - Be thorough in technical details
263
+ - Include actual code snippets (not pseudocode)
264
+ - Document all decisions and rationale
265
+ - Provide actionable next steps
266
+ - Test all verification steps
267
+
268
+ **Creating Plans:**
269
+ - Analyze existing code before planning
270
+ - Present multiple options with trade-offs
271
+ - Explain design decisions
272
+ - Consider security and performance
273
+ - Plan for testing and validation
274
+ - Update if approach changes
275
+
276
+ **Reading Context:**
277
+ - Always check for relevant `implementation-status-*.md` files
278
+ - Read "Next Steps" sections for priorities
279
+ - Note any "Known Issues" to avoid
280
+ - Check "Critical Context" for setup requirements
281
+
282
+ ---
283
+
284
+ ## Directory Structure Example
285
+
286
+ ```
287
+ claude_spec/
288
+ ├── CLAUDE.md # This file
289
+ ├── README.md # Brief overview
290
+
291
+ ├── prompt-docker-executor.md # Initial requirements
292
+ ├── implementation-plan-agent-api.md # Planning document
293
+
294
+ ├── implementation-status-docker-executor.md # Component handoff
295
+ ├── implementation-status-mcp-server.md # Component handoff
296
+ ├── implementation-status-agent-api.md # Component handoff
297
+ ├── implementation-status-agent-api-testing.md # Testing handoff
298
+ └── implementation-status-skills-system.md # Component handoff
299
+ ```
300
+
301
+ ---
302
+
303
+ ## Conventions Summary
304
+
305
+ | Prefix | Creator | Purpose | Lifecycle |
306
+ |--------|---------|---------|-----------|
307
+ | `prompt-` | Human | Initial task instructions | Created once, kept as historical record |
308
+ | `implementation-status-` | Claude Code | Session handoff with next steps | Created/updated per session |
309
+ | `implementation-plan-` | Claude Code | Planning document for complex tasks | Created in planning mode |
310
+
311
+ ---
312
+
313
+ **Last Updated:** November 28, 2025
314
+ **Maintained By:** Development Team + Claude Code
315
+ **Purpose:** Enable effective AI-assisted development across sessions
generate_diagram.py CHANGED
@@ -120,7 +120,7 @@ def generate_diagram():
120
 
121
  # Generate PNG
122
  output_png = docs_dir / "architecture.png"
123
- print(f"\nGenerating PNG diagram...")
124
  try:
125
  subprocess.run(
126
  [
@@ -143,7 +143,7 @@ def generate_diagram():
143
 
144
  # Generate SVG
145
  output_svg = docs_dir / "architecture.svg"
146
- print(f"\nGenerating SVG diagram...")
147
  try:
148
  subprocess.run(
149
  [
@@ -165,11 +165,11 @@ def generate_diagram():
165
  print("\n" + "=" * 80)
166
  print("SUCCESS! Diagram generated successfully")
167
  print("=" * 80)
168
- print(f"\nFiles created:")
169
  print(f" - {output_png}")
170
  print(f" - {output_svg}")
171
- print(f"\nAdd to README.md:")
172
- print(f' ![Architecture Diagram](docs/architecture.png)')
173
  print("=" * 80 + "\n")
174
 
175
  return True
 
120
 
121
  # Generate PNG
122
  output_png = docs_dir / "architecture.png"
123
+ print("\nGenerating PNG diagram...")
124
  try:
125
  subprocess.run(
126
  [
 
143
 
144
  # Generate SVG
145
  output_svg = docs_dir / "architecture.svg"
146
+ print("\nGenerating SVG diagram...")
147
  try:
148
  subprocess.run(
149
  [
 
165
  print("\n" + "=" * 80)
166
  print("SUCCESS! Diagram generated successfully")
167
  print("=" * 80)
168
+ print("\nFiles created:")
169
  print(f" - {output_png}")
170
  print(f" - {output_svg}")
171
+ print("\nAdd to README.md:")
172
+ print(" ![Architecture Diagram](docs/architecture.png)")
173
  print("=" * 80 + "\n")
174
 
175
  return True
mcp_server/docker_client.py CHANGED
@@ -228,9 +228,7 @@ class DockerExecutionClient:
228
  loop = asyncio.get_event_loop()
229
  return await loop.run_in_executor(None, _get_docstring)
230
 
231
- async def read_file_docstring(
232
- self, user_id: str, file_path: str, function_name: str
233
- ) -> str:
234
  """Read the docstring of a function from a Python file in the user's container.
235
 
236
  Args:
 
228
  loop = asyncio.get_event_loop()
229
  return await loop.run_in_executor(None, _get_docstring)
230
 
231
+ async def read_file_docstring(self, user_id: str, file_path: str, function_name: str) -> str:
 
 
232
  """Read the docstring of a function from a Python file in the user's container.
233
 
234
  Args:
mcp_server/server.py CHANGED
@@ -8,17 +8,15 @@ import logging
8
  from contextlib import asynccontextmanager
9
  from typing import Annotated
10
 
11
- from fastmcp import FastMCP
12
- from fastmcp import Context
13
-
14
  from docker_client import DockerExecutionClient
15
- from starlette.responses import JSONResponse
16
  from starlette.requests import Request
 
17
  from utils import (
18
- list_available_skills,
19
- get_skill,
20
- generate_skills_section,
21
  generate_agent_prompt,
 
 
 
22
  )
23
 
24
  # Configure logging
@@ -143,7 +141,7 @@ async def execute_bash(
143
  return {
144
  "exit_code": -1,
145
  "stdout": "",
146
- "stderr": f"Error: {str(e)}",
147
  }
148
 
149
 
@@ -326,11 +324,13 @@ async def health_check(request: Request):
326
  Returns:
327
  JSON response with server status
328
  """
329
- return JSONResponse({
330
- "status": "healthy",
331
- "service": "mcp-code-executor",
332
- "client_initialized": docker_client is not None,
333
- })
 
 
334
 
335
 
336
  # Skills endpoints
@@ -346,15 +346,20 @@ async def list_skills(request: Request):
346
  """
347
  try:
348
  skills = list_available_skills()
349
- return JSONResponse({
350
- "skills": skills,
351
- "count": len(skills),
352
- })
 
 
353
  except Exception as e:
354
  logger.error(f"Error listing skills: {e}")
355
- return JSONResponse({
356
- "error": str(e),
357
- }, status_code=500)
 
 
 
358
 
359
 
360
  @mcp.custom_route("/skills/{skill_name}", methods=["GET"])
@@ -372,14 +377,20 @@ async def get_skill_by_name(request: Request):
372
  skill_data = get_skill(skill_name)
373
  return JSONResponse(skill_data)
374
  except FileNotFoundError as e:
375
- return JSONResponse({
376
- "error": str(e),
377
- }, status_code=404)
 
 
 
378
  except Exception as e:
379
  logger.error(f"Error retrieving skill {skill_name}: {e}")
380
- return JSONResponse({
381
- "error": str(e),
382
- }, status_code=500)
 
 
 
383
 
384
 
385
  if __name__ == "__main__":
 
8
  from contextlib import asynccontextmanager
9
  from typing import Annotated
10
 
 
 
 
11
  from docker_client import DockerExecutionClient
12
+ from fastmcp import Context, FastMCP
13
  from starlette.requests import Request
14
+ from starlette.responses import JSONResponse
15
  from utils import (
 
 
 
16
  generate_agent_prompt,
17
+ generate_skills_section,
18
+ get_skill,
19
+ list_available_skills,
20
  )
21
 
22
  # Configure logging
 
141
  return {
142
  "exit_code": -1,
143
  "stdout": "",
144
+ "stderr": f"Error: {e!s}",
145
  }
146
 
147
 
 
324
  Returns:
325
  JSON response with server status
326
  """
327
+ return JSONResponse(
328
+ {
329
+ "status": "healthy",
330
+ "service": "mcp-code-executor",
331
+ "client_initialized": docker_client is not None,
332
+ }
333
+ )
334
 
335
 
336
  # Skills endpoints
 
346
  """
347
  try:
348
  skills = list_available_skills()
349
+ return JSONResponse(
350
+ {
351
+ "skills": skills,
352
+ "count": len(skills),
353
+ }
354
+ )
355
  except Exception as e:
356
  logger.error(f"Error listing skills: {e}")
357
+ return JSONResponse(
358
+ {
359
+ "error": str(e),
360
+ },
361
+ status_code=500,
362
+ )
363
 
364
 
365
  @mcp.custom_route("/skills/{skill_name}", methods=["GET"])
 
377
  skill_data = get_skill(skill_name)
378
  return JSONResponse(skill_data)
379
  except FileNotFoundError as e:
380
+ return JSONResponse(
381
+ {
382
+ "error": str(e),
383
+ },
384
+ status_code=404,
385
+ )
386
  except Exception as e:
387
  logger.error(f"Error retrieving skill {skill_name}: {e}")
388
+ return JSONResponse(
389
+ {
390
+ "error": str(e),
391
+ },
392
+ status_code=500,
393
+ )
394
 
395
 
396
  if __name__ == "__main__":
mcp_server/utils/__init__.py CHANGED
@@ -1,19 +1,19 @@
1
  """Utility modules for MCP server."""
2
 
3
  from .skill_utils import (
4
- parse_skill_frontmatter,
5
- get_skill,
6
- list_available_skills,
7
  extract_use_cases,
8
- generate_skills_section,
9
  generate_agent_prompt,
 
 
 
 
10
  )
11
 
12
  __all__ = [
13
- "parse_skill_frontmatter",
14
- "get_skill",
15
- "list_available_skills",
16
  "extract_use_cases",
17
- "generate_skills_section",
18
  "generate_agent_prompt",
 
 
 
 
19
  ]
 
1
  """Utility modules for MCP server."""
2
 
3
  from .skill_utils import (
 
 
 
4
  extract_use_cases,
 
5
  generate_agent_prompt,
6
+ generate_skills_section,
7
+ get_skill,
8
+ list_available_skills,
9
+ parse_skill_frontmatter,
10
  )
11
 
12
  __all__ = [
 
 
 
13
  "extract_use_cases",
 
14
  "generate_agent_prompt",
15
+ "generate_skills_section",
16
+ "get_skill",
17
+ "list_available_skills",
18
+ "parse_skill_frontmatter",
19
  ]
mcp_server/utils/skill_utils.py CHANGED
@@ -17,7 +17,7 @@ def parse_skill_frontmatter(content: str) -> tuple[dict, str]:
17
  Tuple of (metadata_dict, content_without_frontmatter)
18
  """
19
  # Match YAML frontmatter between --- markers
20
- pattern = r'^---\s*\n(.*?)\n---\s*\n(.*)$'
21
  match = re.match(pattern, content, re.DOTALL)
22
 
23
  if not match:
@@ -27,10 +27,10 @@ def parse_skill_frontmatter(content: str) -> tuple[dict, str]:
27
 
28
  # Parse YAML-like frontmatter (simple key: value pairs)
29
  metadata = {}
30
- for line in frontmatter.split('\n'):
31
  line = line.strip()
32
- if ':' in line:
33
- key, value = line.split(':', 1)
34
  metadata[key.strip()] = value.strip()
35
 
36
  return metadata, body.strip()
@@ -92,12 +92,14 @@ def list_available_skills() -> list[dict]:
92
  try:
93
  skill_data = get_skill(skill_dir.name)
94
  # Return lightweight metadata for listing
95
- skills.append({
96
- "skill_id": skill_data["skill_id"],
97
- "name": skill_data["name"],
98
- "description": skill_data["description"],
99
- "version": skill_data["version"],
100
- })
 
 
101
  except Exception as e:
102
  logger.error(f"Error loading skill {skill_dir.name}: {e}")
103
 
@@ -114,7 +116,9 @@ def extract_use_cases(content: str) -> str:
114
  Formatted use cases section, or empty string if not found
115
  """
116
  # Look for "When to Use This Skill" section
117
- pattern = r'## When to Use This Skill\s*\n\s*(?:Invoke this skill when.*?:)?\s*\n((?:[-*]\s+.+\n?)+)'
 
 
118
  match = re.search(pattern, content, re.IGNORECASE | re.MULTILINE)
119
 
120
  if match:
@@ -138,24 +142,24 @@ def generate_skills_section(skills: list[dict]) -> str:
138
 
139
  sections = []
140
  for skill in skills:
141
- skill_id = skill['skill_id']
142
- name = skill['name']
143
- version = skill['version']
144
- description = skill['description']
145
 
146
  # Get full skill data for dependencies
147
  try:
148
  full_skill = get_skill(skill_id)
149
- dependencies = full_skill.get('dependencies', 'None')
150
 
151
  # Extract "When to Use" section from skill content if available
152
- content = full_skill.get('content', '')
153
  use_cases = extract_use_cases(content)
154
 
155
  except Exception as e:
156
  logger.warning(f"Could not load full skill data for {skill_id}: {e}")
157
- dependencies = 'Unknown'
158
- use_cases = ''
159
 
160
  # Format skill section
161
  section = f"""---
@@ -170,7 +174,7 @@ def generate_skills_section(skills: list[dict]) -> str:
170
  """
171
  sections.append(section)
172
 
173
- return '\n'.join(sections)
174
 
175
 
176
  def generate_agent_prompt(skills_section: str) -> str:
 
17
  Tuple of (metadata_dict, content_without_frontmatter)
18
  """
19
  # Match YAML frontmatter between --- markers
20
+ pattern = r"^---\s*\n(.*?)\n---\s*\n(.*)$"
21
  match = re.match(pattern, content, re.DOTALL)
22
 
23
  if not match:
 
27
 
28
  # Parse YAML-like frontmatter (simple key: value pairs)
29
  metadata = {}
30
+ for line in frontmatter.split("\n"):
31
  line = line.strip()
32
+ if ":" in line:
33
+ key, value = line.split(":", 1)
34
  metadata[key.strip()] = value.strip()
35
 
36
  return metadata, body.strip()
 
92
  try:
93
  skill_data = get_skill(skill_dir.name)
94
  # Return lightweight metadata for listing
95
+ skills.append(
96
+ {
97
+ "skill_id": skill_data["skill_id"],
98
+ "name": skill_data["name"],
99
+ "description": skill_data["description"],
100
+ "version": skill_data["version"],
101
+ }
102
+ )
103
  except Exception as e:
104
  logger.error(f"Error loading skill {skill_dir.name}: {e}")
105
 
 
116
  Formatted use cases section, or empty string if not found
117
  """
118
  # Look for "When to Use This Skill" section
119
+ pattern = (
120
+ r"## When to Use This Skill\s*\n\s*(?:Invoke this skill when.*?:)?\s*\n((?:[-*]\s+.+\n?)+)"
121
+ )
122
  match = re.search(pattern, content, re.IGNORECASE | re.MULTILINE)
123
 
124
  if match:
 
142
 
143
  sections = []
144
  for skill in skills:
145
+ skill_id = skill["skill_id"]
146
+ name = skill["name"]
147
+ version = skill["version"]
148
+ description = skill["description"]
149
 
150
  # Get full skill data for dependencies
151
  try:
152
  full_skill = get_skill(skill_id)
153
+ dependencies = full_skill.get("dependencies", "None")
154
 
155
  # Extract "When to Use" section from skill content if available
156
+ content = full_skill.get("content", "")
157
  use_cases = extract_use_cases(content)
158
 
159
  except Exception as e:
160
  logger.warning(f"Could not load full skill data for {skill_id}: {e}")
161
+ dependencies = "Unknown"
162
+ use_cases = ""
163
 
164
  # Format skill section
165
  section = f"""---
 
174
  """
175
  sections.append(section)
176
 
177
+ return "\n".join(sections)
178
 
179
 
180
  def generate_agent_prompt(skills_section: str) -> str:
tests/test_agent_api/conftest.py CHANGED
@@ -106,7 +106,7 @@ def openai_client():
106
  """Create OpenAI client for testing Agent API."""
107
  return OpenAI(
108
  base_url="http://localhost:8000/v1",
109
- api_key="dummy" # Not used but required by SDK
110
  )
111
 
112
 
 
106
  """Create OpenAI client for testing Agent API."""
107
  return OpenAI(
108
  base_url="http://localhost:8000/v1",
109
+ api_key="dummy", # Not used but required by SDK
110
  )
111
 
112
 
tests/test_agent_api/test_direct_agent.py CHANGED
@@ -1,8 +1,10 @@
1
  """Direct test to see all agent output including errors."""
2
 
3
  import asyncio
 
4
  import httpx
5
 
 
6
  async def test_agent():
7
  """Test agent and print all logs."""
8
  async with httpx.AsyncClient(timeout=30.0) as client:
@@ -11,13 +13,13 @@ async def test_agent():
11
  print("=" * 80)
12
 
13
  async with client.stream(
14
- 'POST',
15
- 'http://localhost:8000/v1/chat/completions',
16
  json={
17
- 'model': 'claude-3-5-sonnet-20241022',
18
- 'messages': [{'role': 'user', 'content': 'Execute this Python code: print(2 + 2)'}],
19
- 'stream': True,
20
- }
21
  ) as response:
22
  print(f"Status: {response.status_code}")
23
  print(f"Headers: {dict(response.headers)}")
@@ -28,14 +30,15 @@ async def test_agent():
28
  async for line in response.aiter_lines():
29
  if line.strip():
30
  print(f"CHUNK: {line}")
31
- if line.startswith('data: ') and not line.startswith('data: [DONE]'):
32
  import json
 
33
  try:
34
  data = json.loads(line[6:])
35
- if 'choices' in data and len(data['choices']) > 0:
36
- delta = data['choices'][0].get('delta', {})
37
- if 'content' in delta:
38
- full_response += delta['content']
39
  except:
40
  pass
41
 
@@ -43,5 +46,6 @@ async def test_agent():
43
  print(f"\nFull response:\n{full_response}")
44
  print("=" * 80)
45
 
 
46
  if __name__ == "__main__":
47
  asyncio.run(test_agent())
 
1
  """Direct test to see all agent output including errors."""
2
 
3
  import asyncio
4
+
5
  import httpx
6
 
7
+
8
  async def test_agent():
9
  """Test agent and print all logs."""
10
  async with httpx.AsyncClient(timeout=30.0) as client:
 
13
  print("=" * 80)
14
 
15
  async with client.stream(
16
+ "POST",
17
+ "http://localhost:8000/v1/chat/completions",
18
  json={
19
+ "model": "claude-3-5-sonnet-20241022",
20
+ "messages": [{"role": "user", "content": "Execute this Python code: print(2 + 2)"}],
21
+ "stream": True,
22
+ },
23
  ) as response:
24
  print(f"Status: {response.status_code}")
25
  print(f"Headers: {dict(response.headers)}")
 
30
  async for line in response.aiter_lines():
31
  if line.strip():
32
  print(f"CHUNK: {line}")
33
+ if line.startswith("data: ") and not line.startswith("data: [DONE]"):
34
  import json
35
+
36
  try:
37
  data = json.loads(line[6:])
38
+ if "choices" in data and len(data["choices"]) > 0:
39
+ delta = data["choices"][0].get("delta", {})
40
+ if "content" in delta:
41
+ full_response += delta["content"]
42
  except:
43
  pass
44
 
 
46
  print(f"\nFull response:\n{full_response}")
47
  print("=" * 80)
48
 
49
+
50
  if __name__ == "__main__":
51
  asyncio.run(test_agent())
tests/test_agent_api/test_server.py CHANGED
@@ -9,7 +9,6 @@ These tests require:
9
 
10
  import httpx
11
  import pytest
12
- from openai import OpenAI
13
 
14
 
15
  class TestHealthEndpoints:
@@ -202,9 +201,7 @@ class TestSessionManagement:
202
  # Second turn: reference previous context
203
  stream2 = openai_client.chat.completions.create(
204
  model=test_model,
205
- messages=[
206
- {"role": "user", "content": "Now read the file you just created"}
207
- ],
208
  stream=True,
209
  user="test-session-123", # Same user ID for context
210
  )
 
9
 
10
  import httpx
11
  import pytest
 
12
 
13
 
14
  class TestHealthEndpoints:
 
201
  # Second turn: reference previous context
202
  stream2 = openai_client.chat.completions.create(
203
  model=test_model,
204
+ messages=[{"role": "user", "content": "Now read the file you just created"}],
 
 
205
  stream=True,
206
  user="test-session-123", # Same user ID for context
207
  )
tests/test_config_loading.py DELETED
@@ -1,163 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Demonstrate how .env values propagate through the system."""
3
-
4
- import os
5
- import sys
6
- from pathlib import Path
7
-
8
-
9
- def load_dotenv():
10
- """Load .env file into environment variables."""
11
- env_path = Path(__file__).parent / ".env"
12
-
13
- if not env_path.exists():
14
- print(f"⚠️ .env file not found at {env_path}")
15
- return False
16
-
17
- # Simple .env parser (avoids dependency on python-dotenv)
18
- with open(env_path) as f:
19
- for line in f:
20
- line = line.strip()
21
- # Skip comments and empty lines
22
- if not line or line.startswith('#'):
23
- continue
24
- # Parse KEY=VALUE
25
- if '=' in line:
26
- key, value = line.split('=', 1)
27
- key = key.strip()
28
- value = value.strip()
29
- # Set in environment
30
- os.environ[key] = value
31
-
32
- return True
33
-
34
-
35
- def test_config_loading():
36
- """Show how .env values get loaded and used."""
37
-
38
- print("=" * 70)
39
- print("Configuration Loading Test")
40
- print("=" * 70)
41
- print()
42
-
43
- # Step 0: Load .env file into environment
44
- print("STEP 0: Loading .env File")
45
- print("-" * 70)
46
- if load_dotenv():
47
- print(" ✓ .env file loaded into environment variables")
48
- else:
49
- print(" ✗ Failed to load .env file")
50
- return False
51
- print()
52
-
53
- # Step 1: Show raw environment variables (after loading .env)
54
- print("STEP 1: Raw Environment Variables (from .env)")
55
- print("-" * 70)
56
- env_vars = [
57
- "ANTHROPIC_API_KEY",
58
- "DEFAULT_MODEL",
59
- "AGENT_API_HOST",
60
- "AGENT_API_PORT",
61
- "MCP_SERVER_URL",
62
- "SESSION_TIMEOUT_SECONDS",
63
- ]
64
-
65
- for var in env_vars:
66
- value = os.getenv(var, "NOT SET")
67
- # Mask API keys
68
- if "API_KEY" in var and value != "NOT SET":
69
- value = value[:10] + "..." + value[-4:] if len(value) > 14 else "***"
70
- print(f" {var:30} = {value}")
71
-
72
- print()
73
-
74
- # Step 2: Import settings (this triggers .env loading)
75
- print("STEP 2: Importing settings from agent_api.config")
76
- print("-" * 70)
77
- print(" Executing: from agent_api.config import settings")
78
-
79
- try:
80
- from agent_api.config import settings
81
- print(" ✓ Settings imported successfully")
82
- except Exception as e:
83
- print(f" ✗ Failed to import settings: {e}")
84
- return False
85
-
86
- print()
87
-
88
- # Step 3: Show Pydantic settings object
89
- print("STEP 3: Pydantic Settings Object Values")
90
- print("-" * 70)
91
- print(f" settings.agent_api_host = {settings.agent_api_host}")
92
- print(f" settings.agent_api_port = {settings.agent_api_port}")
93
- print(f" settings.mcp_server_url = {settings.mcp_server_url}")
94
- print(f" settings.default_model = {settings.default_model}")
95
- print(f" settings.agent_name = {settings.agent_name}")
96
- print(f" settings.session_timeout_seconds = {settings.session_timeout_seconds}")
97
-
98
- print()
99
-
100
- # Step 4: Show how these values would be used
101
- print("STEP 4: How Values Are Used in Code")
102
- print("-" * 70)
103
-
104
- print("\n In agent_api/server.py:")
105
- print(f" AgentManager(mcp_server_url='{settings.mcp_server_url}')")
106
- print(f" logger.info('Default Model: {settings.default_model}')")
107
-
108
- print("\n In agent_api/agent_manager.py:")
109
- print(f" model = LiteLlm(model='{settings.default_model}')")
110
- print(f" agent = Agent(name='{settings.agent_name}', ...)")
111
-
112
- print()
113
-
114
- # Step 5: Verify API key in environment
115
- print("STEP 5: API Key Availability for LiteLLM")
116
- print("-" * 70)
117
-
118
- api_key = os.getenv("ANTHROPIC_API_KEY")
119
- if api_key:
120
- masked = api_key[:10] + "..." + api_key[-4:] if len(api_key) > 14 else "***"
121
- print(f" ✓ ANTHROPIC_API_KEY found in environment: {masked}")
122
- print(" → LiteLLM will use this key automatically")
123
- else:
124
- print(" ✗ ANTHROPIC_API_KEY not found in environment")
125
- print(" → LiteLLM will fail when trying to use Anthropic models")
126
-
127
- print()
128
-
129
- # Step 6: Summary
130
- print("=" * 70)
131
- print("SUMMARY: How .env Values Propagate")
132
- print("=" * 70)
133
- print()
134
- print("TWO PARALLEL PATHS:")
135
- print()
136
- print("Path 1: For Agent API Configuration")
137
- print(" .env file → Pydantic BaseSettings → settings object")
138
- print(" - Pydantic reads .env automatically when Settings() is created")
139
- print(" - Values available as: settings.agent_api_port, settings.default_model, etc.")
140
- print(" - Used by: server.py, agent_manager.py, session_store.py")
141
- print()
142
- print("Path 2: For API Keys (LiteLLM)")
143
- print(" .env file → os.environ → LiteLLM")
144
- print(" - Need to manually load .env (or use python-dotenv)")
145
- print(" - API keys read from os.environ by LiteLLM")
146
- print(" - Used by: LiteLlm() when creating model")
147
- print()
148
- print("IMPORTANT: Pydantic does NOT set os.environ!")
149
- print(" - settings.default_model works ✓")
150
- print(" - os.getenv('DEFAULT_MODEL') may not work ✗ (unless manually loaded)")
151
- print()
152
- print("All .env values are now available throughout the application!")
153
- print()
154
-
155
- return True
156
-
157
-
158
- if __name__ == "__main__":
159
- # Make sure we can import from agent_api
160
- sys.path.insert(0, "/Users/mohardey/Projects/code-execution-with-mcp")
161
-
162
- success = test_config_loading()
163
- sys.exit(0 if success else 1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tests/test_mcp_server/conftest.py CHANGED
@@ -50,5 +50,6 @@ async def cleanup_containers():
50
 
51
  # Cleanup after tests complete
52
  import server
 
53
  if server.docker_client:
54
  server.docker_client.cleanup_all()
 
50
 
51
  # Cleanup after tests complete
52
  import server
53
+
54
  if server.docker_client:
55
  server.docker_client.cleanup_all()
tests/test_mcp_server/test_server.py CHANGED
@@ -152,11 +152,11 @@ class TestWriteFileTool:
152
 
153
  async def test_write_python_script(self, mcp_client: FastMCPClient):
154
  """Test writing a Python script with special characters and executing it."""
155
- python_code = '''def greet(name):
156
  return f"Hello, {name}!"
157
 
158
  print(greet("World"))
159
- '''
160
 
161
  # Write the Python script
162
  write_result = await mcp_client.call_tool(
@@ -237,15 +237,13 @@ class TestReadFileTool:
237
  @pytest.mark.parametrize(
238
  "offset,line_count",
239
  [
240
- (0, 1), # First line only
241
- (0, 3), # First three lines
242
- (2, 2), # Lines 3-4
243
  (4, 10), # Last line (more than available)
244
  ],
245
  )
246
- async def test_read_pagination(
247
- self, mcp_client: FastMCPClient, offset: int, line_count: int
248
- ):
249
  """Test file reading with various pagination parameters."""
250
  content = "\n".join([f"Line {i}" for i in range(1, 11)])
251
 
@@ -347,7 +345,11 @@ class TestReadDocstringTool:
347
  ],
348
  )
349
  async def test_read_various_docstrings(
350
- self, mcp_client: FastMCPClient, function_name: str, function_code: str, expected_in_docstring: str
 
 
 
 
351
  ):
352
  """Test reading docstrings from various functions."""
353
  # Write the function to a file
@@ -457,8 +459,8 @@ class TestUserIsolation:
457
 
458
  async def test_different_users_isolated(self, monkeypatch):
459
  """Test that different users cannot access each other's files."""
460
- from fastmcp.client import Client as FastMCPClient
461
  import server
 
462
 
463
  # Test with user1
464
  def mock_get_user_id_user1(ctx):
@@ -558,8 +560,8 @@ This is the skill body.
558
 
559
  def test_get_nonexistent_skill(self):
560
  """Test requesting a skill that doesn't exist."""
561
- from utils.skill_utils import get_skill
562
  import pytest
 
563
 
564
  with pytest.raises(FileNotFoundError, match="not found"):
565
  get_skill("nonexistent-skill")
 
152
 
153
  async def test_write_python_script(self, mcp_client: FastMCPClient):
154
  """Test writing a Python script with special characters and executing it."""
155
+ python_code = """def greet(name):
156
  return f"Hello, {name}!"
157
 
158
  print(greet("World"))
159
+ """
160
 
161
  # Write the Python script
162
  write_result = await mcp_client.call_tool(
 
237
  @pytest.mark.parametrize(
238
  "offset,line_count",
239
  [
240
+ (0, 1), # First line only
241
+ (0, 3), # First three lines
242
+ (2, 2), # Lines 3-4
243
  (4, 10), # Last line (more than available)
244
  ],
245
  )
246
+ async def test_read_pagination(self, mcp_client: FastMCPClient, offset: int, line_count: int):
 
 
247
  """Test file reading with various pagination parameters."""
248
  content = "\n".join([f"Line {i}" for i in range(1, 11)])
249
 
 
345
  ],
346
  )
347
  async def test_read_various_docstrings(
348
+ self,
349
+ mcp_client: FastMCPClient,
350
+ function_name: str,
351
+ function_code: str,
352
+ expected_in_docstring: str,
353
  ):
354
  """Test reading docstrings from various functions."""
355
  # Write the function to a file
 
459
 
460
  async def test_different_users_isolated(self, monkeypatch):
461
  """Test that different users cannot access each other's files."""
 
462
  import server
463
+ from fastmcp.client import Client as FastMCPClient
464
 
465
  # Test with user1
466
  def mock_get_user_id_user1(ctx):
 
560
 
561
  def test_get_nonexistent_skill(self):
562
  """Test requesting a skill that doesn't exist."""
 
563
  import pytest
564
+ from utils.skill_utils import get_skill
565
 
566
  with pytest.raises(FileNotFoundError, match="not found"):
567
  get_skill("nonexistent-skill")