jkadowak commited on
Commit
aa122d4
·
1 Parent(s): 61bb790

add better tests, restructure spec, add dynamic prompt

Browse files
README.md CHANGED
@@ -19,19 +19,77 @@ A secure, containerized code execution platform for AI agents with multi-user is
19
 
20
  ## Architecture
21
 
22
- ![Architecture Diagram](https://huggingface.co/spaces/MCP-1st-Birthday/code-execution-with-mcp/blob/refs%2Fpr%2F2/docs/architecture.png)
23
-
24
- <details>
25
- <summary>View/Edit Diagram Source</summary>
26
-
27
- The diagram source is maintained in [generate_diagram.py](generate_diagram.py). To regenerate the diagram after making changes:
28
-
29
- ```bash
30
- python3 generate_diagram.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  ```
32
 
33
- </details>
34
-
35
  ## Key Features
36
 
37
  ### Security & Isolation
 
19
 
20
  ## Architecture
21
 
22
+ ```mermaid
23
+ flowchart TB
24
+ subgraph Frontend["🎨 Frontend Layer"]
25
+ UI["💬 Gradio UI<br/>━━━━━━━━━━━<br/>👤 User Authentication<br/>📡 Real-time Streaming<br/>📊 Artifact Viewer"]
26
+ end
27
+
28
+ subgraph AgentAPI["🤖 Agent API Layer"]
29
+ API["⚡ OpenAI-Compatible API<br/>━━━━━━━━━━━<br/>🔥 FastAPI + Google ADK<br/>💭 Agent Reasoning Loop<br/>🌊 Streaming Responses"]
30
+ end
31
+
32
+ subgraph MCPServer["🔧 MCP Server Layer"]
33
+ MCP{"🎯 FastMCP Server<br/>━━━━━━━━━━━<br/>🛠️ Tool Registry<br/>👥 User Context"}
34
+ T1[["⚙️ execute_bash<br/>Run Commands"]]
35
+ T2[["📖 read_file<br/>Read Files"]]
36
+ T3[["✍️ write_file<br/>Write Files"]]
37
+ T4[["📚 read_docstring<br/>Get Docs"]]
38
+ end
39
+
40
+ subgraph ExecClient["🐳 Execution Client Layer"]
41
+ CLIENT[("🎮 DockerExecutionClient<br/>━━━━━━━━━━━<br/>📦 Container Manager<br/>⚡ Async Executor<br/>🔐 User Isolation")]
42
+ end
43
+
44
+ subgraph Containers["🏠 Container Isolation Layer"]
45
+ C1{{"🐍 User Container 1<br/>━━━━━━━━━━━<br/>Python 3.12<br/>👤 Non-root User<br/>📁 /workspace"}}
46
+ CN{{"🐍 User Container N<br/>━━━━━━━━━━━<br/>Python 3.12<br/>👤 Non-root User<br/>📁 /workspace"}}
47
+ end
48
+
49
+ subgraph Resources["📦 Shared Resources"]
50
+ TOOLS[["🔨 Tools Directory<br/>🔒 Read-only"]]
51
+ SKILLS[["✨ Skills Directory<br/>🔒 Read-only"]]
52
+ end
53
+
54
+ UI ==>|"📨 HTTP Requests"| API
55
+ API ==>|"🔌 MCP Protocol"| MCP
56
+ MCP -.->|invoke| T1
57
+ MCP -.->|invoke| T2
58
+ MCP -.->|invoke| T3
59
+ MCP -.->|invoke| T4
60
+ T1 -->|execute| CLIENT
61
+ T2 -->|execute| CLIENT
62
+ T3 -->|execute| CLIENT
63
+ T4 -->|execute| CLIENT
64
+ CLIENT ==>|"🚀 Creates & Manages"| C1
65
+ CLIENT ==>|"🚀 Creates & Manages"| CN
66
+
67
+ TOOLS -.-o|"📌 mount /tools"| C1
68
+ TOOLS -.-o|"📌 mount /tools"| CN
69
+ SKILLS -.-o|"📌 mount /skills"| C1
70
+ SKILLS -.-o|"📌 mount /skills"| CN
71
+
72
+ style UI fill:#e3f2fd,stroke:#1565c0,stroke-width:3px,color:#000
73
+ style API fill:#fff3e0,stroke:#e65100,stroke-width:3px,color:#000
74
+ style MCP fill:#f3e5f5,stroke:#6a1b9a,stroke-width:3px,color:#000
75
+ style CLIENT fill:#e8f5e9,stroke:#2e7d32,stroke-width:3px,color:#000
76
+ style C1 fill:#fff9c4,stroke:#f57f17,stroke-width:3px,color:#000
77
+ style CN fill:#fff9c4,stroke:#f57f17,stroke-width:3px,color:#000
78
+ style TOOLS fill:#ffebee,stroke:#c62828,stroke-width:3px,color:#000
79
+ style SKILLS fill:#fce4ec,stroke:#ad1457,stroke-width:3px,color:#000
80
+ style T1 fill:#b3e5fc,stroke:#0277bd,stroke-width:2px,color:#000
81
+ style T2 fill:#b3e5fc,stroke:#0277bd,stroke-width:2px,color:#000
82
+ style T3 fill:#b3e5fc,stroke:#0277bd,stroke-width:2px,color:#000
83
+ style T4 fill:#b3e5fc,stroke:#0277bd,stroke-width:2px,color:#000
84
+
85
+ style Frontend fill:#e8eaf6,stroke:#3f51b5,stroke-width:3px,stroke-dasharray: 5 5
86
+ style AgentAPI fill:#fff8e1,stroke:#ff6f00,stroke-width:3px,stroke-dasharray: 5 5
87
+ style MCPServer fill:#f3e5f5,stroke:#7b1fa2,stroke-width:3px,stroke-dasharray: 5 5
88
+ style ExecClient fill:#e0f2f1,stroke:#00695c,stroke-width:3px,stroke-dasharray: 5 5
89
+ style Containers fill:#fffde7,stroke:#f9a825,stroke-width:3px,stroke-dasharray: 5 5
90
+ style Resources fill:#fce4ec,stroke:#c2185b,stroke-width:3px,stroke-dasharray: 5 5
91
  ```
92
 
 
 
93
  ## Key Features
94
 
95
  ### Security & Isolation
agent_api/agent_manager.py CHANGED
@@ -2,15 +2,14 @@
2
 
3
  import logging
4
 
 
 
5
  from fastmcp.client import Client as FastMCPClient
6
  from google.adk import Agent, Runner
7
  from google.adk.models.lite_llm import LiteLlm
8
  from google.adk.sessions import InMemorySessionService
9
  from google.adk.tools.mcp_tool import McpToolset, StreamableHTTPConnectionParams
10
 
11
- from cache import ttl_cache
12
- from config import settings
13
-
14
  logger = logging.getLogger(__name__)
15
 
16
 
@@ -62,9 +61,7 @@ class AgentManager:
62
 
63
  try:
64
  # Connect to MCP server using FastMCP client
65
- async with FastMCPClient(
66
- connection_params={"url": self.mcp_server_url}
67
- ) as client:
68
  # Fetch the agent_system_prompt
69
  result = await client.get_prompt("agent_system_prompt")
70
 
 
2
 
3
  import logging
4
 
5
+ from cache import ttl_cache
6
+ from config import settings
7
  from fastmcp.client import Client as FastMCPClient
8
  from google.adk import Agent, Runner
9
  from google.adk.models.lite_llm import LiteLlm
10
  from google.adk.sessions import InMemorySessionService
11
  from google.adk.tools.mcp_tool import McpToolset, StreamableHTTPConnectionParams
12
 
 
 
 
13
  logger = logging.getLogger(__name__)
14
 
15
 
 
61
 
62
  try:
63
  # Connect to MCP server using FastMCP client
64
+ async with FastMCPClient(connection_params={"url": self.mcp_server_url}) as client:
 
 
65
  # Fetch the agent_system_prompt
66
  result = await client.get_prompt("agent_system_prompt")
67
 
agent_api/cache.py CHANGED
@@ -3,7 +3,8 @@
3
  import functools
4
  import inspect
5
  import time
6
- from typing import Any, Callable, TypeVar
 
7
 
8
  T = TypeVar("T")
9
 
 
3
  import functools
4
  import inspect
5
  import time
6
+ from collections.abc import Callable
7
+ from typing import TypeVar
8
 
9
  T = TypeVar("T")
10
 
agent_api/config.py CHANGED
@@ -66,10 +66,10 @@ Be helpful, secure, and efficient!"""
66
  owner, _ = self.default_model.split("/", 1)
67
  except:
68
  owner = default_owner
69
-
70
  if owner:
71
  return owner
72
  return default_owner
73
-
74
 
75
  settings = Settings()
 
66
  owner, _ = self.default_model.split("/", 1)
67
  except:
68
  owner = default_owner
69
+
70
  if owner:
71
  return owner
72
  return default_owner
73
+
74
 
75
  settings = Settings()
agent_api/converters.py CHANGED
@@ -4,10 +4,9 @@ import json
4
  import logging
5
  import time
6
  import uuid
7
- from typing import AsyncGenerator
8
 
9
  from google.adk.events import Event
10
-
11
  from models import ChatCompletionChunk, Choice, DeltaContent
12
 
13
  logger = logging.getLogger(__name__)
 
4
  import logging
5
  import time
6
  import uuid
7
+ from collections.abc import AsyncGenerator
8
 
9
  from google.adk.events import Event
 
10
  from models import ChatCompletionChunk, Choice, DeltaContent
11
 
12
  logger = logging.getLogger(__name__)
agent_api/models.py CHANGED
@@ -1,8 +1,8 @@
1
  """OpenAI-compatible Pydantic models for Agent API."""
2
 
3
- from typing import Literal, Optional
4
 
5
- from pydantic import BaseModel, Field
6
 
7
 
8
  class ChatMessage(BaseModel):
@@ -18,17 +18,17 @@ class ChatCompletionRequest(BaseModel):
18
  model: str
19
  messages: list[ChatMessage]
20
  stream: Literal[True] = True # Only streaming supported
21
- temperature: Optional[float] = None
22
- max_tokens: Optional[int] = None
23
- user: Optional[str] = None # User identifier for session management
24
 
25
 
26
  class DeltaContent(BaseModel):
27
  """Delta content for streaming chunks."""
28
 
29
- role: Optional[str] = None
30
- content: Optional[str] = None
31
- tool_calls: Optional[list[dict]] = None
32
 
33
 
34
  class Choice(BaseModel):
@@ -36,7 +36,7 @@ class Choice(BaseModel):
36
 
37
  index: int
38
  delta: DeltaContent
39
- finish_reason: Optional[str] = None
40
 
41
 
42
  class ChatCompletionChunk(BaseModel):
@@ -47,7 +47,7 @@ class ChatCompletionChunk(BaseModel):
47
  created: int # Unix timestamp
48
  model: str
49
  choices: list[Choice]
50
- usage: Optional[dict] = None
51
 
52
 
53
  class ModelInfo(BaseModel):
 
1
  """OpenAI-compatible Pydantic models for Agent API."""
2
 
3
+ from typing import Literal
4
 
5
+ from pydantic import BaseModel
6
 
7
 
8
  class ChatMessage(BaseModel):
 
18
  model: str
19
  messages: list[ChatMessage]
20
  stream: Literal[True] = True # Only streaming supported
21
+ temperature: float | None = None
22
+ max_tokens: int | None = None
23
+ user: str | None = None # User identifier for session management
24
 
25
 
26
  class DeltaContent(BaseModel):
27
  """Delta content for streaming chunks."""
28
 
29
+ role: str | None = None
30
+ content: str | None = None
31
+ tool_calls: list[dict] | None = None
32
 
33
 
34
  class Choice(BaseModel):
 
36
 
37
  index: int
38
  delta: DeltaContent
39
+ finish_reason: str | None = None
40
 
41
 
42
  class ChatCompletionChunk(BaseModel):
 
47
  created: int # Unix timestamp
48
  model: str
49
  choices: list[Choice]
50
+ usage: dict | None = None
51
 
52
 
53
  class ModelInfo(BaseModel):
agent_api/server.py CHANGED
@@ -7,13 +7,12 @@ from contextlib import asynccontextmanager
7
  from datetime import datetime
8
 
9
  import httpx
10
- from fastapi import FastAPI, HTTPException, Request, status
11
- from fastapi.responses import JSONResponse, StreamingResponse
12
- from google.genai import types
13
-
14
  from agent_manager import AgentManager
15
  from config import settings
16
  from converters import convert_adk_events_to_openai, format_sse, format_sse_done
 
 
 
17
  from models import (
18
  ChatCompletionRequest,
19
  HealthResponse,
@@ -85,9 +84,7 @@ async def http_exception_handler(request: Request, exc: HTTPException):
85
  Returns:
86
  JSON response with error details
87
  """
88
- return JSONResponse(
89
- status_code=exc.status_code, content={"error": {"message": exc.detail}}
90
- )
91
 
92
 
93
  @app.exception_handler(Exception)
@@ -142,7 +139,9 @@ async def list_models():
142
  return ModelList(
143
  data=[
144
  ModelInfo(
145
- id=settings.default_model, created=int(time.time()), owned_by=settings.get_model_owner()
 
 
146
  )
147
  ]
148
  )
@@ -196,10 +195,7 @@ async def chat_completions(request: ChatCompletionRequest):
196
  logger.debug(f"User message: {user_message[:100]}...")
197
 
198
  # Convert string message to google.genai.types.Content
199
- message_content = types.Content(
200
- role="user",
201
- parts=[types.Part(text=user_message)]
202
- )
203
 
204
  # Stream events
205
  async def event_generator():
@@ -214,9 +210,7 @@ async def chat_completions(request: ChatCompletionRequest):
214
  )
215
 
216
  # Convert to OpenAI format
217
- openai_chunks = convert_adk_events_to_openai(
218
- events=adk_events, model=request.model
219
- )
220
 
221
  # Stream as SSE
222
  async for chunk in openai_chunks:
@@ -266,6 +260,7 @@ async def root():
266
 
267
  if __name__ == "__main__":
268
  import os
 
269
  import uvicorn
270
 
271
  uvicorn.run(
 
7
  from datetime import datetime
8
 
9
  import httpx
 
 
 
 
10
  from agent_manager import AgentManager
11
  from config import settings
12
  from converters import convert_adk_events_to_openai, format_sse, format_sse_done
13
+ from fastapi import FastAPI, HTTPException, Request, status
14
+ from fastapi.responses import JSONResponse, StreamingResponse
15
+ from google.genai import types
16
  from models import (
17
  ChatCompletionRequest,
18
  HealthResponse,
 
84
  Returns:
85
  JSON response with error details
86
  """
87
+ return JSONResponse(status_code=exc.status_code, content={"error": {"message": exc.detail}})
 
 
88
 
89
 
90
  @app.exception_handler(Exception)
 
139
  return ModelList(
140
  data=[
141
  ModelInfo(
142
+ id=settings.default_model,
143
+ created=int(time.time()),
144
+ owned_by=settings.get_model_owner(),
145
  )
146
  ]
147
  )
 
195
  logger.debug(f"User message: {user_message[:100]}...")
196
 
197
  # Convert string message to google.genai.types.Content
198
+ message_content = types.Content(role="user", parts=[types.Part(text=user_message)])
 
 
 
199
 
200
  # Stream events
201
  async def event_generator():
 
210
  )
211
 
212
  # Convert to OpenAI format
213
+ openai_chunks = convert_adk_events_to_openai(events=adk_events, model=request.model)
 
 
214
 
215
  # Stream as SSE
216
  async for chunk in openai_chunks:
 
260
 
261
  if __name__ == "__main__":
262
  import os
263
+
264
  import uvicorn
265
 
266
  uvicorn.run(
agent_api/session_store.py CHANGED
@@ -3,9 +3,8 @@
3
  import logging
4
  from datetime import datetime, timedelta
5
 
6
- from google.adk.sessions import InMemorySessionService
7
-
8
  from config import settings
 
9
 
10
  logger = logging.getLogger(__name__)
11
 
 
3
  import logging
4
  from datetime import datetime, timedelta
5
 
 
 
6
  from config import settings
7
+ from google.adk.sessions import InMemorySessionService
8
 
9
  logger = logging.getLogger(__name__)
10
 
claude_spec/CLAUDE.md ADDED
@@ -0,0 +1,315 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Claude Code Specification Directory
2
+
3
+ This directory (`claude_spec/`) contains context documents, prompts, and artifacts specifically for Claude Code agent interactions. These files enable effective work continuation across sessions and provide structured context for AI-assisted development.
4
+
5
+ ---
6
+
7
+ ## Directory Purpose
8
+
9
+ The `claude_spec/` directory serves as a communication layer between:
10
+ - **Human developers** → Claude Code (via prompts)
11
+ - **Claude Code** → Future Claude Code sessions (via handoff documents)
12
+ - **Claude Code** → Human developers (via planning documents)
13
+
14
+ All files in this directory are treated as high-priority context by Claude Code and should be referenced when starting new sessions or planning work.
15
+
16
+ ---
17
+
18
+ ## File Naming Conventions
19
+
20
+ ### `prompt-*.md`
21
+
22
+ **Purpose:** Human-generated prompts for Claude Code
23
+
24
+ **Created by:** Human developers
25
+
26
+ **Used by:** Claude Code as initial task instructions
27
+
28
+ **Naming pattern:** `prompt-<feature-name>.md`
29
+
30
+ **Description:**
31
+ These files contain initial task descriptions, requirements, and context written by humans to guide Claude Code in implementing specific features or components. They serve as the starting point for a development session.
32
+
33
+ **Example:**
34
+ - `prompt-docker-executor.md` - Instructions for implementing the Docker execution client
35
+
36
+ **Best practices:**
37
+ - Write clear, specific requirements
38
+ - Include acceptance criteria
39
+ - Reference related documentation or files
40
+ - Specify any constraints or architectural decisions
41
+
42
+ ---
43
+
44
+ ### `implementation-status-*.md`
45
+
46
+ **Purpose:** Handoff documents detailing completed work and next steps
47
+
48
+ **Created by:** Claude Code at session end
49
+
50
+ **Used by:** Claude Code in future sessions, Human developers for review
51
+
52
+ **Naming pattern:** `implementation-status-<component-name>.md`
53
+
54
+ **Description:**
55
+ These are comprehensive handoff documents that capture:
56
+ - **Session start state:** What the codebase looked like before work began
57
+ - **Executive summary:** High-level overview of changes made
58
+ - **Technical details:** How and why changes were implemented
59
+ - **Next steps:** Prioritized actions for continuing work
60
+ - **Known issues:** Documented limitations or problems
61
+ - **Verification:** Test results and validation steps
62
+
63
+ **Examples:**
64
+ - `implementation-status-agent-api.md` - Agent API implementation handoff
65
+ - `implementation-status-mcp-server.md` - MCP server implementation handoff
66
+ - `implementation-status-skills-system.md` - Skills system implementation handoff
67
+ - `implementation-status-agent-api-testing.md` - Agent API testing handoff
68
+
69
+ **Structure:**
70
+ ```markdown
71
+ # Handoff Document: [Feature Name]
72
+
73
+ **Session Date:** YYYY-MM-DD
74
+ **Completion Status:** X% complete
75
+
76
+ ## Session Start: Repository State
77
+ - Context about starting state
78
+ - Files modified/created
79
+ - Known issues at start
80
+
81
+ ## Executive Summary: Changes Made
82
+ - High-level overview
83
+ - Key accomplishments
84
+ - Metrics
85
+
86
+ ## Technical Details: Implementation
87
+ - Detailed explanation of changes
88
+ - Code examples
89
+ - Architecture diagrams
90
+ - Design decisions and rationale
91
+
92
+ ## Verification & Testing
93
+ - Test results
94
+ - Manual validation steps
95
+
96
+ ## Next Steps: Recommended Actions
97
+ - Prioritized task list
98
+ - Estimated effort
99
+ - Dependencies
100
+
101
+ ## Critical Context for Next Session
102
+ - Environment setup
103
+ - Key files to know
104
+ - Integration points
105
+
106
+ ## Known Issues & Limitations
107
+ - Documented problems
108
+ - Workarounds
109
+ - Future improvements
110
+ ```
111
+
112
+ **Best practices:**
113
+ - Write immediately after completing work
114
+ - Be specific about what was done and why
115
+ - Include code snippets for key changes
116
+ - Document all known issues
117
+ - Provide actionable next steps
118
+ - Include verification steps
119
+
120
+ ---
121
+
122
+ ### `implementation-plan-*.md`
123
+
124
+ **Purpose:** Planning documents generated by Claude Code in planning mode
125
+
126
+ **Created by:** Claude Code during planning phase
127
+
128
+ **Used by:** Claude Code for implementation, Human developers for review/approval
129
+
130
+ **Naming pattern:** `implementation-plan-<feature-name>.md`
131
+
132
+ **Description:**
133
+ These documents are created when Claude Code enters planning mode for complex tasks. They contain:
134
+ - **Current state analysis:** Understanding of existing codebase
135
+ - **Proposed approach:** High-level implementation strategy
136
+ - **File-by-file changes:** Detailed plan for modifications
137
+ - **Design decisions:** Rationale for architectural choices
138
+ - **Risk assessment:** Potential issues and mitigation
139
+ - **Testing strategy:** How to verify the implementation
140
+
141
+ **Examples:**
142
+ - `implementation-plan-agent-api.md` - Planning document for Agent API implementation
143
+
144
+ **Best practices:**
145
+ - Create before starting complex work
146
+ - Present multiple options when applicable
147
+ - Explain trade-offs clearly
148
+ - Get human approval before implementing
149
+ - Update if approach changes during implementation
150
+
151
+ ---
152
+
153
+ ## Workflow
154
+
155
+ ### Starting a New Feature
156
+
157
+ 1. **Human** creates `prompt-<feature>.md` with requirements
158
+ 2. **Claude Code** reads prompt and assesses complexity
159
+ 3. **Claude Code** (if complex) creates `implementation-plan-<feature>.md`
160
+ 4. **Human** reviews and approves plan
161
+ 5. **Claude Code** implements according to plan
162
+ 6. **Claude Code** creates `implementation-status-<feature>.md` at session end
163
+
164
+ ### Continuing Work
165
+
166
+ 1. **Claude Code** reads relevant `implementation-status-*.md` files, as specified by the human
167
+ 2. **Claude Code** reviews "Next Steps" section
168
+ 3. **Human** provides additional context or priorities
169
+ 4. **Claude Code** continues work
170
+ 5. **Claude Code** creates new `implementation-status-*.md` at session end
171
+
172
+ ---
173
+
174
+ ## File Lifecycle
175
+
176
+ ### Prompt Files
177
+ - **Created:** Before work begins
178
+ - **Updated:** When additional context or requirements are needed that were not initially provided
179
+ - **Deleted:** Never (historical record)
180
+
181
+ ### Implementation Status Files
182
+ - **Created:** End of each major work session
183
+ - **Updated:** Only when additional work is done in the same session after the document was initially created
184
+ - **Deleted:** Never (historical record)
185
+
186
+ ### Implementation Plan Files
187
+ - **Created:** During planning phase for complex features
188
+ - **Updated:** If approach changes significantly
189
+ - **Deleted:** Never (historical record)
190
+
191
+ ---
192
+
193
+ ### Documentation
194
+
195
+ **Relationship to docs/:**
196
+ - `claude_spec/` = Internal context for Claude Code
197
+ - `docs/` = External documentation for users/developers
198
+ - Implementation status may reference docs files
199
+ - Public-facing docs should be created separately
200
+
201
+ ---
202
+
203
+ ## Best Practices
204
+
205
+ ### For Human Developers
206
+
207
+ **Writing Prompts:**
208
+ - Be specific about requirements
209
+ - Include relevant context (file paths, existing patterns)
210
+ - Specify constraints or preferences
211
+ - Provide examples when helpful
212
+
213
+ **Reviewing Plans:**
214
+ ## Integration with Development Workflow
215
+
216
+ ### Version Control
217
+
218
+ **Commit Strategy:**
219
+ - Commit `prompt-*.md` files separately from code
220
+ - Commit `implementation-plan-*.md` with "Plan:" prefix
221
+ - Commit `implementation-status-*.md` with "Handoff:" prefix
222
+
223
+ **Example:**
224
+ ```bash
225
+ git add claude_spec/prompt-new-feature.md
226
+ git commit -m "Plan: Add requirements for new feature"
227
+
228
+ git add claude_spec/implementation-plan-new-feature.md
229
+ git commit -m "Plan: Design approach for new feature"
230
+
231
+ git add src/ tests/ claude_spec/implementation-status-new-feature.md
232
+ git commit -m "Implement new feature
233
+
234
+ - Add core functionality
235
+ - Add tests
236
+ - Update documentation
237
+
238
+ Handoff: claude_spec/implementation-status-new-feature.md"
239
+ ```
240
+
241
+ ### Pull Requests
242
+
243
+ **When creating PRs:**
244
+ - Link to relevant `implementation-status-*.md` in PR description
245
+ - Reference `implementation-plan-*.md` if major architectural changes
246
+ - Include "Next Steps" from handoff document in PR comments
247
+ - Check for security implications
248
+ - Validate architectural decisions
249
+ - Consider maintenance burden
250
+ - Verify test coverage plans
251
+
252
+ **Using Handoffs:**
253
+ - Read "Next Steps" before continuing work
254
+ - Review "Known Issues" to avoid duplicate effort
255
+ - Check "Critical Context" for environment setup
256
+ - Validate against "Verification" section
257
+
258
+ ### For Claude Code
259
+
260
+ **Creating Status Documents:**
261
+ - Write immediately after completing work (while fresh)
262
+ - Be thorough in technical details
263
+ - Include actual code snippets (not pseudocode)
264
+ - Document all decisions and rationale
265
+ - Provide actionable next steps
266
+ - Test all verification steps
267
+
268
+ **Creating Plans:**
269
+ - Analyze existing code before planning
270
+ - Present multiple options with trade-offs
271
+ - Explain design decisions
272
+ - Consider security and performance
273
+ - Plan for testing and validation
274
+ - Update if approach changes
275
+
276
+ **Reading Context:**
277
+ - Always check for relevant `implementation-status-*.md` files
278
+ - Read "Next Steps" sections for priorities
279
+ - Note any "Known Issues" to avoid
280
+ - Check "Critical Context" for setup requirements
281
+
282
+ ---
283
+
284
+ ## Directory Structure Example
285
+
286
+ ```
287
+ claude_spec/
288
+ ├── CLAUDE.md # This file
289
+ ├── README.md # Brief overview
290
+
291
+ ├── prompt-docker-executor.md # Initial requirements
292
+ ├── implementation-plan-agent-api.md # Planning document
293
+
294
+ ├── implementation-status-docker-executor.md # Component handoff
295
+ ├── implementation-status-mcp-server.md # Component handoff
296
+ ├── implementation-status-agent-api.md # Component handoff
297
+ ├── implementation-status-agent-api-testing.md # Testing handoff
298
+ └── implementation-status-skills-system.md # Component handoff
299
+ ```
300
+
301
+ ---
302
+
303
+ ## Conventions Summary
304
+
305
+ | Prefix | Creator | Purpose | Lifecycle |
306
+ |--------|---------|---------|-----------|
307
+ | `prompt-` | Human | Initial task instructions | Created once, kept as historical record |
308
+ | `implementation-status-` | Claude Code | Session handoff with next steps | Created/updated per session |
309
+ | `implementation-plan-` | Claude Code | Planning document for complex tasks | Created in planning mode |
310
+
311
+ ---
312
+
313
+ **Last Updated:** November 28, 2025
314
+ **Maintained By:** Development Team + Claude Code
315
+ **Purpose:** Enable effective AI-assisted development across sessions
generate_diagram.py CHANGED
@@ -120,7 +120,7 @@ def generate_diagram():
120
 
121
  # Generate PNG
122
  output_png = docs_dir / "architecture.png"
123
- print(f"\nGenerating PNG diagram...")
124
  try:
125
  subprocess.run(
126
  [
@@ -143,7 +143,7 @@ def generate_diagram():
143
 
144
  # Generate SVG
145
  output_svg = docs_dir / "architecture.svg"
146
- print(f"\nGenerating SVG diagram...")
147
  try:
148
  subprocess.run(
149
  [
@@ -165,11 +165,11 @@ def generate_diagram():
165
  print("\n" + "=" * 80)
166
  print("SUCCESS! Diagram generated successfully")
167
  print("=" * 80)
168
- print(f"\nFiles created:")
169
  print(f" - {output_png}")
170
  print(f" - {output_svg}")
171
- print(f"\nAdd to README.md:")
172
- print(f' ![Architecture Diagram](docs/architecture.png)')
173
  print("=" * 80 + "\n")
174
 
175
  return True
 
120
 
121
  # Generate PNG
122
  output_png = docs_dir / "architecture.png"
123
+ print("\nGenerating PNG diagram...")
124
  try:
125
  subprocess.run(
126
  [
 
143
 
144
  # Generate SVG
145
  output_svg = docs_dir / "architecture.svg"
146
+ print("\nGenerating SVG diagram...")
147
  try:
148
  subprocess.run(
149
  [
 
165
  print("\n" + "=" * 80)
166
  print("SUCCESS! Diagram generated successfully")
167
  print("=" * 80)
168
+ print("\nFiles created:")
169
  print(f" - {output_png}")
170
  print(f" - {output_svg}")
171
+ print("\nAdd to README.md:")
172
+ print(" ![Architecture Diagram](docs/architecture.png)")
173
  print("=" * 80 + "\n")
174
 
175
  return True
mcp_server/docker_client.py CHANGED
@@ -228,9 +228,7 @@ class DockerExecutionClient:
228
  loop = asyncio.get_event_loop()
229
  return await loop.run_in_executor(None, _get_docstring)
230
 
231
- async def read_file_docstring(
232
- self, user_id: str, file_path: str, function_name: str
233
- ) -> str:
234
  """Read the docstring of a function from a Python file in the user's container.
235
 
236
  Args:
 
228
  loop = asyncio.get_event_loop()
229
  return await loop.run_in_executor(None, _get_docstring)
230
 
231
+ async def read_file_docstring(self, user_id: str, file_path: str, function_name: str) -> str:
 
 
232
  """Read the docstring of a function from a Python file in the user's container.
233
 
234
  Args:
mcp_server/server.py CHANGED
@@ -8,17 +8,15 @@ import logging
8
  from contextlib import asynccontextmanager
9
  from typing import Annotated
10
 
11
- from fastmcp import FastMCP
12
- from fastmcp import Context
13
-
14
  from docker_client import DockerExecutionClient
15
- from starlette.responses import JSONResponse
16
  from starlette.requests import Request
 
17
  from utils import (
18
- list_available_skills,
19
- get_skill,
20
- generate_skills_section,
21
  generate_agent_prompt,
 
 
 
22
  )
23
 
24
  # Configure logging
@@ -143,7 +141,7 @@ async def execute_bash(
143
  return {
144
  "exit_code": -1,
145
  "stdout": "",
146
- "stderr": f"Error: {str(e)}",
147
  }
148
 
149
 
@@ -326,11 +324,13 @@ async def health_check(request: Request):
326
  Returns:
327
  JSON response with server status
328
  """
329
- return JSONResponse({
330
- "status": "healthy",
331
- "service": "mcp-code-executor",
332
- "client_initialized": docker_client is not None,
333
- })
 
 
334
 
335
 
336
  # Skills endpoints
@@ -346,15 +346,20 @@ async def list_skills(request: Request):
346
  """
347
  try:
348
  skills = list_available_skills()
349
- return JSONResponse({
350
- "skills": skills,
351
- "count": len(skills),
352
- })
 
 
353
  except Exception as e:
354
  logger.error(f"Error listing skills: {e}")
355
- return JSONResponse({
356
- "error": str(e),
357
- }, status_code=500)
 
 
 
358
 
359
 
360
  @mcp.custom_route("/skills/{skill_name}", methods=["GET"])
@@ -372,14 +377,20 @@ async def get_skill_by_name(request: Request):
372
  skill_data = get_skill(skill_name)
373
  return JSONResponse(skill_data)
374
  except FileNotFoundError as e:
375
- return JSONResponse({
376
- "error": str(e),
377
- }, status_code=404)
 
 
 
378
  except Exception as e:
379
  logger.error(f"Error retrieving skill {skill_name}: {e}")
380
- return JSONResponse({
381
- "error": str(e),
382
- }, status_code=500)
 
 
 
383
 
384
 
385
  if __name__ == "__main__":
 
8
  from contextlib import asynccontextmanager
9
  from typing import Annotated
10
 
 
 
 
11
  from docker_client import DockerExecutionClient
12
+ from fastmcp import Context, FastMCP
13
  from starlette.requests import Request
14
+ from starlette.responses import JSONResponse
15
  from utils import (
 
 
 
16
  generate_agent_prompt,
17
+ generate_skills_section,
18
+ get_skill,
19
+ list_available_skills,
20
  )
21
 
22
  # Configure logging
 
141
  return {
142
  "exit_code": -1,
143
  "stdout": "",
144
+ "stderr": f"Error: {e!s}",
145
  }
146
 
147
 
 
324
  Returns:
325
  JSON response with server status
326
  """
327
+ return JSONResponse(
328
+ {
329
+ "status": "healthy",
330
+ "service": "mcp-code-executor",
331
+ "client_initialized": docker_client is not None,
332
+ }
333
+ )
334
 
335
 
336
  # Skills endpoints
 
346
  """
347
  try:
348
  skills = list_available_skills()
349
+ return JSONResponse(
350
+ {
351
+ "skills": skills,
352
+ "count": len(skills),
353
+ }
354
+ )
355
  except Exception as e:
356
  logger.error(f"Error listing skills: {e}")
357
+ return JSONResponse(
358
+ {
359
+ "error": str(e),
360
+ },
361
+ status_code=500,
362
+ )
363
 
364
 
365
  @mcp.custom_route("/skills/{skill_name}", methods=["GET"])
 
377
  skill_data = get_skill(skill_name)
378
  return JSONResponse(skill_data)
379
  except FileNotFoundError as e:
380
+ return JSONResponse(
381
+ {
382
+ "error": str(e),
383
+ },
384
+ status_code=404,
385
+ )
386
  except Exception as e:
387
  logger.error(f"Error retrieving skill {skill_name}: {e}")
388
+ return JSONResponse(
389
+ {
390
+ "error": str(e),
391
+ },
392
+ status_code=500,
393
+ )
394
 
395
 
396
  if __name__ == "__main__":
mcp_server/utils/__init__.py CHANGED
@@ -1,19 +1,19 @@
1
  """Utility modules for MCP server."""
2
 
3
  from .skill_utils import (
4
- parse_skill_frontmatter,
5
- get_skill,
6
- list_available_skills,
7
  extract_use_cases,
8
- generate_skills_section,
9
  generate_agent_prompt,
 
 
 
 
10
  )
11
 
12
  __all__ = [
13
- "parse_skill_frontmatter",
14
- "get_skill",
15
- "list_available_skills",
16
  "extract_use_cases",
17
- "generate_skills_section",
18
  "generate_agent_prompt",
 
 
 
 
19
  ]
 
1
  """Utility modules for MCP server."""
2
 
3
  from .skill_utils import (
 
 
 
4
  extract_use_cases,
 
5
  generate_agent_prompt,
6
+ generate_skills_section,
7
+ get_skill,
8
+ list_available_skills,
9
+ parse_skill_frontmatter,
10
  )
11
 
12
  __all__ = [
 
 
 
13
  "extract_use_cases",
 
14
  "generate_agent_prompt",
15
+ "generate_skills_section",
16
+ "get_skill",
17
+ "list_available_skills",
18
+ "parse_skill_frontmatter",
19
  ]
mcp_server/utils/skill_utils.py CHANGED
@@ -17,7 +17,7 @@ def parse_skill_frontmatter(content: str) -> tuple[dict, str]:
17
  Tuple of (metadata_dict, content_without_frontmatter)
18
  """
19
  # Match YAML frontmatter between --- markers
20
- pattern = r'^---\s*\n(.*?)\n---\s*\n(.*)$'
21
  match = re.match(pattern, content, re.DOTALL)
22
 
23
  if not match:
@@ -27,10 +27,10 @@ def parse_skill_frontmatter(content: str) -> tuple[dict, str]:
27
 
28
  # Parse YAML-like frontmatter (simple key: value pairs)
29
  metadata = {}
30
- for line in frontmatter.split('\n'):
31
  line = line.strip()
32
- if ':' in line:
33
- key, value = line.split(':', 1)
34
  metadata[key.strip()] = value.strip()
35
 
36
  return metadata, body.strip()
@@ -92,12 +92,14 @@ def list_available_skills() -> list[dict]:
92
  try:
93
  skill_data = get_skill(skill_dir.name)
94
  # Return lightweight metadata for listing
95
- skills.append({
96
- "skill_id": skill_data["skill_id"],
97
- "name": skill_data["name"],
98
- "description": skill_data["description"],
99
- "version": skill_data["version"],
100
- })
 
 
101
  except Exception as e:
102
  logger.error(f"Error loading skill {skill_dir.name}: {e}")
103
 
@@ -114,7 +116,9 @@ def extract_use_cases(content: str) -> str:
114
  Formatted use cases section, or empty string if not found
115
  """
116
  # Look for "When to Use This Skill" section
117
- pattern = r'## When to Use This Skill\s*\n\s*(?:Invoke this skill when.*?:)?\s*\n((?:[-*]\s+.+\n?)+)'
 
 
118
  match = re.search(pattern, content, re.IGNORECASE | re.MULTILINE)
119
 
120
  if match:
@@ -138,24 +142,24 @@ def generate_skills_section(skills: list[dict]) -> str:
138
 
139
  sections = []
140
  for skill in skills:
141
- skill_id = skill['skill_id']
142
- name = skill['name']
143
- version = skill['version']
144
- description = skill['description']
145
 
146
  # Get full skill data for dependencies
147
  try:
148
  full_skill = get_skill(skill_id)
149
- dependencies = full_skill.get('dependencies', 'None')
150
 
151
  # Extract "When to Use" section from skill content if available
152
- content = full_skill.get('content', '')
153
  use_cases = extract_use_cases(content)
154
 
155
  except Exception as e:
156
  logger.warning(f"Could not load full skill data for {skill_id}: {e}")
157
- dependencies = 'Unknown'
158
- use_cases = ''
159
 
160
  # Format skill section
161
  section = f"""---
@@ -170,7 +174,7 @@ def generate_skills_section(skills: list[dict]) -> str:
170
  """
171
  sections.append(section)
172
 
173
- return '\n'.join(sections)
174
 
175
 
176
  def generate_agent_prompt(skills_section: str) -> str:
 
17
  Tuple of (metadata_dict, content_without_frontmatter)
18
  """
19
  # Match YAML frontmatter between --- markers
20
+ pattern = r"^---\s*\n(.*?)\n---\s*\n(.*)$"
21
  match = re.match(pattern, content, re.DOTALL)
22
 
23
  if not match:
 
27
 
28
  # Parse YAML-like frontmatter (simple key: value pairs)
29
  metadata = {}
30
+ for line in frontmatter.split("\n"):
31
  line = line.strip()
32
+ if ":" in line:
33
+ key, value = line.split(":", 1)
34
  metadata[key.strip()] = value.strip()
35
 
36
  return metadata, body.strip()
 
92
  try:
93
  skill_data = get_skill(skill_dir.name)
94
  # Return lightweight metadata for listing
95
+ skills.append(
96
+ {
97
+ "skill_id": skill_data["skill_id"],
98
+ "name": skill_data["name"],
99
+ "description": skill_data["description"],
100
+ "version": skill_data["version"],
101
+ }
102
+ )
103
  except Exception as e:
104
  logger.error(f"Error loading skill {skill_dir.name}: {e}")
105
 
 
116
  Formatted use cases section, or empty string if not found
117
  """
118
  # Look for "When to Use This Skill" section
119
+ pattern = (
120
+ r"## When to Use This Skill\s*\n\s*(?:Invoke this skill when.*?:)?\s*\n((?:[-*]\s+.+\n?)+)"
121
+ )
122
  match = re.search(pattern, content, re.IGNORECASE | re.MULTILINE)
123
 
124
  if match:
 
142
 
143
  sections = []
144
  for skill in skills:
145
+ skill_id = skill["skill_id"]
146
+ name = skill["name"]
147
+ version = skill["version"]
148
+ description = skill["description"]
149
 
150
  # Get full skill data for dependencies
151
  try:
152
  full_skill = get_skill(skill_id)
153
+ dependencies = full_skill.get("dependencies", "None")
154
 
155
  # Extract "When to Use" section from skill content if available
156
+ content = full_skill.get("content", "")
157
  use_cases = extract_use_cases(content)
158
 
159
  except Exception as e:
160
  logger.warning(f"Could not load full skill data for {skill_id}: {e}")
161
+ dependencies = "Unknown"
162
+ use_cases = ""
163
 
164
  # Format skill section
165
  section = f"""---
 
174
  """
175
  sections.append(section)
176
 
177
+ return "\n".join(sections)
178
 
179
 
180
  def generate_agent_prompt(skills_section: str) -> str:
tests/test_agent_api/conftest.py CHANGED
@@ -106,7 +106,7 @@ def openai_client():
106
  """Create OpenAI client for testing Agent API."""
107
  return OpenAI(
108
  base_url="http://localhost:8000/v1",
109
- api_key="dummy" # Not used but required by SDK
110
  )
111
 
112
 
 
106
  """Create OpenAI client for testing Agent API."""
107
  return OpenAI(
108
  base_url="http://localhost:8000/v1",
109
+ api_key="dummy", # Not used but required by SDK
110
  )
111
 
112
 
tests/test_agent_api/test_direct_agent.py CHANGED
@@ -1,8 +1,10 @@
1
  """Direct test to see all agent output including errors."""
2
 
3
  import asyncio
 
4
  import httpx
5
 
 
6
  async def test_agent():
7
  """Test agent and print all logs."""
8
  async with httpx.AsyncClient(timeout=30.0) as client:
@@ -11,13 +13,13 @@ async def test_agent():
11
  print("=" * 80)
12
 
13
  async with client.stream(
14
- 'POST',
15
- 'http://localhost:8000/v1/chat/completions',
16
  json={
17
- 'model': 'claude-3-5-sonnet-20241022',
18
- 'messages': [{'role': 'user', 'content': 'Execute this Python code: print(2 + 2)'}],
19
- 'stream': True,
20
- }
21
  ) as response:
22
  print(f"Status: {response.status_code}")
23
  print(f"Headers: {dict(response.headers)}")
@@ -28,14 +30,15 @@ async def test_agent():
28
  async for line in response.aiter_lines():
29
  if line.strip():
30
  print(f"CHUNK: {line}")
31
- if line.startswith('data: ') and not line.startswith('data: [DONE]'):
32
  import json
 
33
  try:
34
  data = json.loads(line[6:])
35
- if 'choices' in data and len(data['choices']) > 0:
36
- delta = data['choices'][0].get('delta', {})
37
- if 'content' in delta:
38
- full_response += delta['content']
39
  except:
40
  pass
41
 
@@ -43,5 +46,6 @@ async def test_agent():
43
  print(f"\nFull response:\n{full_response}")
44
  print("=" * 80)
45
 
 
46
  if __name__ == "__main__":
47
  asyncio.run(test_agent())
 
1
  """Direct test to see all agent output including errors."""
2
 
3
  import asyncio
4
+
5
  import httpx
6
 
7
+
8
  async def test_agent():
9
  """Test agent and print all logs."""
10
  async with httpx.AsyncClient(timeout=30.0) as client:
 
13
  print("=" * 80)
14
 
15
  async with client.stream(
16
+ "POST",
17
+ "http://localhost:8000/v1/chat/completions",
18
  json={
19
+ "model": "claude-3-5-sonnet-20241022",
20
+ "messages": [{"role": "user", "content": "Execute this Python code: print(2 + 2)"}],
21
+ "stream": True,
22
+ },
23
  ) as response:
24
  print(f"Status: {response.status_code}")
25
  print(f"Headers: {dict(response.headers)}")
 
30
  async for line in response.aiter_lines():
31
  if line.strip():
32
  print(f"CHUNK: {line}")
33
+ if line.startswith("data: ") and not line.startswith("data: [DONE]"):
34
  import json
35
+
36
  try:
37
  data = json.loads(line[6:])
38
+ if "choices" in data and len(data["choices"]) > 0:
39
+ delta = data["choices"][0].get("delta", {})
40
+ if "content" in delta:
41
+ full_response += delta["content"]
42
  except:
43
  pass
44
 
 
46
  print(f"\nFull response:\n{full_response}")
47
  print("=" * 80)
48
 
49
+
50
  if __name__ == "__main__":
51
  asyncio.run(test_agent())
tests/test_agent_api/test_server.py CHANGED
@@ -9,7 +9,6 @@ These tests require:
9
 
10
  import httpx
11
  import pytest
12
- from openai import OpenAI
13
 
14
 
15
  class TestHealthEndpoints:
@@ -202,9 +201,7 @@ class TestSessionManagement:
202
  # Second turn: reference previous context
203
  stream2 = openai_client.chat.completions.create(
204
  model=test_model,
205
- messages=[
206
- {"role": "user", "content": "Now read the file you just created"}
207
- ],
208
  stream=True,
209
  user="test-session-123", # Same user ID for context
210
  )
 
9
 
10
  import httpx
11
  import pytest
 
12
 
13
 
14
  class TestHealthEndpoints:
 
201
  # Second turn: reference previous context
202
  stream2 = openai_client.chat.completions.create(
203
  model=test_model,
204
+ messages=[{"role": "user", "content": "Now read the file you just created"}],
 
 
205
  stream=True,
206
  user="test-session-123", # Same user ID for context
207
  )
tests/test_config_loading.py DELETED
@@ -1,163 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Demonstrate how .env values propagate through the system."""
3
-
4
- import os
5
- import sys
6
- from pathlib import Path
7
-
8
-
9
- def load_dotenv():
10
- """Load .env file into environment variables."""
11
- env_path = Path(__file__).parent / ".env"
12
-
13
- if not env_path.exists():
14
- print(f"⚠️ .env file not found at {env_path}")
15
- return False
16
-
17
- # Simple .env parser (avoids dependency on python-dotenv)
18
- with open(env_path) as f:
19
- for line in f:
20
- line = line.strip()
21
- # Skip comments and empty lines
22
- if not line or line.startswith('#'):
23
- continue
24
- # Parse KEY=VALUE
25
- if '=' in line:
26
- key, value = line.split('=', 1)
27
- key = key.strip()
28
- value = value.strip()
29
- # Set in environment
30
- os.environ[key] = value
31
-
32
- return True
33
-
34
-
35
- def test_config_loading():
36
- """Show how .env values get loaded and used."""
37
-
38
- print("=" * 70)
39
- print("Configuration Loading Test")
40
- print("=" * 70)
41
- print()
42
-
43
- # Step 0: Load .env file into environment
44
- print("STEP 0: Loading .env File")
45
- print("-" * 70)
46
- if load_dotenv():
47
- print(" ✓ .env file loaded into environment variables")
48
- else:
49
- print(" ✗ Failed to load .env file")
50
- return False
51
- print()
52
-
53
- # Step 1: Show raw environment variables (after loading .env)
54
- print("STEP 1: Raw Environment Variables (from .env)")
55
- print("-" * 70)
56
- env_vars = [
57
- "ANTHROPIC_API_KEY",
58
- "DEFAULT_MODEL",
59
- "AGENT_API_HOST",
60
- "AGENT_API_PORT",
61
- "MCP_SERVER_URL",
62
- "SESSION_TIMEOUT_SECONDS",
63
- ]
64
-
65
- for var in env_vars:
66
- value = os.getenv(var, "NOT SET")
67
- # Mask API keys
68
- if "API_KEY" in var and value != "NOT SET":
69
- value = value[:10] + "..." + value[-4:] if len(value) > 14 else "***"
70
- print(f" {var:30} = {value}")
71
-
72
- print()
73
-
74
- # Step 2: Import settings (this triggers .env loading)
75
- print("STEP 2: Importing settings from agent_api.config")
76
- print("-" * 70)
77
- print(" Executing: from agent_api.config import settings")
78
-
79
- try:
80
- from agent_api.config import settings
81
- print(" ✓ Settings imported successfully")
82
- except Exception as e:
83
- print(f" ✗ Failed to import settings: {e}")
84
- return False
85
-
86
- print()
87
-
88
- # Step 3: Show Pydantic settings object
89
- print("STEP 3: Pydantic Settings Object Values")
90
- print("-" * 70)
91
- print(f" settings.agent_api_host = {settings.agent_api_host}")
92
- print(f" settings.agent_api_port = {settings.agent_api_port}")
93
- print(f" settings.mcp_server_url = {settings.mcp_server_url}")
94
- print(f" settings.default_model = {settings.default_model}")
95
- print(f" settings.agent_name = {settings.agent_name}")
96
- print(f" settings.session_timeout_seconds = {settings.session_timeout_seconds}")
97
-
98
- print()
99
-
100
- # Step 4: Show how these values would be used
101
- print("STEP 4: How Values Are Used in Code")
102
- print("-" * 70)
103
-
104
- print("\n In agent_api/server.py:")
105
- print(f" AgentManager(mcp_server_url='{settings.mcp_server_url}')")
106
- print(f" logger.info('Default Model: {settings.default_model}')")
107
-
108
- print("\n In agent_api/agent_manager.py:")
109
- print(f" model = LiteLlm(model='{settings.default_model}')")
110
- print(f" agent = Agent(name='{settings.agent_name}', ...)")
111
-
112
- print()
113
-
114
- # Step 5: Verify API key in environment
115
- print("STEP 5: API Key Availability for LiteLLM")
116
- print("-" * 70)
117
-
118
- api_key = os.getenv("ANTHROPIC_API_KEY")
119
- if api_key:
120
- masked = api_key[:10] + "..." + api_key[-4:] if len(api_key) > 14 else "***"
121
- print(f" ✓ ANTHROPIC_API_KEY found in environment: {masked}")
122
- print(" → LiteLLM will use this key automatically")
123
- else:
124
- print(" ✗ ANTHROPIC_API_KEY not found in environment")
125
- print(" → LiteLLM will fail when trying to use Anthropic models")
126
-
127
- print()
128
-
129
- # Step 6: Summary
130
- print("=" * 70)
131
- print("SUMMARY: How .env Values Propagate")
132
- print("=" * 70)
133
- print()
134
- print("TWO PARALLEL PATHS:")
135
- print()
136
- print("Path 1: For Agent API Configuration")
137
- print(" .env file → Pydantic BaseSettings → settings object")
138
- print(" - Pydantic reads .env automatically when Settings() is created")
139
- print(" - Values available as: settings.agent_api_port, settings.default_model, etc.")
140
- print(" - Used by: server.py, agent_manager.py, session_store.py")
141
- print()
142
- print("Path 2: For API Keys (LiteLLM)")
143
- print(" .env file → os.environ → LiteLLM")
144
- print(" - Need to manually load .env (or use python-dotenv)")
145
- print(" - API keys read from os.environ by LiteLLM")
146
- print(" - Used by: LiteLlm() when creating model")
147
- print()
148
- print("IMPORTANT: Pydantic does NOT set os.environ!")
149
- print(" - settings.default_model works ✓")
150
- print(" - os.getenv('DEFAULT_MODEL') may not work ✗ (unless manually loaded)")
151
- print()
152
- print("All .env values are now available throughout the application!")
153
- print()
154
-
155
- return True
156
-
157
-
158
- if __name__ == "__main__":
159
- # Make sure we can import from agent_api
160
- sys.path.insert(0, "/Users/mohardey/Projects/code-execution-with-mcp")
161
-
162
- success = test_config_loading()
163
- sys.exit(0 if success else 1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tests/test_mcp_server/conftest.py CHANGED
@@ -50,5 +50,6 @@ async def cleanup_containers():
50
 
51
  # Cleanup after tests complete
52
  import server
 
53
  if server.docker_client:
54
  server.docker_client.cleanup_all()
 
50
 
51
  # Cleanup after tests complete
52
  import server
53
+
54
  if server.docker_client:
55
  server.docker_client.cleanup_all()
tests/test_mcp_server/test_server.py CHANGED
@@ -152,11 +152,11 @@ class TestWriteFileTool:
152
 
153
  async def test_write_python_script(self, mcp_client: FastMCPClient):
154
  """Test writing a Python script with special characters and executing it."""
155
- python_code = '''def greet(name):
156
  return f"Hello, {name}!"
157
 
158
  print(greet("World"))
159
- '''
160
 
161
  # Write the Python script
162
  write_result = await mcp_client.call_tool(
@@ -237,15 +237,13 @@ class TestReadFileTool:
237
  @pytest.mark.parametrize(
238
  "offset,line_count",
239
  [
240
- (0, 1), # First line only
241
- (0, 3), # First three lines
242
- (2, 2), # Lines 3-4
243
  (4, 10), # Last line (more than available)
244
  ],
245
  )
246
- async def test_read_pagination(
247
- self, mcp_client: FastMCPClient, offset: int, line_count: int
248
- ):
249
  """Test file reading with various pagination parameters."""
250
  content = "\n".join([f"Line {i}" for i in range(1, 11)])
251
 
@@ -347,7 +345,11 @@ class TestReadDocstringTool:
347
  ],
348
  )
349
  async def test_read_various_docstrings(
350
- self, mcp_client: FastMCPClient, function_name: str, function_code: str, expected_in_docstring: str
 
 
 
 
351
  ):
352
  """Test reading docstrings from various functions."""
353
  # Write the function to a file
@@ -457,8 +459,8 @@ class TestUserIsolation:
457
 
458
  async def test_different_users_isolated(self, monkeypatch):
459
  """Test that different users cannot access each other's files."""
460
- from fastmcp.client import Client as FastMCPClient
461
  import server
 
462
 
463
  # Test with user1
464
  def mock_get_user_id_user1(ctx):
@@ -558,8 +560,8 @@ This is the skill body.
558
 
559
  def test_get_nonexistent_skill(self):
560
  """Test requesting a skill that doesn't exist."""
561
- from utils.skill_utils import get_skill
562
  import pytest
 
563
 
564
  with pytest.raises(FileNotFoundError, match="not found"):
565
  get_skill("nonexistent-skill")
 
152
 
153
  async def test_write_python_script(self, mcp_client: FastMCPClient):
154
  """Test writing a Python script with special characters and executing it."""
155
+ python_code = """def greet(name):
156
  return f"Hello, {name}!"
157
 
158
  print(greet("World"))
159
+ """
160
 
161
  # Write the Python script
162
  write_result = await mcp_client.call_tool(
 
237
  @pytest.mark.parametrize(
238
  "offset,line_count",
239
  [
240
+ (0, 1), # First line only
241
+ (0, 3), # First three lines
242
+ (2, 2), # Lines 3-4
243
  (4, 10), # Last line (more than available)
244
  ],
245
  )
246
+ async def test_read_pagination(self, mcp_client: FastMCPClient, offset: int, line_count: int):
 
 
247
  """Test file reading with various pagination parameters."""
248
  content = "\n".join([f"Line {i}" for i in range(1, 11)])
249
 
 
345
  ],
346
  )
347
  async def test_read_various_docstrings(
348
+ self,
349
+ mcp_client: FastMCPClient,
350
+ function_name: str,
351
+ function_code: str,
352
+ expected_in_docstring: str,
353
  ):
354
  """Test reading docstrings from various functions."""
355
  # Write the function to a file
 
459
 
460
  async def test_different_users_isolated(self, monkeypatch):
461
  """Test that different users cannot access each other's files."""
 
462
  import server
463
+ from fastmcp.client import Client as FastMCPClient
464
 
465
  # Test with user1
466
  def mock_get_user_id_user1(ctx):
 
560
 
561
  def test_get_nonexistent_skill(self):
562
  """Test requesting a skill that doesn't exist."""
 
563
  import pytest
564
+ from utils.skill_utils import get_skill
565
 
566
  with pytest.raises(FileNotFoundError, match="not found"):
567
  get_skill("nonexistent-skill")