santimber committed on
Commit
f206914
·
1 Parent(s): 1204ed9

updates on how we handle files

Browse files
Files changed (3) hide show
  1. __pycache__/tools.cpython-311.pyc +0 -0
  2. app.py +55 -23
  3. tools.py +5 -76
__pycache__/tools.cpython-311.pyc CHANGED
Binary files a/__pycache__/tools.cpython-311.pyc and b/__pycache__/tools.cpython-311.pyc differ
 
app.py CHANGED
@@ -25,9 +25,9 @@ from tools import (
25
  extract_text_from_image_tool,
26
  analyze_csv_file_tool,
27
  analyze_excel_file_tool,
28
- download_file_tool,
29
  )
30
  import re
 
31
 
32
  # (Keep Constants as is)
33
  # --- Constants ---
@@ -48,7 +48,6 @@ tools = [
48
  code_execution_tool,
49
  math_calculation_tool,
50
  python_repl_tool,
51
- download_file_tool,
52
  extract_text_from_image_tool,
53
  analyze_csv_file_tool,
54
  analyze_excel_file_tool,
@@ -63,12 +62,12 @@ class MyAgent(TypedDict):
63
 
64
 
65
  # =========================
66
- # Simplified File Handling
67
  # =========================
68
  def process_question_with_files(question_data: dict) -> str:
69
  """
70
- Simple file handling - just pass the file info to the agent
71
- and let it use its tools to handle the file.
72
  """
73
  question_text = question_data.get('question', '')
74
  file_name = question_data.get('file_name', '')
@@ -77,15 +76,51 @@ def process_question_with_files(question_data: dict) -> str:
77
  if not file_name:
78
  return question_text
79
 
80
- # Use the correct API endpoint based on the documentation
81
- # Files are accessed via /files/{task_id} not /files/{file_name}
82
- if task_id:
 
83
  file_url = f"{DEFAULT_API_URL}/files/{task_id}"
84
- else:
85
- # Fallback to old method if task_id is not available
86
- file_url = f"{DEFAULT_API_URL}/files/{file_name}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
- return f"{question_text}\n\n[There is an attached file: {file_name}. You can download it from: {file_url}]"
 
 
 
 
89
 
90
 
91
  def extract_final_answer(text: str) -> str:
@@ -141,17 +176,14 @@ def assistant(state: MyAgent):
141
  system_message = SystemMessage(content="""
142
  You are a helpful assistant tasked with answering questions using a set of tools.
143
 
144
- IMPORTANT: When a question mentions an attached file, follow this process:
145
- 1. Use download_file_tool with the task_id or URL to download the file
146
- - For GAIA files: pass the task_id directly
147
- - For other URLs: pass the full URL
148
- - For content: pass the content to save as a file
149
- 2. Use the appropriate analysis tool based on file type:
150
- - For images: use image_recognition_tool or extract_text_from_image_tool
151
- - For audio: use audio_processing_tool
152
- - For spreadsheets: use analyze_csv_file_tool or analyze_excel_file_tool
153
- - For text files: use read_file_tool
154
- - For code files: use python_execution_tool or code_execution_tool
155
 
156
  Think step by step and report your answer with the following template:
157
  FINAL ANSWER: [YOUR FINAL ANSWER].
 
25
  extract_text_from_image_tool,
26
  analyze_csv_file_tool,
27
  analyze_excel_file_tool,
 
28
  )
29
  import re
30
+ import tempfile
31
 
32
  # (Keep Constants as is)
33
  # --- Constants ---
 
48
  code_execution_tool,
49
  math_calculation_tool,
50
  python_repl_tool,
 
51
  extract_text_from_image_tool,
52
  analyze_csv_file_tool,
53
  analyze_excel_file_tool,
 
62
 
63
 
64
  # =========================
65
+ # Efficient File Handling - Download with Question
66
  # =========================
67
  def process_question_with_files(question_data: dict) -> str:
68
  """
69
+ Download file content when processing the question and include it directly.
70
+ This eliminates the need for the agent to download files separately.
71
  """
72
  question_text = question_data.get('question', '')
73
  file_name = question_data.get('file_name', '')
 
76
  if not file_name:
77
  return question_text
78
 
79
+ print(f"📎 Downloading file for question: {file_name}")
80
+
81
+ try:
82
+ # Download the file content directly
83
  file_url = f"{DEFAULT_API_URL}/files/{task_id}"
84
+ response = requests.get(file_url, timeout=15)
85
+ response.raise_for_status()
86
+
87
+ # Save file to temporary location for processing
88
+ temp_dir = tempfile.gettempdir()
89
+ local_file_path = os.path.join(temp_dir, file_name)
90
+
91
+ with open(local_file_path, "wb") as f:
92
+ f.write(response.content)
93
+
94
+ # Process the file based on its type
95
+ ext = file_name.lower().split('.')[-1]
96
+
97
+ if ext in ['mp3', 'wav', 'm4a', 'flac', 'ogg']:
98
+ result = audio_processing_tool.invoke(local_file_path)
99
+ file_info = f"[Audio Transcription: {result}]"
100
+ elif ext in ['png', 'jpg', 'jpeg', 'gif', 'bmp']:
101
+ result = image_recognition_tool.invoke(local_file_path)
102
+ file_info = f"[Image Analysis: {result}]"
103
+ elif ext in ['csv', 'xls', 'xlsx']:
104
+ result = read_file_tool.invoke(local_file_path)
105
+ file_info = f"[Spreadsheet Content: {result}]"
106
+ elif ext in ['txt', 'md', 'py', 'json']:
107
+ result = read_file_tool.invoke(local_file_path)
108
+ file_info = f"[File Content: {result}]"
109
+ else:
110
+ result = read_file_tool.invoke(local_file_path)
111
+ file_info = f"[File Content: {result}]"
112
+
113
+ # Clean up the temporary file
114
+ try:
115
+ os.remove(local_file_path)
116
+ except Exception:
117
+ pass
118
 
119
+ return f"{question_text}\n\n{file_info}"
120
+
121
+ except Exception as e:
122
+ print(f"Error downloading/processing file {file_name}: {e}")
123
+ return f"{question_text}\n\n[Note: Could not download or process attached file {file_name}: {str(e)}]"
124
 
125
 
126
  def extract_final_answer(text: str) -> str:
 
176
  system_message = SystemMessage(content="""
177
  You are a helpful assistant tasked with answering questions using a set of tools.
178
 
179
+ IMPORTANT: File content is already processed and included in the question. You can use these tools to analyze the content:
180
+ - For images: use image_recognition_tool or extract_text_from_image_tool
181
+ - For audio: use audio_processing_tool
182
+ - For spreadsheets: use analyze_csv_file_tool or analyze_excel_file_tool
183
+ - For text files: use read_file_tool
184
+ - For code files: use python_execution_tool or code_execution_tool
185
+ - For math calculations: use math_calculation_tool
186
+ - For web searches: use serp_search_tool or wiki_search_tool
 
 
 
187
 
188
  Think step by step and report your answer with the following template:
189
  FINAL ANSWER: [YOUR FINAL ANSWER].
tools.py CHANGED
@@ -618,80 +618,9 @@ analyze_excel_file_tool = Tool(
618
  # Smart File Download Tool (Consolidated)
619
  # =========================
620
 
 
 
621
 
622
- def download_file_smart(file_source: str, filename: str = "") -> str:
623
- """
624
- Smart file download tool that handles:
625
- - GAIA files (using task_id)
626
- - Regular URLs
627
- - Content saving
628
-
629
- Args:
630
- file_source: task_id, URL, or content to save
631
- filename: Optional filename (auto-generated if not provided)
632
- """
633
- try:
634
- # Auto-detect the type of file_source
635
- if file_source.startswith(('http://', 'https://')):
636
- # Regular URL download
637
- if filename == "":
638
- path = urlparse(file_source).path
639
- filename = os.path.basename(path)
640
- if not filename:
641
- filename = f"downloaded_{uuid.uuid4().hex[:8]}"
642
-
643
- temp_dir = tempfile.gettempdir()
644
- filepath = os.path.join(temp_dir, filename)
645
-
646
- response = requests.get(file_source, stream=True, timeout=15)
647
- response.raise_for_status()
648
-
649
- with open(filepath, "wb") as f:
650
- for chunk in response.iter_content(chunk_size=8192):
651
- f.write(chunk)
652
-
653
- return f"File downloaded to {filepath}. You can read this file to process its contents."
654
-
655
- elif len(file_source) == 36 and '-' in file_source:
656
- # Likely a GAIA task_id (UUID format)
657
- if filename == "":
658
- filename = f"gaia_file_{file_source}"
659
-
660
- api_url = "https://agents-course-unit4-scoring.hf.space"
661
- file_url = f"{api_url}/files/{file_source}"
662
-
663
- temp_dir = tempfile.gettempdir()
664
- filepath = os.path.join(temp_dir, filename)
665
-
666
- response = requests.get(file_url, stream=True, timeout=15)
667
- response.raise_for_status()
668
-
669
- with open(filepath, "wb") as f:
670
- for chunk in response.iter_content(chunk_size=8192):
671
- f.write(chunk)
672
-
673
- return f"GAIA file downloaded to {filepath}. You can read this file to process its contents."
674
-
675
- else:
676
- # Treat as content to save
677
- if filename == "":
678
- temp_file = tempfile.NamedTemporaryFile(
679
- delete=False, dir=tempfile.gettempdir())
680
- filepath = temp_file.name
681
- else:
682
- filepath = os.path.join(tempfile.gettempdir(), filename)
683
-
684
- with open(filepath, "w") as f:
685
- f.write(file_source)
686
-
687
- return f"Content saved to {filepath}. You can read this file to process its contents."
688
-
689
- except Exception as e:
690
- return f"Error handling file: {str(e)}"
691
-
692
-
693
- download_file_tool = Tool(
694
- name="download_file_tool",
695
- func=download_file_smart,
696
- description="Smart file download tool: automatically detects if input is a GAIA task_id, URL, or content and handles accordingly. Use this for all file operations."
697
- )
 
618
  # Smart File Download Tool (Consolidated)
619
  # =========================
620
 
621
+ # Note: This tool is no longer needed since files are pre-processed
622
+ # when questions are fetched. The agent receives file content directly.
623
 
624
+ # =========================
625
+ # Image and Data Analysis Tools
626
+ # =========================