Spaces:
Sleeping
Sleeping
updates on how we handle files
Browse files
- __pycache__/tools.cpython-311.pyc +0 -0
- app.py +55 -23
- tools.py +5 -76
__pycache__/tools.cpython-311.pyc
CHANGED
|
Binary files a/__pycache__/tools.cpython-311.pyc and b/__pycache__/tools.cpython-311.pyc differ
|
|
|
app.py
CHANGED
|
@@ -25,9 +25,9 @@ from tools import (
|
|
| 25 |
extract_text_from_image_tool,
|
| 26 |
analyze_csv_file_tool,
|
| 27 |
analyze_excel_file_tool,
|
| 28 |
-
download_file_tool,
|
| 29 |
)
|
| 30 |
import re
|
|
|
|
| 31 |
|
| 32 |
# (Keep Constants as is)
|
| 33 |
# --- Constants ---
|
|
@@ -48,7 +48,6 @@ tools = [
|
|
| 48 |
code_execution_tool,
|
| 49 |
math_calculation_tool,
|
| 50 |
python_repl_tool,
|
| 51 |
-
download_file_tool,
|
| 52 |
extract_text_from_image_tool,
|
| 53 |
analyze_csv_file_tool,
|
| 54 |
analyze_excel_file_tool,
|
|
@@ -63,12 +62,12 @@ class MyAgent(TypedDict):
|
|
| 63 |
|
| 64 |
|
| 65 |
# =========================
|
| 66 |
-
#
|
| 67 |
# =========================
|
| 68 |
def process_question_with_files(question_data: dict) -> str:
|
| 69 |
"""
|
| 70 |
-
|
| 71 |
-
|
| 72 |
"""
|
| 73 |
question_text = question_data.get('question', '')
|
| 74 |
file_name = question_data.get('file_name', '')
|
|
@@ -77,15 +76,51 @@ def process_question_with_files(question_data: dict) -> str:
|
|
| 77 |
if not file_name:
|
| 78 |
return question_text
|
| 79 |
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
|
|
|
| 83 |
file_url = f"{DEFAULT_API_URL}/files/{task_id}"
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
|
| 91 |
def extract_final_answer(text: str) -> str:
|
|
@@ -141,17 +176,14 @@ def assistant(state: MyAgent):
|
|
| 141 |
system_message = SystemMessage(content="""
|
| 142 |
You are a helpful assistant tasked with answering questions using a set of tools.
|
| 143 |
|
| 144 |
-
IMPORTANT:
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
- For spreadsheets: use analyze_csv_file_tool or analyze_excel_file_tool
|
| 153 |
-
- For text files: use read_file_tool
|
| 154 |
-
- For code files: use python_execution_tool or code_execution_tool
|
| 155 |
|
| 156 |
Think step by step and report your answer with the following template:
|
| 157 |
FINAL ANSWER: [YOUR FINAL ANSWER].
|
|
|
|
| 25 |
extract_text_from_image_tool,
|
| 26 |
analyze_csv_file_tool,
|
| 27 |
analyze_excel_file_tool,
|
|
|
|
| 28 |
)
|
| 29 |
import re
|
| 30 |
+
import tempfile
|
| 31 |
|
| 32 |
# (Keep Constants as is)
|
| 33 |
# --- Constants ---
|
|
|
|
| 48 |
code_execution_tool,
|
| 49 |
math_calculation_tool,
|
| 50 |
python_repl_tool,
|
|
|
|
| 51 |
extract_text_from_image_tool,
|
| 52 |
analyze_csv_file_tool,
|
| 53 |
analyze_excel_file_tool,
|
|
|
|
| 62 |
|
| 63 |
|
| 64 |
# =========================
|
| 65 |
+
# Efficient File Handling - Download with Question
|
| 66 |
# =========================
|
| 67 |
def process_question_with_files(question_data: dict) -> str:
|
| 68 |
"""
|
| 69 |
+
Download file content when processing the question and include it directly.
|
| 70 |
+
This eliminates the need for the agent to download files separately.
|
| 71 |
"""
|
| 72 |
question_text = question_data.get('question', '')
|
| 73 |
file_name = question_data.get('file_name', '')
|
|
|
|
| 76 |
if not file_name:
|
| 77 |
return question_text
|
| 78 |
|
| 79 |
+
print(f"📎 Downloading file for question: {file_name}")
|
| 80 |
+
|
| 81 |
+
try:
|
| 82 |
+
# Download the file content directly
|
| 83 |
file_url = f"{DEFAULT_API_URL}/files/{task_id}"
|
| 84 |
+
response = requests.get(file_url, timeout=15)
|
| 85 |
+
response.raise_for_status()
|
| 86 |
+
|
| 87 |
+
# Save file to temporary location for processing
|
| 88 |
+
temp_dir = tempfile.gettempdir()
|
| 89 |
+
local_file_path = os.path.join(temp_dir, file_name)
|
| 90 |
+
|
| 91 |
+
with open(local_file_path, "wb") as f:
|
| 92 |
+
f.write(response.content)
|
| 93 |
+
|
| 94 |
+
# Process the file based on its type
|
| 95 |
+
ext = file_name.lower().split('.')[-1]
|
| 96 |
+
|
| 97 |
+
if ext in ['mp3', 'wav', 'm4a', 'flac', 'ogg']:
|
| 98 |
+
result = audio_processing_tool.invoke(local_file_path)
|
| 99 |
+
file_info = f"[Audio Transcription: {result}]"
|
| 100 |
+
elif ext in ['png', 'jpg', 'jpeg', 'gif', 'bmp']:
|
| 101 |
+
result = image_recognition_tool.invoke(local_file_path)
|
| 102 |
+
file_info = f"[Image Analysis: {result}]"
|
| 103 |
+
elif ext in ['csv', 'xls', 'xlsx']:
|
| 104 |
+
result = read_file_tool.invoke(local_file_path)
|
| 105 |
+
file_info = f"[Spreadsheet Content: {result}]"
|
| 106 |
+
elif ext in ['txt', 'md', 'py', 'json']:
|
| 107 |
+
result = read_file_tool.invoke(local_file_path)
|
| 108 |
+
file_info = f"[File Content: {result}]"
|
| 109 |
+
else:
|
| 110 |
+
result = read_file_tool.invoke(local_file_path)
|
| 111 |
+
file_info = f"[File Content: {result}]"
|
| 112 |
+
|
| 113 |
+
# Clean up the temporary file
|
| 114 |
+
try:
|
| 115 |
+
os.remove(local_file_path)
|
| 116 |
+
except Exception:
|
| 117 |
+
pass
|
| 118 |
|
| 119 |
+
return f"{question_text}\n\n{file_info}"
|
| 120 |
+
|
| 121 |
+
except Exception as e:
|
| 122 |
+
print(f"Error downloading/processing file {file_name}: {e}")
|
| 123 |
+
return f"{question_text}\n\n[Note: Could not download or process attached file {file_name}: {str(e)}]"
|
| 124 |
|
| 125 |
|
| 126 |
def extract_final_answer(text: str) -> str:
|
|
|
|
| 176 |
system_message = SystemMessage(content="""
|
| 177 |
You are a helpful assistant tasked with answering questions using a set of tools.
|
| 178 |
|
| 179 |
+
IMPORTANT: File content is already processed and included in the question. You can use these tools to analyze the content:
|
| 180 |
+
- For images: use image_recognition_tool or extract_text_from_image_tool
|
| 181 |
+
- For audio: use audio_processing_tool
|
| 182 |
+
- For spreadsheets: use analyze_csv_file_tool or analyze_excel_file_tool
|
| 183 |
+
- For text files: use read_file_tool
|
| 184 |
+
- For code files: use python_execution_tool or code_execution_tool
|
| 185 |
+
- For math calculations: use math_calculation_tool
|
| 186 |
+
- For web searches: use serp_search_tool or wiki_search_tool
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
Think step by step and report your answer with the following template:
|
| 189 |
FINAL ANSWER: [YOUR FINAL ANSWER].
|
tools.py
CHANGED
|
@@ -618,80 +618,9 @@ analyze_excel_file_tool = Tool(
|
|
| 618 |
# Smart File Download Tool (Consolidated)
|
| 619 |
# =========================
|
| 620 |
|
|
|
|
|
|
|
| 621 |
|
| 622 |
-
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
- GAIA files (using task_id)
|
| 626 |
-
- Regular URLs
|
| 627 |
-
- Content saving
|
| 628 |
-
|
| 629 |
-
Args:
|
| 630 |
-
file_source: task_id, URL, or content to save
|
| 631 |
-
filename: Optional filename (auto-generated if not provided)
|
| 632 |
-
"""
|
| 633 |
-
try:
|
| 634 |
-
# Auto-detect the type of file_source
|
| 635 |
-
if file_source.startswith(('http://', 'https://')):
|
| 636 |
-
# Regular URL download
|
| 637 |
-
if filename == "":
|
| 638 |
-
path = urlparse(file_source).path
|
| 639 |
-
filename = os.path.basename(path)
|
| 640 |
-
if not filename:
|
| 641 |
-
filename = f"downloaded_{uuid.uuid4().hex[:8]}"
|
| 642 |
-
|
| 643 |
-
temp_dir = tempfile.gettempdir()
|
| 644 |
-
filepath = os.path.join(temp_dir, filename)
|
| 645 |
-
|
| 646 |
-
response = requests.get(file_source, stream=True, timeout=15)
|
| 647 |
-
response.raise_for_status()
|
| 648 |
-
|
| 649 |
-
with open(filepath, "wb") as f:
|
| 650 |
-
for chunk in response.iter_content(chunk_size=8192):
|
| 651 |
-
f.write(chunk)
|
| 652 |
-
|
| 653 |
-
return f"File downloaded to {filepath}. You can read this file to process its contents."
|
| 654 |
-
|
| 655 |
-
elif len(file_source) == 36 and '-' in file_source:
|
| 656 |
-
# Likely a GAIA task_id (UUID format)
|
| 657 |
-
if filename == "":
|
| 658 |
-
filename = f"gaia_file_{file_source}"
|
| 659 |
-
|
| 660 |
-
api_url = "https://agents-course-unit4-scoring.hf.space"
|
| 661 |
-
file_url = f"{api_url}/files/{file_source}"
|
| 662 |
-
|
| 663 |
-
temp_dir = tempfile.gettempdir()
|
| 664 |
-
filepath = os.path.join(temp_dir, filename)
|
| 665 |
-
|
| 666 |
-
response = requests.get(file_url, stream=True, timeout=15)
|
| 667 |
-
response.raise_for_status()
|
| 668 |
-
|
| 669 |
-
with open(filepath, "wb") as f:
|
| 670 |
-
for chunk in response.iter_content(chunk_size=8192):
|
| 671 |
-
f.write(chunk)
|
| 672 |
-
|
| 673 |
-
return f"GAIA file downloaded to {filepath}. You can read this file to process its contents."
|
| 674 |
-
|
| 675 |
-
else:
|
| 676 |
-
# Treat as content to save
|
| 677 |
-
if filename == "":
|
| 678 |
-
temp_file = tempfile.NamedTemporaryFile(
|
| 679 |
-
delete=False, dir=tempfile.gettempdir())
|
| 680 |
-
filepath = temp_file.name
|
| 681 |
-
else:
|
| 682 |
-
filepath = os.path.join(tempfile.gettempdir(), filename)
|
| 683 |
-
|
| 684 |
-
with open(filepath, "w") as f:
|
| 685 |
-
f.write(file_source)
|
| 686 |
-
|
| 687 |
-
return f"Content saved to {filepath}. You can read this file to process its contents."
|
| 688 |
-
|
| 689 |
-
except Exception as e:
|
| 690 |
-
return f"Error handling file: {str(e)}"
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
download_file_tool = Tool(
|
| 694 |
-
name="download_file_tool",
|
| 695 |
-
func=download_file_smart,
|
| 696 |
-
description="Smart file download tool: automatically detects if input is a GAIA task_id, URL, or content and handles accordingly. Use this for all file operations."
|
| 697 |
-
)
|
|
|
|
| 618 |
# Smart File Download Tool (Consolidated)
|
| 619 |
# =========================
|
| 620 |
|
| 621 |
+
# Note: This tool is no longer needed since files are pre-processed
|
| 622 |
+
# when questions are fetched. The agent receives file content directly.
|
| 623 |
|
| 624 |
+
# =========================
|
| 625 |
+
# Image and Data Analysis Tools
|
| 626 |
+
# =========================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|