# MehulJ's picture
# Upload tool
# c2cfe9e verified
from typing import Any, Optional
from smolagents.tools import Tool
import transformers
import PyPDF2
import io
import requests
class DocumentAnalyzer(Tool):
    """Tool that downloads a PDF and reports a short summary and its sentiment.

    The PDF is fetched over HTTP, its text is extracted with PyPDF2, and the
    leading part of that text is passed to the default ``summarization`` and
    ``sentiment-analysis`` pipelines from ``transformers``.
    """

    name = "analyze_document"
    description = "Analyzes a PDF document and extracts key information like summary and sentiment"
    inputs = {'document_url': {'type': 'string', 'description': 'URL to a PDF document'}}
    output_type = "string"

    # Upper bound (seconds) on the HTTP download so an unresponsive server
    # cannot block the tool indefinitely.
    request_timeout = 30

    def __init__(self):
        super().__init__()
        # The pipelines are built on first use and then reused: constructing
        # one loads model weights, which is far too costly to repeat per call.
        self._summarizer = None
        self._sentiment_analyzer = None

    def forward(self, document_url: str) -> str:
        """Analyze the PDF at ``document_url`` and describe it as text.

        Args:
            document_url: URL to a PDF document.

        Returns:
            A three-line string of the form
            ``"Summary: ...\\nSentiment: <label>\\nConfidence: <score>"``,
            or a short explanatory message when the PDF contains no
            extractable text.

        Raises:
            requests.RequestException: If the download fails, times out, or
                the server answers with an HTTP error status.
        """
        # Imports stay inside the method, as in the original implementation.
        import io

        import PyPDF2
        import requests
        from transformers import pipeline

        # Download the document; fail loudly on HTTP errors instead of handing
        # an error page to the PDF parser.
        response = requests.get(document_url, timeout=self.request_timeout)
        response.raise_for_status()

        # Extract text. A page without a text layer may yield None or "",
        # so normalise to "" before joining.
        reader = PyPDF2.PdfReader(io.BytesIO(response.content))
        text = "".join(page.extract_text() or "" for page in reader.pages)
        if not text.strip():
            # Nothing to analyse (e.g. a scanned, image-only PDF).
            return "No extractable text was found in the document."

        # Summarize only the first 1024 characters to keep the model input short.
        # getattr guards against instances created without running __init__.
        if getattr(self, "_summarizer", None) is None:
            self._summarizer = pipeline("summarization", max_length=100)
        summary = self._summarizer(text[:1024])[0]['summary_text']

        # Sentiment analysis on the first 512 characters.
        if getattr(self, "_sentiment_analyzer", None) is None:
            self._sentiment_analyzer = pipeline("sentiment-analysis")
        sentiment = self._sentiment_analyzer(text[:512])[0]

        return f"Summary: {summary}\nSentiment: {sentiment['label']}\nConfidence: {sentiment['score']:.2f}"