Spaces:
Running
Running
| from typing import Any, Optional | |
| from smolagents.tools import Tool | |
| import transformers | |
| import PyPDF2 | |
| import io | |
| import requests | |
class DocumentAnalyzer(Tool):
    """Tool that downloads a PDF and reports a summary and sentiment of its text.

    The tool takes a single ``document_url`` input and returns one formatted
    string (matching ``output_type = "string"``).
    """

    name = "analyze_document"
    description = "Analyzes a PDF document and extracts key information like summary and sentiment"
    inputs = {'document_url': {'type': 'string', 'description': 'URL to a PDF document'}}
    output_type = "string"

    def __init__(self):
        super().__init__()
        # Pipelines are created lazily on the first forward() call and then
        # reused, so repeated calls do not rebuild them every time.
        self._summarizer = None
        self._sentiment_analyzer = None

    def forward(self, document_url: str) -> str:
        """Download a PDF, extract its text, and summarize/classify it.

        Only the beginning of the document is analyzed: the first 1024
        characters are summarized and the first 512 characters are used for
        sentiment analysis.

        Args:
            document_url: URL to a PDF document.

        Returns:
            A string of the form
            ``"Summary: ...\\nSentiment: <label>\\nConfidence: <score>"``, or a
            short message when no text could be extracted from the PDF.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            requests.RequestException: On connection problems or timeout.
        """
        # Imports are local so the method carries its own dependencies.
        import PyPDF2
        import io
        import requests
        from transformers import pipeline

        # Download the document. The timeout prevents hanging forever on an
        # unresponsive host; raise_for_status() stops an HTML error page from
        # being handed to the PDF parser.
        response = requests.get(document_url, timeout=30)
        response.raise_for_status()
        pdf_file = io.BytesIO(response.content)

        # Extract text. A page without a text layer may yield no text, so
        # fall back to "" instead of concatenating None.
        reader = PyPDF2.PdfReader(pdf_file)
        text = "".join((page.extract_text() or "") for page in reader.pages)

        if not text.strip():
            # Nothing meaningful to feed the models (e.g. a scanned PDF).
            return "No extractable text found in the document."

        # Summarize text
        if self._summarizer is None:
            self._summarizer = pipeline("summarization", max_length=100)
        # truncation=True guards against inputs whose token count exceeds the
        # model limit even after the character slice.
        summary = self._summarizer(text[:1024], truncation=True)[0]['summary_text']

        # Sentiment analysis
        if self._sentiment_analyzer is None:
            self._sentiment_analyzer = pipeline("sentiment-analysis")
        sentiment = self._sentiment_analyzer(text[:512], truncation=True)[0]

        return f"Summary: {summary}\nSentiment: {sentiment['label']}\nConfidence: {sentiment['score']:.2f}"