Spaces:

MehulJ
/

document_analyzer

Running

document_analyzer / tool.py

Upload tool

c2cfe9e verified 10 months ago

1.62 kB

	from typing import Any, Optional
	from smolagents.tools import Tool
	import transformers
	import PyPDF2
	import io
	import requests

	class DocumentAnalyzer(Tool):
	    """Tool that downloads a PDF from a URL and reports a summary and sentiment.

	    The text of every page is extracted with PyPDF2, then the leading part of
	    that text is passed to the default Hugging Face ``summarization`` and
	    ``sentiment-analysis`` pipelines. The result is a single formatted string.
	    """

	    name = "analyze_document"
	    description = "Analyzes a PDF document and extracts key information like summary and sentiment"
	    inputs = {'document_url': {'type': 'string', 'description': 'URL to a PDF document'}}
	    output_type = "string"

	    def __init__(self):
	        super().__init__()

	    def forward(self, document_url: str) -> str:
	        """Download the PDF at ``document_url`` and describe its content.

	        Args:
	            document_url: URL to a PDF document.

	        Returns:
	            A string with three lines, ``Summary: ...``, ``Sentiment: ...``
	            and ``Confidence: ...`` (score formatted to two decimals). If no
	            text can be extracted from the PDF, a short explanatory message
	            is returned instead.

	        Raises:
	            requests.HTTPError: If the server answers with an error status.
	            requests.Timeout: If the download does not respond in time.
	        """
	        # Imports are kept function-local, as in the original file
	        # (presumably so the tool stays self-contained when exported; confirm).
	        import PyPDF2
	        import io
	        import requests
	        from transformers import pipeline

	        # Download the document. A timeout prevents an unresponsive server
	        # from blocking the caller forever, and raise_for_status() stops an
	        # HTTP error page from being handed to the PDF parser.
	        response = requests.get(document_url, timeout=30)
	        response.raise_for_status()
	        pdf_file = io.BytesIO(response.content)

	        # Extract text. `or ""` guards against a page yielding None/empty
	        # text, and join() avoids repeated string concatenation.
	        reader = PyPDF2.PdfReader(pdf_file)
	        text = "".join(page.extract_text() or "" for page in reader.pages)

	        # Image-only or empty PDFs give no text; running the models on an
	        # empty string would be meaningless, so report that explicitly.
	        if not text.strip():
	            return "No extractable text found in the document."

	        # Summarize only the first 1024 characters of the document.
	        summarizer = pipeline("summarization", max_length=100)
	        summary = summarizer(text[:1024])[0]['summary_text']

	        # Sentiment analysis on the first 512 characters of the document.
	        sentiment_analyzer = pipeline("sentiment-analysis")
	        sentiment = sentiment_analyzer(text[:512])[0]

	        return f"Summary: {summary}\nSentiment: {sentiment['label']}\nConfidence: {sentiment['score']:.2f}"