| | from typing import Any |
| | from subprocess import run |
| | from docquery import document, pipeline |
| | import tempfile |
| | import os |
| | |
| |
|
| | |
# Install the Tesseract OCR binary when this module is imported (presumably
# required by docquery for scanned documents -- TODO confirm). The command is
# passed as an argv list so no shell is involved; check=True makes a failed
# install raise CalledProcessError instead of continuing without OCR support.
run(["apt", "install", "-y", "tesseract-ocr"], check=True)
| |
|
class EndpointHandler:
    """Callable handler that answers a fixed set of invoice questions about a PDF.

    The handler wraps a docquery ``document-question-answering`` pipeline and
    runs every entry of ``QUESTIONS`` against the document it receives.
    """

    # Questions asked of every document; predictions are returned in this order.
    QUESTIONS = (
        "What is the invoice number?",
        "What is the invoice total?",
    )

    def __init__(self, path=""):
        """Create the question-answering pipeline.

        Args:
            path: Forwarded to docquery's ``pipeline`` as the ``model`` argument.
        """
        self.pipeline = pipeline('document-question-answering', model=path)

    def __call__(self, data: dict[str, bytes]) -> dict[str, list[Any]]:
        """Answer the configured questions for one PDF document.

        Args:
            data: Request payload. The raw PDF bytes are read from (and popped
                out of) the ``"inputs"`` key.

        Returns:
            ``{"predictions": [...]}`` with one pipeline result per question,
            in the order of ``QUESTIONS``.

        Raises:
            TypeError: If the payload does not carry bytes-like PDF content.
        """
        payload = data.pop("inputs", data)
        if not isinstance(payload, (bytes, bytearray, memoryview)):
            # Fail early with a clear message instead of an opaque error from
            # file.write() after a temp file has already been created.
            raise TypeError(
                "expected PDF bytes under 'inputs', "
                f"got {type(payload).__name__}"
            )

        pdf_path = None
        try:
            # docquery loads documents from a path, so the bytes are spooled to
            # disk. NamedTemporaryFile creates the file atomically with a
            # unique name; delete=False keeps it after the handle is closed so
            # it can be reopened by path.
            with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
                pdf_path = tmp.name
                tmp.write(payload)

            doc = document.load_document(pdf_path)
            predictions = [
                self.pipeline(question=question, **doc.context)
                for question in self.QUESTIONS
            ]
            return {"predictions": predictions}
        finally:
            # Best-effort cleanup; pdf_path is None only if the temp file was
            # never created, in which case there is nothing to remove.
            if pdf_path is not None:
                try:
                    os.remove(pdf_path)
                except FileNotFoundError as e:
                    print(e)
| |
|
| |
|
| |
|
| |
|
| |
|