snake11235 committed on
Commit
4ae3e44
·
1 Parent(s): c0d7f7e

feat: refactor OpenAI backend and logging into separate modules

Browse files

Extract OpenAI-specific functionality and logging utilities into dedicated modules for better code organization and maintainability. Add *.pyc to gitignore.

- Move OPENAI_PRICING, _calculate_openai_cost(), and _run_openai_vision() to openai_backend.py
- Move _log() and _log_debug() functions to logging_helper.py
- Update imports in app.py to use new modules
- Remove try/except wrapper around OpenAI import (now handled in backend module)

Files changed (4) hide show
  1. .gitignore +1 -0
  2. app.py +3 -130
  3. logging_helper.py +10 -0
  4. openai_backend.py +130 -0
.gitignore CHANGED
@@ -1 +1,2 @@
1
  .env
 
 
1
  .env
2
+ *.pyc
app.py CHANGED
@@ -13,11 +13,8 @@ from PIL import Image
13
 
14
  from olmocr.data.renderpdf import render_pdf_to_base64png
15
 
16
- # Optional imports for cloud LLMs
17
- try:
18
- from openai import OpenAI
19
- except ImportError: # pragma: no cover
20
- OpenAI = None # type: ignore
21
 
22
  try:
23
  import google.generativeai as genai
@@ -30,8 +27,6 @@ from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
30
 
31
  APP_TITLE = "words2doc"
32
  APP_DESCRIPTION = "Upload a PDF or image with (handwritten) text and convert it to CSV using different LLM backends."
33
-
34
- MODEL_CHATGPT = os.getenv("WORDS2DOC_OPENAI_MODEL", "gpt-5-mini")
35
  MODEL_GEMINI = "Gemini 3 Pro"
36
  MODEL_OLMOCR = "olmOCR-2-7B-1025-FP8"
37
 
@@ -89,121 +84,6 @@ def _encode_image(image_path):
89
  return base64.b64encode(image_file.read()).decode("utf-8")
90
 
91
 
92
- OPENAI_PRICING = {
93
- # GPT-5.2 family
94
- "gpt-5.2": {"input": 1.75, "output": 14.00},
95
- "gpt-5.2-chat-latest": {"input": 1.75, "output": 14.00},
96
- "gpt-5.2-pro": {"input": 21.00, "output": 168.00},
97
-
98
- # GPT-5.1 / GPT-5 family
99
- "gpt-5.1": {"input": 1.25, "output": 10.00},
100
- "gpt-5": {"input": 1.25, "output": 10.00},
101
- "gpt-5-mini": {"input": 0.25, "output": 2.00},
102
- "gpt-5-nano": {"input": 0.05, "output": 0.40},
103
- "gpt-5.1-chat-latest": {"input": 1.25, "output": 10.00},
104
- "gpt-5-chat-latest": {"input": 1.25, "output": 10.00},
105
- "gpt-5.1-codex-max": {"input": 1.25, "output": 10.00},
106
- "gpt-5.1-codex": {"input": 1.25, "output": 10.00},
107
- "gpt-5-codex": {"input": 1.25, "output": 10.00},
108
- "gpt-5.1-codex-mini": {"input": 0.25, "output": 2.00},
109
- "gpt-5-pro": {"input": 15.00, "output": 120.00},
110
- "gpt-5-search-api": {"input": 1.25, "output": 10.00},
111
-
112
- # GPT-4.1 family
113
- "gpt-4.1": {"input": 2.00, "output": 8.00},
114
- "gpt-4.1-mini": {"input": 0.40, "output": 1.60},
115
- "gpt-4.1-nano": {"input": 0.10, "output": 0.40},
116
-
117
- # GPT-4o family
118
- "gpt-4o": {"input": 2.50, "output": 10.00},
119
- "gpt-4o-2024-05-13": {"input": 5.00, "output": 15.00},
120
- "gpt-4o-mini": {"input": 0.15, "output": 0.60},
121
- "chatgpt-4o-latest": {"input": 5.00, "output": 15.00},
122
-
123
- # GPT-4 Turbo / GPT-4 legacy family (from legacy models table)
124
- "gpt-4-turbo": {"input": 10.00, "output": 30.00},
125
- "gpt-4-turbo-2024-04-09": {"input": 10.00, "output": 30.00},
126
- "gpt-4-0125-preview": {"input": 10.00, "output": 30.00},
127
- "gpt-4-1106-preview": {"input": 10.00, "output": 30.00},
128
- "gpt-4-1106-vision-preview": {"input": 10.00, "output": 30.00},
129
- "gpt-4-0613": {"input": 30.00, "output": 60.00},
130
- "gpt-4-0314": {"input": 30.00, "output": 60.00},
131
- "gpt-4": {"input": 30.00, "output": 60.00},
132
- "gpt-4-32k": {"input": 60.00, "output": 120.00},
133
-
134
- # Default
135
- "default": {"input": 2.50, "output": 10.00},
136
- }
137
-
138
-
139
- def _calculate_openai_cost(usage, model_name: str) -> float:
140
- """Calculate cost based on token usage and model pricing (per 1M tokens)."""
141
- if not usage:
142
- return 0.0
143
-
144
- pricing = OPENAI_PRICING.get(model_name, OPENAI_PRICING["default"])
145
- input_cost = (usage.prompt_tokens / 1_000_000) * pricing["input"]
146
- output_cost = (usage.completion_tokens / 1_000_000) * pricing["output"]
147
- return input_cost + output_cost
148
-
149
-
150
- def _run_openai_vision(image: Image.Image, prompt: str, model_name: str) -> str:
151
- if OpenAI is None:
152
- raise RuntimeError("openai package is not installed. Please install it to use ChatGPT 5.2 backend.")
153
-
154
- api_key = os.getenv("OPENAI_API_KEY")
155
- if not api_key:
156
- raise RuntimeError("OPENAI_API_KEY environment variable is not set.")
157
-
158
- client = OpenAI(api_key=api_key)
159
-
160
- buffered = BytesIO()
161
- image.save(buffered, format="JPEG")
162
- img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
163
-
164
- _log_debug(f"Using OpenAI model: {model_name}")
165
- _log_debug(f"Input image size: {image.size}")
166
-
167
- start_time = time.perf_counter()
168
-
169
- response = client.chat.completions.create(
170
- model=model_name,
171
- messages=[
172
- {
173
- "role": "user",
174
- "content": [
175
- { "type": "text",
176
- "text": prompt
177
- },
178
- {
179
- "type": "image_url",
180
- "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"},
181
- },
182
- ],
183
- }
184
- ],
185
- max_completion_tokens=4048,
186
- )
187
-
188
- duration = time.perf_counter() - start_time
189
-
190
- # Cost calculation and logging
191
- usage = response.usage
192
- if usage:
193
- cost = _calculate_openai_cost(usage, model_name)
194
- _log(f"Model: {model_name}")
195
- _log(f"Token usage: Input={usage.prompt_tokens}, Output={usage.completion_tokens}, Total={usage.total_tokens}")
196
- _log(f"Estimated cost: ${cost:.6f}")
197
- _log(f"Execution time: {duration:.3f} seconds")
198
-
199
- content = response.choices[0].message.content or ""
200
- _log("OpenAI vision response received")
201
- _log_debug(f"Response length: {len(content)} characters")
202
- _log_debug(f"Result: {content}")
203
- _log_debug("End of result")
204
- return content
205
-
206
-
207
  def _run_gemini_vision(image: Image.Image, prompt: str) -> str:
208
  if genai is None:
209
  raise RuntimeError("google-generativeai package is not installed. Please install it to use Gemini backend.")
@@ -312,13 +192,6 @@ def process_document(file_obj, model_choice: str, prompt: str):
312
  csv_file_path = _write_csv_to_temp_file(csv_text)
313
  return csv_text, csv_file_path
314
 
315
- def _log(message: str):
316
- print(f"[WORDS2CSV] {message}")
317
-
318
- def _log_debug(message: str):
319
- if os.getenv("WORDS2CSV_DEBUG"):
320
- print(f"[WORDS2CSV-DEBUG] {message}")
321
-
322
  # -------- Gradio UI -------- #
323
 
324
 
@@ -337,7 +210,7 @@ def build_interface() -> gr.Blocks:
337
  model_selector = gr.Dropdown(
338
  label="LLM backend",
339
  choices=list(OPENAI_PRICING.keys()) + [MODEL_GEMINI, MODEL_OLMOCR],
340
- value=MODEL_CHATGPT,
341
  )
342
 
343
  prompt_editor = gr.Textbox(
 
13
 
14
  from olmocr.data.renderpdf import render_pdf_to_base64png
15
 
16
+ from openai_backend import OPENAI_PRICING, _run_openai_vision
17
+ from logging_helper import log as _log, log_debug as _log_debug
 
 
 
18
 
19
  try:
20
  import google.generativeai as genai
 
27
 
28
  APP_TITLE = "words2doc"
29
  APP_DESCRIPTION = "Upload a PDF or image with (handwritten) text and convert it to CSV using different LLM backends."
 
 
30
  MODEL_GEMINI = "Gemini 3 Pro"
31
  MODEL_OLMOCR = "olmOCR-2-7B-1025-FP8"
32
 
 
84
  return base64.b64encode(image_file.read()).decode("utf-8")
85
 
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  def _run_gemini_vision(image: Image.Image, prompt: str) -> str:
88
  if genai is None:
89
  raise RuntimeError("google-generativeai package is not installed. Please install it to use Gemini backend.")
 
192
  csv_file_path = _write_csv_to_temp_file(csv_text)
193
  return csv_text, csv_file_path
194
 
 
 
 
 
 
 
 
195
  # -------- Gradio UI -------- #
196
 
197
 
 
210
  model_selector = gr.Dropdown(
211
  label="LLM backend",
212
  choices=list(OPENAI_PRICING.keys()) + [MODEL_GEMINI, MODEL_OLMOCR],
213
+ value=list(OPENAI_PRICING.keys())[0],
214
  )
215
 
216
  prompt_editor = gr.Textbox(
logging_helper.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+
4
+ def log(message: str) -> None:
5
+ print(f"[WORDS2CSV] {message}")
6
+
7
+
8
+ def log_debug(message: str) -> None:
9
+ if os.getenv("WORDS2CSV_DEBUG"):
10
+ print(f"[WORDS2CSV-DEBUG] {message}")
openai_backend.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ import time
4
+ from io import BytesIO
5
+
6
+ from typing import Optional
7
+
8
+ from PIL import Image
9
+
10
+ from logging_helper import log as _log, log_debug as _log_debug
11
+
12
+ try:
13
+ from openai import OpenAI
14
+ except ImportError: # pragma: no cover
15
+ OpenAI = None # type: ignore
16
+
17
+
18
+ OPENAI_PRICING = {
19
+ # GPT-5.2 family
20
+ "gpt-5.2": {"input": 1.75, "output": 14.00},
21
+ "gpt-5.2-chat-latest": {"input": 1.75, "output": 14.00},
22
+ "gpt-5.2-pro": {"input": 21.00, "output": 168.00},
23
+
24
+ # GPT-5.1 / GPT-5 family
25
+ "gpt-5.1": {"input": 1.25, "output": 10.00},
26
+ "gpt-5": {"input": 1.25, "output": 10.00},
27
+ "gpt-5-mini": {"input": 0.25, "output": 2.00},
28
+ "gpt-5-nano": {"input": 0.05, "output": 0.40},
29
+ "gpt-5.1-chat-latest": {"input": 1.25, "output": 10.00},
30
+ "gpt-5-chat-latest": {"input": 1.25, "output": 10.00},
31
+ "gpt-5.1-codex-max": {"input": 1.25, "output": 10.00},
32
+ "gpt-5.1-codex": {"input": 1.25, "output": 10.00},
33
+ "gpt-5-codex": {"input": 1.25, "output": 10.00},
34
+ "gpt-5.1-codex-mini": {"input": 0.25, "output": 2.00},
35
+ "gpt-5-pro": {"input": 15.00, "output": 120.00},
36
+ "gpt-5-search-api": {"input": 1.25, "output": 10.00},
37
+
38
+ # GPT-4.1 family
39
+ "gpt-4.1": {"input": 2.00, "output": 8.00},
40
+ "gpt-4.1-mini": {"input": 0.40, "output": 1.60},
41
+ "gpt-4.1-nano": {"input": 0.10, "output": 0.40},
42
+
43
+ # GPT-4o family
44
+ "gpt-4o": {"input": 2.50, "output": 10.00},
45
+ "gpt-4o-2024-05-13": {"input": 5.00, "output": 15.00},
46
+ "gpt-4o-mini": {"input": 0.15, "output": 0.60},
47
+ "chatgpt-4o-latest": {"input": 5.00, "output": 15.00},
48
+
49
+ # GPT-4 Turbo / GPT-4 legacy family (from legacy models table)
50
+ "gpt-4-turbo": {"input": 10.00, "output": 30.00},
51
+ "gpt-4-turbo-2024-04-09": {"input": 10.00, "output": 30.00},
52
+ "gpt-4-0125-preview": {"input": 10.00, "output": 30.00},
53
+ "gpt-4-1106-preview": {"input": 10.00, "output": 30.00},
54
+ "gpt-4-1106-vision-preview": {"input": 10.00, "output": 30.00},
55
+ "gpt-4-0613": {"input": 30.00, "output": 60.00},
56
+ "gpt-4-0314": {"input": 30.00, "output": 60.00},
57
+ "gpt-4": {"input": 30.00, "output": 60.00},
58
+ "gpt-4-32k": {"input": 60.00, "output": 120.00},
59
+
60
+ # Default
61
+ "default": {"input": 2.50, "output": 10.00},
62
+ }
63
+
64
+
65
+ def _calculate_openai_cost(usage, model_name: str) -> float:
66
+ """Calculate cost based on token usage and model pricing (per 1M tokens)."""
67
+ if not usage:
68
+ return 0.0
69
+
70
+ pricing = OPENAI_PRICING.get(model_name, OPENAI_PRICING["default"])
71
+ input_cost = (usage.prompt_tokens / 1_000_000) * pricing["input"]
72
+ output_cost = (usage.completion_tokens / 1_000_000) * pricing["output"]
73
+ return input_cost + output_cost
74
+
75
+
76
+ def _run_openai_vision(image: Image.Image, prompt: str, model_name: str) -> str:
77
+ if OpenAI is None:
78
+ raise RuntimeError("openai package is not installed. Please install it to use ChatGPT 5.2 backend.")
79
+
80
+ api_key = os.getenv("OPENAI_API_KEY")
81
+ if not api_key:
82
+ raise RuntimeError("OPENAI_API_KEY environment variable is not set.")
83
+
84
+ client = OpenAI(api_key=api_key)
85
+
86
+ buffered = BytesIO()
87
+ image.save(buffered, format="JPEG")
88
+ img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
89
+
90
+ _log_debug(f"Using OpenAI model: {model_name}")
91
+ _log_debug(f"Input image size: {image.size}")
92
+
93
+ start_time = time.perf_counter()
94
+
95
+ response = client.chat.completions.create(
96
+ model=model_name,
97
+ messages=[
98
+ {
99
+ "role": "user",
100
+ "content": [
101
+ {"type": "text", "text": prompt},
102
+ {
103
+ "type": "image_url",
104
+ "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"},
105
+ },
106
+ ],
107
+ }
108
+ ],
109
+ max_completion_tokens=4048,
110
+ )
111
+
112
+ duration = time.perf_counter() - start_time
113
+
114
+ usage = response.usage
115
+ if usage:
116
+ cost = _calculate_openai_cost(usage, model_name)
117
+ _log(f"Model: {model_name}")
118
+ _log(
119
+ "Token usage: Input={usage.prompt_tokens}, "
120
+ "Output={usage.completion_tokens}, Total={usage.total_tokens}".format(usage=usage)
121
+ )
122
+ _log(f"Estimated cost: ${cost:.6f}")
123
+ _log(f"Execution time: {duration:.3f} seconds")
124
+
125
+ content = response.choices[0].message.content or ""
126
+ _log("OpenAI vision response received")
127
+ _log_debug(f"Response length: {len(content)} characters")
128
+ _log_debug(f"Result: {content}")
129
+ _log_debug("End of result")
130
+ return content