snake11235 committed on
Commit
4ae3e44
·
1 Parent(s): c0d7f7e

feat: refactor OpenAI backend and logging into separate modules

Browse files

Extract OpenAI-specific functionality and logging utilities into dedicated modules for better code organization and maintainability. Add *.pyc to gitignore.

- Move OPENAI_PRICING, _calculate_openai_cost(), and _run_openai_vision() to openai_backend.py
- Move _log() and _log_debug() functions to logging_helper.py
- Update imports in app.py to use new modules
- Remove try/except wrapper around OpenAI import (now handled in backend module)

Files changed (4) hide show
  1. .gitignore +1 -0
  2. app.py +3 -130
  3. logging_helper.py +10 -0
  4. openai_backend.py +130 -0
.gitignore CHANGED
@@ -1 +1,2 @@
1
  .env
 
 
1
  .env
2
+ *.pyc
app.py CHANGED
@@ -13,11 +13,8 @@ from PIL import Image
13
 
14
  from olmocr.data.renderpdf import render_pdf_to_base64png
15
 
16
- # Optional imports for cloud LLMs
17
- try:
18
- from openai import OpenAI
19
- except ImportError: # pragma: no cover
20
- OpenAI = None # type: ignore
21
 
22
  try:
23
  import google.generativeai as genai
@@ -30,8 +27,6 @@ from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
30
 
31
  APP_TITLE = "words2doc"
32
  APP_DESCRIPTION = "Upload a PDF or image with (handwritten) text and convert it to CSV using different LLM backends."
33
-
34
- MODEL_CHATGPT = os.getenv("WORDS2DOC_OPENAI_MODEL", "gpt-5-mini")
35
  MODEL_GEMINI = "Gemini 3 Pro"
36
  MODEL_OLMOCR = "olmOCR-2-7B-1025-FP8"
37
 
@@ -89,121 +84,6 @@ def _encode_image(image_path):
89
  return base64.b64encode(image_file.read()).decode("utf-8")
90
 
91
 
92
- OPENAI_PRICING = {
93
- # GPT-5.2 family
94
- "gpt-5.2": {"input": 1.75, "output": 14.00},
95
- "gpt-5.2-chat-latest": {"input": 1.75, "output": 14.00},
96
- "gpt-5.2-pro": {"input": 21.00, "output": 168.00},
97
-
98
- # GPT-5.1 / GPT-5 family
99
- "gpt-5.1": {"input": 1.25, "output": 10.00},
100
- "gpt-5": {"input": 1.25, "output": 10.00},
101
- "gpt-5-mini": {"input": 0.25, "output": 2.00},
102
- "gpt-5-nano": {"input": 0.05, "output": 0.40},
103
- "gpt-5.1-chat-latest": {"input": 1.25, "output": 10.00},
104
- "gpt-5-chat-latest": {"input": 1.25, "output": 10.00},
105
- "gpt-5.1-codex-max": {"input": 1.25, "output": 10.00},
106
- "gpt-5.1-codex": {"input": 1.25, "output": 10.00},
107
- "gpt-5-codex": {"input": 1.25, "output": 10.00},
108
- "gpt-5.1-codex-mini": {"input": 0.25, "output": 2.00},
109
- "gpt-5-pro": {"input": 15.00, "output": 120.00},
110
- "gpt-5-search-api": {"input": 1.25, "output": 10.00},
111
-
112
- # GPT-4.1 family
113
- "gpt-4.1": {"input": 2.00, "output": 8.00},
114
- "gpt-4.1-mini": {"input": 0.40, "output": 1.60},
115
- "gpt-4.1-nano": {"input": 0.10, "output": 0.40},
116
-
117
- # GPT-4o family
118
- "gpt-4o": {"input": 2.50, "output": 10.00},
119
- "gpt-4o-2024-05-13": {"input": 5.00, "output": 15.00},
120
- "gpt-4o-mini": {"input": 0.15, "output": 0.60},
121
- "chatgpt-4o-latest": {"input": 5.00, "output": 15.00},
122
-
123
- # GPT-4 Turbo / GPT-4 legacy family (from legacy models table)
124
- "gpt-4-turbo": {"input": 10.00, "output": 30.00},
125
- "gpt-4-turbo-2024-04-09": {"input": 10.00, "output": 30.00},
126
- "gpt-4-0125-preview": {"input": 10.00, "output": 30.00},
127
- "gpt-4-1106-preview": {"input": 10.00, "output": 30.00},
128
- "gpt-4-1106-vision-preview": {"input": 10.00, "output": 30.00},
129
- "gpt-4-0613": {"input": 30.00, "output": 60.00},
130
- "gpt-4-0314": {"input": 30.00, "output": 60.00},
131
- "gpt-4": {"input": 30.00, "output": 60.00},
132
- "gpt-4-32k": {"input": 60.00, "output": 120.00},
133
-
134
- # Default
135
- "default": {"input": 2.50, "output": 10.00},
136
- }
137
-
138
-
139
- def _calculate_openai_cost(usage, model_name: str) -> float:
140
- """Calculate cost based on token usage and model pricing (per 1M tokens)."""
141
- if not usage:
142
- return 0.0
143
-
144
- pricing = OPENAI_PRICING.get(model_name, OPENAI_PRICING["default"])
145
- input_cost = (usage.prompt_tokens / 1_000_000) * pricing["input"]
146
- output_cost = (usage.completion_tokens / 1_000_000) * pricing["output"]
147
- return input_cost + output_cost
148
-
149
-
150
- def _run_openai_vision(image: Image.Image, prompt: str, model_name: str) -> str:
151
- if OpenAI is None:
152
- raise RuntimeError("openai package is not installed. Please install it to use ChatGPT 5.2 backend.")
153
-
154
- api_key = os.getenv("OPENAI_API_KEY")
155
- if not api_key:
156
- raise RuntimeError("OPENAI_API_KEY environment variable is not set.")
157
-
158
- client = OpenAI(api_key=api_key)
159
-
160
- buffered = BytesIO()
161
- image.save(buffered, format="JPEG")
162
- img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
163
-
164
- _log_debug(f"Using OpenAI model: {model_name}")
165
- _log_debug(f"Input image size: {image.size}")
166
-
167
- start_time = time.perf_counter()
168
-
169
- response = client.chat.completions.create(
170
- model=model_name,
171
- messages=[
172
- {
173
- "role": "user",
174
- "content": [
175
- { "type": "text",
176
- "text": prompt
177
- },
178
- {
179
- "type": "image_url",
180
- "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"},
181
- },
182
- ],
183
- }
184
- ],
185
- max_completion_tokens=4048,
186
- )
187
-
188
- duration = time.perf_counter() - start_time
189
-
190
- # Cost calculation and logging
191
- usage = response.usage
192
- if usage:
193
- cost = _calculate_openai_cost(usage, model_name)
194
- _log(f"Model: {model_name}")
195
- _log(f"Token usage: Input={usage.prompt_tokens}, Output={usage.completion_tokens}, Total={usage.total_tokens}")
196
- _log(f"Estimated cost: ${cost:.6f}")
197
- _log(f"Execution time: {duration:.3f} seconds")
198
-
199
- content = response.choices[0].message.content or ""
200
- _log("OpenAI vision response received")
201
- _log_debug(f"Response length: {len(content)} characters")
202
- _log_debug(f"Result: {content}")
203
- _log_debug("End of result")
204
- return content
205
-
206
-
207
  def _run_gemini_vision(image: Image.Image, prompt: str) -> str:
208
  if genai is None:
209
  raise RuntimeError("google-generativeai package is not installed. Please install it to use Gemini backend.")
@@ -312,13 +192,6 @@ def process_document(file_obj, model_choice: str, prompt: str):
312
  csv_file_path = _write_csv_to_temp_file(csv_text)
313
  return csv_text, csv_file_path
314
 
315
- def _log(message: str):
316
- print(f"[WORDS2CSV] {message}")
317
-
318
- def _log_debug(message: str):
319
- if os.getenv("WORDS2CSV_DEBUG"):
320
- print(f"[WORDS2CSV-DEBUG] {message}")
321
-
322
  # -------- Gradio UI -------- #
323
 
324
 
@@ -337,7 +210,7 @@ def build_interface() -> gr.Blocks:
337
  model_selector = gr.Dropdown(
338
  label="LLM backend",
339
  choices=list(OPENAI_PRICING.keys()) + [MODEL_GEMINI, MODEL_OLMOCR],
340
- value=MODEL_CHATGPT,
341
  )
342
 
343
  prompt_editor = gr.Textbox(
 
13
 
14
  from olmocr.data.renderpdf import render_pdf_to_base64png
15
 
16
+ from openai_backend import OPENAI_PRICING, _run_openai_vision
17
+ from logging_helper import log as _log, log_debug as _log_debug
 
 
 
18
 
19
  try:
20
  import google.generativeai as genai
 
27
 
28
  APP_TITLE = "words2doc"
29
  APP_DESCRIPTION = "Upload a PDF or image with (handwritten) text and convert it to CSV using different LLM backends."
 
 
30
  MODEL_GEMINI = "Gemini 3 Pro"
31
  MODEL_OLMOCR = "olmOCR-2-7B-1025-FP8"
32
 
 
84
  return base64.b64encode(image_file.read()).decode("utf-8")
85
 
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  def _run_gemini_vision(image: Image.Image, prompt: str) -> str:
88
  if genai is None:
89
  raise RuntimeError("google-generativeai package is not installed. Please install it to use Gemini backend.")
 
192
  csv_file_path = _write_csv_to_temp_file(csv_text)
193
  return csv_text, csv_file_path
194
 
 
 
 
 
 
 
 
195
  # -------- Gradio UI -------- #
196
 
197
 
 
210
  model_selector = gr.Dropdown(
211
  label="LLM backend",
212
  choices=list(OPENAI_PRICING.keys()) + [MODEL_GEMINI, MODEL_OLMOCR],
213
+ value=list(OPENAI_PRICING.keys())[0],
214
  )
215
 
216
  prompt_editor = gr.Textbox(
logging_helper.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+
4
+ def log(message: str) -> None:
5
+ print(f"[WORDS2CSV] {message}")
6
+
7
+
8
+ def log_debug(message: str) -> None:
9
+ if os.getenv("WORDS2CSV_DEBUG"):
10
+ print(f"[WORDS2CSV-DEBUG] {message}")
openai_backend.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ import time
4
+ from io import BytesIO
5
+
6
+ from typing import Optional
7
+
8
+ from PIL import Image
9
+
10
+ from logging_helper import log as _log, log_debug as _log_debug
11
+
12
+ try:
13
+ from openai import OpenAI
14
+ except ImportError: # pragma: no cover
15
+ OpenAI = None # type: ignore
16
+
17
+
18
+ OPENAI_PRICING = {
19
+ # GPT-5.2 family
20
+ "gpt-5.2": {"input": 1.75, "output": 14.00},
21
+ "gpt-5.2-chat-latest": {"input": 1.75, "output": 14.00},
22
+ "gpt-5.2-pro": {"input": 21.00, "output": 168.00},
23
+
24
+ # GPT-5.1 / GPT-5 family
25
+ "gpt-5.1": {"input": 1.25, "output": 10.00},
26
+ "gpt-5": {"input": 1.25, "output": 10.00},
27
+ "gpt-5-mini": {"input": 0.25, "output": 2.00},
28
+ "gpt-5-nano": {"input": 0.05, "output": 0.40},
29
+ "gpt-5.1-chat-latest": {"input": 1.25, "output": 10.00},
30
+ "gpt-5-chat-latest": {"input": 1.25, "output": 10.00},
31
+ "gpt-5.1-codex-max": {"input": 1.25, "output": 10.00},
32
+ "gpt-5.1-codex": {"input": 1.25, "output": 10.00},
33
+ "gpt-5-codex": {"input": 1.25, "output": 10.00},
34
+ "gpt-5.1-codex-mini": {"input": 0.25, "output": 2.00},
35
+ "gpt-5-pro": {"input": 15.00, "output": 120.00},
36
+ "gpt-5-search-api": {"input": 1.25, "output": 10.00},
37
+
38
+ # GPT-4.1 family
39
+ "gpt-4.1": {"input": 2.00, "output": 8.00},
40
+ "gpt-4.1-mini": {"input": 0.40, "output": 1.60},
41
+ "gpt-4.1-nano": {"input": 0.10, "output": 0.40},
42
+
43
+ # GPT-4o family
44
+ "gpt-4o": {"input": 2.50, "output": 10.00},
45
+ "gpt-4o-2024-05-13": {"input": 5.00, "output": 15.00},
46
+ "gpt-4o-mini": {"input": 0.15, "output": 0.60},
47
+ "chatgpt-4o-latest": {"input": 5.00, "output": 15.00},
48
+
49
+ # GPT-4 Turbo / GPT-4 legacy family (from legacy models table)
50
+ "gpt-4-turbo": {"input": 10.00, "output": 30.00},
51
+ "gpt-4-turbo-2024-04-09": {"input": 10.00, "output": 30.00},
52
+ "gpt-4-0125-preview": {"input": 10.00, "output": 30.00},
53
+ "gpt-4-1106-preview": {"input": 10.00, "output": 30.00},
54
+ "gpt-4-1106-vision-preview": {"input": 10.00, "output": 30.00},
55
+ "gpt-4-0613": {"input": 30.00, "output": 60.00},
56
+ "gpt-4-0314": {"input": 30.00, "output": 60.00},
57
+ "gpt-4": {"input": 30.00, "output": 60.00},
58
+ "gpt-4-32k": {"input": 60.00, "output": 120.00},
59
+
60
+ # Default
61
+ "default": {"input": 2.50, "output": 10.00},
62
+ }
63
+
64
+
65
+ def _calculate_openai_cost(usage, model_name: str) -> float:
66
+ """Calculate cost based on token usage and model pricing (per 1M tokens)."""
67
+ if not usage:
68
+ return 0.0
69
+
70
+ pricing = OPENAI_PRICING.get(model_name, OPENAI_PRICING["default"])
71
+ input_cost = (usage.prompt_tokens / 1_000_000) * pricing["input"]
72
+ output_cost = (usage.completion_tokens / 1_000_000) * pricing["output"]
73
+ return input_cost + output_cost
74
+
75
+
76
+ def _run_openai_vision(image: Image.Image, prompt: str, model_name: str) -> str:
77
+ if OpenAI is None:
78
+ raise RuntimeError("openai package is not installed. Please install it to use ChatGPT 5.2 backend.")
79
+
80
+ api_key = os.getenv("OPENAI_API_KEY")
81
+ if not api_key:
82
+ raise RuntimeError("OPENAI_API_KEY environment variable is not set.")
83
+
84
+ client = OpenAI(api_key=api_key)
85
+
86
+ buffered = BytesIO()
87
+ image.save(buffered, format="JPEG")
88
+ img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
89
+
90
+ _log_debug(f"Using OpenAI model: {model_name}")
91
+ _log_debug(f"Input image size: {image.size}")
92
+
93
+ start_time = time.perf_counter()
94
+
95
+ response = client.chat.completions.create(
96
+ model=model_name,
97
+ messages=[
98
+ {
99
+ "role": "user",
100
+ "content": [
101
+ {"type": "text", "text": prompt},
102
+ {
103
+ "type": "image_url",
104
+ "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"},
105
+ },
106
+ ],
107
+ }
108
+ ],
109
+ max_completion_tokens=4048,
110
+ )
111
+
112
+ duration = time.perf_counter() - start_time
113
+
114
+ usage = response.usage
115
+ if usage:
116
+ cost = _calculate_openai_cost(usage, model_name)
117
+ _log(f"Model: {model_name}")
118
+ _log(
119
+ "Token usage: Input={usage.prompt_tokens}, "
120
+ "Output={usage.completion_tokens}, Total={usage.total_tokens}".format(usage=usage)
121
+ )
122
+ _log(f"Estimated cost: ${cost:.6f}")
123
+ _log(f"Execution time: {duration:.3f} seconds")
124
+
125
+ content = response.choices[0].message.content or ""
126
+ _log("OpenAI vision response received")
127
+ _log_debug(f"Response length: {len(content)} characters")
128
+ _log_debug(f"Result: {content}")
129
+ _log_debug("End of result")
130
+ return content