Spaces:
Running
Running
Upload main.py
Browse files
main.py
CHANGED
|
@@ -1027,8 +1027,8 @@ async def web_search_internal(query: str, num_results: int = 3) -> str:
|
|
| 1027 |
# fallback: return list of sources if no sentences extracted
|
| 1028 |
out = "No summarizable text found. Sources:\n"
|
| 1029 |
for i, e in enumerate(entries, start=1):
|
| 1030 |
-
stars = "
|
| 1031 |
-
out += f"{i}. {e['title']} β {e['domain'] or e['link']} {stars}\n {e['link']}\n"
|
| 1032 |
return out
|
| 1033 |
|
| 1034 |
# Prepare query tokens for scoring (lowercased, split, remove very short tokens)
|
|
@@ -1080,10 +1080,10 @@ async def web_search_internal(query: str, num_results: int = 3) -> str:
|
|
| 1080 |
out_lines.append("Summary:")
|
| 1081 |
out_lines.append(" ".join(summary_with_cites))
|
| 1082 |
out_lines.append("\nSources:")
|
| 1083 |
-
# List ALL top results (numbered) β show
|
| 1084 |
for i, e in enumerate(entries, start=1):
|
| 1085 |
-
stars = "
|
| 1086 |
-
out_lines.append(f"{i}. {e['title'] or '(no title)'} β {e['domain'] or e['link']} {stars}")
|
| 1087 |
if e["snippet"]:
|
| 1088 |
out_lines.append(f" {e['snippet']}")
|
| 1089 |
out_lines.append(f" {e['link']}")
|
|
@@ -1169,7 +1169,11 @@ async def execute_tool(tool_name: str, user_id: str, **kwargs):
|
|
| 1169 |
query = kwargs.get('query')
|
| 1170 |
if not query:
|
| 1171 |
return {"error": "Search query is missing for web_search tool."}
|
| 1172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1173 |
|
| 1174 |
if tool_name == "get_current_date":
|
| 1175 |
return get_current_date_internal()
|
|
@@ -1372,6 +1376,100 @@ async def submit_labeled_image(
|
|
| 1372 |
|
| 1373 |
return {"status": "success", "message": "Thank you for your feedback! It will be reviewed to help me learn."}
|
| 1374 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1375 |
# --- Helper function to preprocess images ---
|
| 1376 |
def preprocess_image(image_bytes: bytes) -> np.ndarray:
|
| 1377 |
"""
|
|
@@ -1448,23 +1546,22 @@ async def image_analysis(user_id: str = Form(...), file: UploadFile = File(...))
|
|
| 1448 |
"user_feedback": None
|
| 1449 |
})
|
| 1450 |
|
| 1451 |
-
#
|
| 1452 |
-
|
| 1453 |
-
|
| 1454 |
-
|
| 1455 |
-
|
| 1456 |
-
|
| 1457 |
-
|
| 1458 |
-
|
| 1459 |
-
|
| 1460 |
-
|
| 1461 |
-
|
| 1462 |
-
|
| 1463 |
-
|
| 1464 |
-
|
| 1465 |
-
|
| 1466 |
-
|
| 1467 |
-
}
|
| 1468 |
|
| 1469 |
except Exception as e:
|
| 1470 |
return {"status": "error", "message": f"Unexpected failure: {str(e)}"}
|
|
|
|
| 1027 |
# fallback: return list of sources if no sentences extracted
|
| 1028 |
out = "No summarizable text found. Sources:\n"
|
| 1029 |
for i, e in enumerate(entries, start=1):
|
| 1030 |
+
stars = "*" * (1 + e["cred"]) # credibility: * to ***
|
| 1031 |
+
out += f"{i}. {e['title']} β {e['domain'] or e['link']} [{stars}]\n {e['link']}\n"
|
| 1032 |
return out
|
| 1033 |
|
| 1034 |
# Prepare query tokens for scoring (lowercased, split, remove very short tokens)
|
|
|
|
| 1080 |
out_lines.append("Summary:")
|
| 1081 |
out_lines.append(" ".join(summary_with_cites))
|
| 1082 |
out_lines.append("\nSources:")
|
| 1083 |
+
# List ALL top results (numbered) — show credibility and short metadata
|
| 1084 |
for i, e in enumerate(entries, start=1):
|
| 1085 |
+
stars = "*" * (1 + e["cred"]) # credibility: * to ***
|
| 1086 |
+
out_lines.append(f"{i}. {e['title'] or '(no title)'} β {e['domain'] or e['link']} [{stars}]")
|
| 1087 |
if e["snippet"]:
|
| 1088 |
out_lines.append(f" {e['snippet']}")
|
| 1089 |
out_lines.append(f" {e['link']}")
|
|
|
|
| 1169 |
query = kwargs.get('query')
|
| 1170 |
if not query:
|
| 1171 |
return {"error": "Search query is missing for web_search tool."}
|
| 1172 |
+
# web_search_internal returns a plain string (the formatted summary).
|
| 1173 |
+
# We wrap it in a dict so json.dumps(tool_output) in the tool handler
|
| 1174 |
+
# serialises it as {"result": "..."} instead of double-encoding a raw string.
|
| 1175 |
+
result = await web_search_internal(query)
|
| 1176 |
+
return {"result": result}
|
| 1177 |
|
| 1178 |
if tool_name == "get_current_date":
|
| 1179 |
return get_current_date_internal()
|
|
|
|
| 1376 |
|
| 1377 |
return {"status": "success", "message": "Thank you for your feedback! It will be reviewed to help me learn."}
|
| 1378 |
|
| 1379 |
+
# --- Natural language interpretation builder ---
|
| 1380 |
+
# Plain-English phrases for ImageNet-style labels, looked up after underscores
# have been replaced by spaces. Labels that are not listed are used unchanged.
_IMAGE_LABEL_PHRASES = {
    "Windsor tie": "a Windsor tie",
    "bow tie": "a bow tie",
    "suit": "a suit",
    "jersey": "a jersey",
    "trench coat": "a trench coat",
    "lab coat": "a lab coat",
    "military uniform": "a military uniform",
    "academic gown": "an academic gown",
    "mortarboard": "a mortarboard",
    "sunglasses": "sunglasses",
    "sunglass": "sunglasses",
    "vestment": "vestments",
    "robe": "a robe",
    "wig": "a wig",
    "helmet": "a helmet",
    "hardhat": "a hard hat",
    "crash helmet": "a crash helmet",
    "brassiere": "a brassiere",
    "bikini": "a bikini",
    "miniskirt": "a miniskirt",
    "jean": "jeans",
    "jean shorts": "jean shorts",
    "sweatshirt": "a sweatshirt",
    "overskirt": "a skirt",
    "sarong": "a sarong",
}


def _clean_image_label(label: str) -> str:
    """Convert an ImageNet-style label (e.g. "lab_coat") to readable text."""
    label = label.replace("_", " ").strip()
    return _IMAGE_LABEL_PHRASES.get(label, label)


def _distinct_image_labels(predictions, min_probability, exclude=()):
    """
    Return the cleaned labels of every prediction whose probability is at
    least `min_probability`, in input order and without repeats.

    Several raw labels can clean to the same phrase ("sunglass" and
    "sunglasses" both become "sunglasses"), so repeats are dropped here.
    Phrases listed in `exclude` are never returned.
    """
    seen = set(exclude)
    labels = []
    for prediction in predictions:
        if prediction["probability"] >= min_probability:
            label = _clean_image_label(prediction["description"])
            if label not in seen:
                seen.add(label)
                labels.append(label)
    return labels


def build_image_interpretation(predictions: list, ocr_text: str) -> dict:
    """
    Turn classifier predictions and OCR text into a single natural-language
    description of the image.

    Args:
        predictions: list of dicts, each with a "description" (label string)
            and a "probability" (number compared against the thresholds
            below). The first entry is treated as the top prediction.
            NOTE(review): assumes the caller passes predictions sorted by
            descending probability - confirm against the caller.
        ocr_text: text read from the image; falsy values (None, "") are
            treated as "no text found".

    Returns:
        A dict with one key, "description", holding the sentence(s).

    Logic:
        - No predictions: fixed "unable to identify" message.
        - Top probability >= 0.60: definite statement anchored on the top label.
        - Top probability >= 0.35: softened statement.
        - Otherwise: list every label with probability >= 0.05, or admit that
          nothing could be identified.
        - In the first two cases, other predictions with probability >= 0.08
          are added as supporting details.
        - Labels are cleaned (underscores replaced, known labels mapped to
          readable phrases) and each phrase is mentioned only once.
        - OCR text longer than 2 characters (after stripping) is appended,
          truncated to 300 characters.
    """
    if not predictions:
        return {"description": "I was unable to identify the contents of this image."}

    top = predictions[0]
    top_label = _clean_image_label(top["description"])
    top_conf = top["probability"]

    # Supporting labels: everything after the top prediction at >= 8%,
    # skipping anything that reads the same as the top label.
    supporting = _distinct_image_labels(predictions[1:], 0.08, exclude=(top_label,))
    supporting_text = ", ".join(supporting)

    if top_conf >= 0.60:
        # High confidence: make a definite statement.
        description = f"This image appears to show {top_label}"
        if supporting:
            description += f", along with {supporting_text}"
        description += "."
    elif top_conf >= 0.35:
        # Medium confidence: soften slightly.
        description = f"This image likely contains {top_label}"
        if supporting:
            description += f", possibly also {supporting_text}"
        description += ". I'm moderately confident in this reading."
    else:
        # Low confidence: be honest and only list labels above the 5% floor.
        all_labels = _distinct_image_labels(predictions, 0.05)
        if all_labels:
            description = (
                f"I'm not very confident, but the image may contain: {', '.join(all_labels)}. "
                "You may want to verify this."
            )
        else:
            description = "I could not confidently identify the contents of this image."

    # Append OCR findings if a meaningful amount of text was found.
    stripped_text = ocr_text.strip() if ocr_text else ""
    if len(stripped_text) > 2:
        description += f" The image also contains readable text: \"{stripped_text[:300]}\""

    return {"description": description}
|
| 1471 |
+
|
| 1472 |
+
|
| 1473 |
# --- Helper function to preprocess images ---
|
| 1474 |
def preprocess_image(image_bytes: bytes) -> np.ndarray:
|
| 1475 |
"""
|
|
|
|
| 1546 |
"user_feedback": None
|
| 1547 |
})
|
| 1548 |
|
| 1549 |
+
# --- Build a natural language interpretation from predictions + OCR ---
|
| 1550 |
+
interpretation = build_image_interpretation(predictions, ocr_text)
|
| 1551 |
+
|
| 1552 |
+
# Final response — always include the interpretation and an invitation
|
| 1553 |
+
response_payload = {
|
| 1554 |
+
"status": "success" if confident else "uncertain",
|
| 1555 |
+
"metadata": metadata_info,
|
| 1556 |
+
"predictions": predictions,
|
| 1557 |
+
"ocr_text": ocr_text if ocr_text else None,
|
| 1558 |
+
"interpretation": interpretation["description"],
|
| 1559 |
+
"follow_up": "How can I assist you with this picture? π"
|
| 1560 |
+
}
|
| 1561 |
+
if not confident:
|
| 1562 |
+
response_payload["needs_user_verification"] = True
|
| 1563 |
+
|
| 1564 |
+
return response_payload
|
|
|
|
| 1565 |
|
| 1566 |
except Exception as e:
|
| 1567 |
return {"status": "error", "message": f"Unexpected failure: {str(e)}"}
|