DeepImagix committed on
Commit
73b2a0d
Β·
verified Β·
1 Parent(s): e5b5f82

Upload main.py

Browse files
Files changed (1) hide show
  1. main.py +120 -23
main.py CHANGED
@@ -1027,8 +1027,8 @@ async def web_search_internal(query: str, num_results: int = 3) -> str:
1027
  # fallback: return list of sources if no sentences extracted
1028
  out = "No summarizable text found. Sources:\n"
1029
  for i, e in enumerate(entries, start=1):
1030
- stars = "⭐️" * (1 + e["cred"]) # 1..3 stars
1031
- out += f"{i}. {e['title']} β€” {e['domain'] or e['link']} {stars}\n {e['link']}\n"
1032
  return out
1033
 
1034
  # Prepare query tokens for scoring (lowercased, split, remove very short tokens)
@@ -1080,10 +1080,10 @@ async def web_search_internal(query: str, num_results: int = 3) -> str:
1080
  out_lines.append("Summary:")
1081
  out_lines.append(" ".join(summary_with_cites))
1082
  out_lines.append("\nSources:")
1083
- # List ALL top results (numbered) β€” show stars and short metadata
1084
  for i, e in enumerate(entries, start=1):
1085
- stars = "⭐️" * (1 + e["cred"]) # 1..3 stars
1086
- out_lines.append(f"{i}. {e['title'] or '(no title)'} β€” {e['domain'] or e['link']} {stars}")
1087
  if e["snippet"]:
1088
  out_lines.append(f" {e['snippet']}")
1089
  out_lines.append(f" {e['link']}")
@@ -1169,7 +1169,11 @@ async def execute_tool(tool_name: str, user_id: str, **kwargs):
1169
  query = kwargs.get('query')
1170
  if not query:
1171
  return {"error": "Search query is missing for web_search tool."}
1172
- return await web_search_internal(query)
 
 
 
 
1173
 
1174
  if tool_name == "get_current_date":
1175
  return get_current_date_internal()
@@ -1372,6 +1376,100 @@ async def submit_labeled_image(
1372
 
1373
  return {"status": "success", "message": "Thank you for your feedback! It will be reviewed to help me learn."}
1374
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1375
  # --- Helper function to preprocess images ---
1376
  def preprocess_image(image_bytes: bytes) -> np.ndarray:
1377
  """
@@ -1448,23 +1546,22 @@ async def image_analysis(user_id: str = Form(...), file: UploadFile = File(...))
1448
  "user_feedback": None
1449
  })
1450
 
1451
- # Final response
1452
- if confident:
1453
- return {
1454
- "status": "success",
1455
- "metadata": metadata_info,
1456
- "predictions": predictions,
1457
- "ocr_text": ocr_text
1458
- }
1459
- else:
1460
- return {
1461
- "status": "uncertain",
1462
- "metadata": metadata_info,
1463
- "predictions": predictions,
1464
- "ocr_text": ocr_text,
1465
- "needs_user_verification": True,
1466
- "message": "Not confident πŸ€”. Please confirm or correct this result."
1467
- }
1468
 
1469
  except Exception as e:
1470
  return {"status": "error", "message": f"Unexpected failure: {str(e)}"}
 
1027
  # fallback: return list of sources if no sentences extracted
1028
  out = "No summarizable text found. Sources:\n"
1029
  for i, e in enumerate(entries, start=1):
1030
+ stars = "*" * (1 + e["cred"]) # credibility: * to ***
1031
+ out += f"{i}. {e['title']} β€” {e['domain'] or e['link']} [{stars}]\n {e['link']}\n"
1032
  return out
1033
 
1034
  # Prepare query tokens for scoring (lowercased, split, remove very short tokens)
 
1080
  out_lines.append("Summary:")
1081
  out_lines.append(" ".join(summary_with_cites))
1082
  out_lines.append("\nSources:")
1083
+ # List ALL top results (numbered) β€” show credibility and short metadata
1084
  for i, e in enumerate(entries, start=1):
1085
+ stars = "*" * (1 + e["cred"]) # credibility: * to ***
1086
+ out_lines.append(f"{i}. {e['title'] or '(no title)'} β€” {e['domain'] or e['link']} [{stars}]")
1087
  if e["snippet"]:
1088
  out_lines.append(f" {e['snippet']}")
1089
  out_lines.append(f" {e['link']}")
 
1169
  query = kwargs.get('query')
1170
  if not query:
1171
  return {"error": "Search query is missing for web_search tool."}
1172
+ # web_search_internal returns a plain string (the formatted summary).
1173
+ # We wrap it in a dict so json.dumps(tool_output) in the tool handler
1174
+ # serialises it as {"result": "..."} instead of double-encoding a raw string.
1175
+ result = await web_search_internal(query)
1176
+ return {"result": result}
1177
 
1178
  if tool_name == "get_current_date":
1179
  return get_current_date_internal()
 
1376
 
1377
  return {"status": "success", "message": "Thank you for your feedback! It will be reviewed to help me learn."}
1378
 
1379
# --- Natural language interpretation builder ---
def build_image_interpretation(predictions: list, ocr_text: str) -> dict:
    """
    Build one natural-language description of an image from MobileNetV2
    predictions plus any OCR text.

    Confidence tiers for the top prediction:
      * probability >= 0.60 -> definite phrasing
      * probability >= 0.35 -> hedged phrasing
      * otherwise           -> honest "not confident" listing

    Supporting labels (probability >= 0.08, excluding the top one) are
    appended as extra detail, and OCR text longer than two characters is
    quoted at the end (truncated to 300 characters).

    Returns a dict with a single "description" key.
    """
    if not predictions:
        return {"description": "I was unable to identify the contents of this image."}

    # Human-readable phrases for common ImageNet labels, keyed by the
    # label text after underscore -> space normalisation.
    phrase_for = {
        "Windsor tie": "a Windsor tie",
        "bow tie": "a bow tie",
        "suit": "a suit",
        "jersey": "a jersey",
        "trench coat": "a trench coat",
        "lab coat": "a lab coat",
        "military uniform": "a military uniform",
        "academic gown": "an academic gown",
        "mortarboard": "a mortarboard",
        "sunglasses": "sunglasses",
        "sunglass": "sunglasses",
        "vestment": "vestments",
        "robe": "a robe",
        "wig": "a wig",
        "helmet": "a helmet",
        "hardhat": "a hard hat",
        "crash helmet": "a crash helmet",
        "brassiere": "a brassiere",
        "bikini": "a bikini",
        "miniskirt": "a miniskirt",
        "jean": "jeans",
        "jean shorts": "jean shorts",
        "sweatshirt": "a sweatshirt",
        "overskirt": "a skirt",
        "sarong": "a sarong",
    }

    def _readable(raw: str) -> str:
        """Normalise an ImageNet-style label into readable English."""
        cleaned = raw.replace("_", " ").strip()
        return phrase_for.get(cleaned, cleaned)

    best = predictions[0]
    best_phrase = _readable(best["description"])
    best_prob = best["probability"]

    # Secondary labels worth mentioning: >= 8%, excluding the top result.
    extras = [
        _readable(p["description"])
        for p in predictions[1:]
        if p["probability"] >= 0.08
    ]

    if best_prob >= 0.60:
        # High confidence -- state it plainly.
        description = f"This image appears to show {best_phrase}"
        if extras:
            description += f", along with {', '.join(extras)}"
        description += "."
    elif best_prob >= 0.35:
        # Medium confidence -- hedge the wording.
        description = f"This image likely contains {best_phrase}"
        if extras:
            description += f", possibly also {', '.join(extras)}"
        description += ". I'm moderately confident in this reading."
    else:
        # Low confidence -- list every plausible (>= 5%) label, or admit defeat.
        plausible = [
            _readable(p["description"])
            for p in predictions
            if p["probability"] >= 0.05
        ]
        if plausible:
            description = (
                f"I'm not very confident, but the image may contain: {', '.join(plausible)}. "
                "You may want to verify this."
            )
        else:
            description = "I could not confidently identify the contents of this image."

    # Quote any readable text OCR picked up; 1-2 character results are
    # treated as noise and ignored.
    if ocr_text and len(ocr_text.strip()) > 2:
        description += f" The image also contains readable text: \"{ocr_text.strip()[:300]}\""

    return {"description": description}
1471
+
1472
+
1473
  # --- Helper function to preprocess images ---
1474
  def preprocess_image(image_bytes: bytes) -> np.ndarray:
1475
  """
 
1546
  "user_feedback": None
1547
  })
1548
 
1549
+ # --- Build a natural language interpretation from predictions + OCR ---
1550
+ interpretation = build_image_interpretation(predictions, ocr_text)
1551
+
1552
+ # Final response β€” always include the interpretation and an invitation
1553
+ response_payload = {
1554
+ "status": "success" if confident else "uncertain",
1555
+ "metadata": metadata_info,
1556
+ "predictions": predictions,
1557
+ "ocr_text": ocr_text if ocr_text else None,
1558
+ "interpretation": interpretation["description"],
1559
+ "follow_up": "How can I assist you with this picture? 😊"
1560
+ }
1561
+ if not confident:
1562
+ response_payload["needs_user_verification"] = True
1563
+
1564
+ return response_payload
 
1565
 
1566
  except Exception as e:
1567
  return {"status": "error", "message": f"Unexpected failure: {str(e)}"}