DeepImagix committed on
Commit
73b2a0d
Β·
verified Β·
1 Parent(s): e5b5f82

Upload main.py

Browse files
Files changed (1) hide show
  1. main.py +120 -23
main.py CHANGED
@@ -1027,8 +1027,8 @@ async def web_search_internal(query: str, num_results: int = 3) -> str:
1027
  # fallback: return list of sources if no sentences extracted
1028
  out = "No summarizable text found. Sources:\n"
1029
  for i, e in enumerate(entries, start=1):
1030
- stars = "⭐️" * (1 + e["cred"]) # 1..3 stars
1031
- out += f"{i}. {e['title']} β€” {e['domain'] or e['link']} {stars}\n {e['link']}\n"
1032
  return out
1033
 
1034
  # Prepare query tokens for scoring (lowercased, split, remove very short tokens)
@@ -1080,10 +1080,10 @@ async def web_search_internal(query: str, num_results: int = 3) -> str:
1080
  out_lines.append("Summary:")
1081
  out_lines.append(" ".join(summary_with_cites))
1082
  out_lines.append("\nSources:")
1083
- # List ALL top results (numbered) β€” show stars and short metadata
1084
  for i, e in enumerate(entries, start=1):
1085
- stars = "⭐️" * (1 + e["cred"]) # 1..3 stars
1086
- out_lines.append(f"{i}. {e['title'] or '(no title)'} β€” {e['domain'] or e['link']} {stars}")
1087
  if e["snippet"]:
1088
  out_lines.append(f" {e['snippet']}")
1089
  out_lines.append(f" {e['link']}")
@@ -1169,7 +1169,11 @@ async def execute_tool(tool_name: str, user_id: str, **kwargs):
1169
  query = kwargs.get('query')
1170
  if not query:
1171
  return {"error": "Search query is missing for web_search tool."}
1172
- return await web_search_internal(query)
 
 
 
 
1173
 
1174
  if tool_name == "get_current_date":
1175
  return get_current_date_internal()
@@ -1372,6 +1376,100 @@ async def submit_labeled_image(
1372
 
1373
  return {"status": "success", "message": "Thank you for your feedback! It will be reviewed to help me learn."}
1374
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1375
  # --- Helper function to preprocess images ---
1376
  def preprocess_image(image_bytes: bytes) -> np.ndarray:
1377
  """
@@ -1448,23 +1546,22 @@ async def image_analysis(user_id: str = Form(...), file: UploadFile = File(...))
1448
  "user_feedback": None
1449
  })
1450
 
1451
- # Final response
1452
- if confident:
1453
- return {
1454
- "status": "success",
1455
- "metadata": metadata_info,
1456
- "predictions": predictions,
1457
- "ocr_text": ocr_text
1458
- }
1459
- else:
1460
- return {
1461
- "status": "uncertain",
1462
- "metadata": metadata_info,
1463
- "predictions": predictions,
1464
- "ocr_text": ocr_text,
1465
- "needs_user_verification": True,
1466
- "message": "Not confident πŸ€”. Please confirm or correct this result."
1467
- }
1468
 
1469
  except Exception as e:
1470
  return {"status": "error", "message": f"Unexpected failure: {str(e)}"}
 
1027
  # fallback: return list of sources if no sentences extracted
1028
  out = "No summarizable text found. Sources:\n"
1029
  for i, e in enumerate(entries, start=1):
1030
+ stars = "*" * (1 + e["cred"]) # credibility: * to ***
1031
+ out += f"{i}. {e['title']} β€” {e['domain'] or e['link']} [{stars}]\n {e['link']}\n"
1032
  return out
1033
 
1034
  # Prepare query tokens for scoring (lowercased, split, remove very short tokens)
 
1080
  out_lines.append("Summary:")
1081
  out_lines.append(" ".join(summary_with_cites))
1082
  out_lines.append("\nSources:")
1083
+ # List ALL top results (numbered) β€” show credibility and short metadata
1084
  for i, e in enumerate(entries, start=1):
1085
+ stars = "*" * (1 + e["cred"]) # credibility: * to ***
1086
+ out_lines.append(f"{i}. {e['title'] or '(no title)'} β€” {e['domain'] or e['link']} [{stars}]")
1087
  if e["snippet"]:
1088
  out_lines.append(f" {e['snippet']}")
1089
  out_lines.append(f" {e['link']}")
 
1169
  query = kwargs.get('query')
1170
  if not query:
1171
  return {"error": "Search query is missing for web_search tool."}
1172
+ # web_search_internal returns a plain string (the formatted summary).
1173
+ # We wrap it in a dict so json.dumps(tool_output) in the tool handler
1174
+ # serialises it as {"result": "..."} instead of double-encoding a raw string.
1175
+ result = await web_search_internal(query)
1176
+ return {"result": result}
1177
 
1178
  if tool_name == "get_current_date":
1179
  return get_current_date_internal()
 
1376
 
1377
  return {"status": "success", "message": "Thank you for your feedback! It will be reviewed to help me learn."}
1378
 
1379
# --- Natural language interpretation builder ---
def build_image_interpretation(predictions: list, ocr_text: str) -> dict:
    """
    Build one natural-language description of an image from MobileNetV2
    predictions plus any OCR text.

    Confidence tiers for the top prediction:
      * probability >= 0.60 -> definite phrasing
      * probability >= 0.35 -> hedged phrasing
      * otherwise           -> honest "not confident" listing

    Supporting labels (probability >= 0.08, excluding the top one) are
    appended as extra detail, and OCR text longer than two characters is
    quoted at the end (truncated to 300 characters).

    Returns a dict with a single "description" key.
    """
    if not predictions:
        return {"description": "I was unable to identify the contents of this image."}

    # Human-readable phrases for common ImageNet labels, keyed by the
    # label text after underscore -> space normalisation.
    phrase_for = {
        "Windsor tie": "a Windsor tie",
        "bow tie": "a bow tie",
        "suit": "a suit",
        "jersey": "a jersey",
        "trench coat": "a trench coat",
        "lab coat": "a lab coat",
        "military uniform": "a military uniform",
        "academic gown": "an academic gown",
        "mortarboard": "a mortarboard",
        "sunglasses": "sunglasses",
        "sunglass": "sunglasses",
        "vestment": "vestments",
        "robe": "a robe",
        "wig": "a wig",
        "helmet": "a helmet",
        "hardhat": "a hard hat",
        "crash helmet": "a crash helmet",
        "brassiere": "a brassiere",
        "bikini": "a bikini",
        "miniskirt": "a miniskirt",
        "jean": "jeans",
        "jean shorts": "jean shorts",
        "sweatshirt": "a sweatshirt",
        "overskirt": "a skirt",
        "sarong": "a sarong",
    }

    def _readable(raw: str) -> str:
        """Normalise an ImageNet-style label into readable English."""
        cleaned = raw.replace("_", " ").strip()
        return phrase_for.get(cleaned, cleaned)

    best = predictions[0]
    best_phrase = _readable(best["description"])
    best_prob = best["probability"]

    # Secondary labels worth mentioning: >= 8%, excluding the top result.
    extras = [
        _readable(p["description"])
        for p in predictions[1:]
        if p["probability"] >= 0.08
    ]

    if best_prob >= 0.60:
        # High confidence -- state it plainly.
        description = f"This image appears to show {best_phrase}"
        if extras:
            description += f", along with {', '.join(extras)}"
        description += "."
    elif best_prob >= 0.35:
        # Medium confidence -- hedge the wording.
        description = f"This image likely contains {best_phrase}"
        if extras:
            description += f", possibly also {', '.join(extras)}"
        description += ". I'm moderately confident in this reading."
    else:
        # Low confidence -- list every plausible (>= 5%) label, or admit defeat.
        plausible = [
            _readable(p["description"])
            for p in predictions
            if p["probability"] >= 0.05
        ]
        if plausible:
            description = (
                f"I'm not very confident, but the image may contain: {', '.join(plausible)}. "
                "You may want to verify this."
            )
        else:
            description = "I could not confidently identify the contents of this image."

    # Quote any readable text OCR picked up; 1-2 character results are
    # treated as noise and ignored.
    if ocr_text and len(ocr_text.strip()) > 2:
        description += f" The image also contains readable text: \"{ocr_text.strip()[:300]}\""

    return {"description": description}
1471
+
1472
+
1473
  # --- Helper function to preprocess images ---
1474
  def preprocess_image(image_bytes: bytes) -> np.ndarray:
1475
  """
 
1546
  "user_feedback": None
1547
  })
1548
 
1549
+ # --- Build a natural language interpretation from predictions + OCR ---
1550
+ interpretation = build_image_interpretation(predictions, ocr_text)
1551
+
1552
+ # Final response β€” always include the interpretation and an invitation
1553
+ response_payload = {
1554
+ "status": "success" if confident else "uncertain",
1555
+ "metadata": metadata_info,
1556
+ "predictions": predictions,
1557
+ "ocr_text": ocr_text if ocr_text else None,
1558
+ "interpretation": interpretation["description"],
1559
+ "follow_up": "How can I assist you with this picture? 😊"
1560
+ }
1561
+ if not confident:
1562
+ response_payload["needs_user_verification"] = True
1563
+
1564
+ return response_payload
 
1565
 
1566
  except Exception as e:
1567
  return {"status": "error", "message": f"Unexpected failure: {str(e)}"}