Spaces:

akhaliq
/

sam3

Running on Zero

App Files Files Community

akhaliq HF Staff commited on 22 days ago

Commit

ff645cc

verified ·

1 Parent(s): b09e323

Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

app.py +198 -0
requirements.txt +17 -0
utils.py +2 -0

app.py ADDED Viewed

	@@ -0,0 +1,198 @@

+import gradio as gr
+import torch
+import numpy as np
+from PIL import Image
+import matplotlib
+from transformers import Sam3Processor, Sam3Model
+import warnings
+warnings.filterwarnings("ignore")
+# Global model and processor
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model = Sam3Model.from_pretrained("facebook/sam3", torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32).to(device)
+processor = Sam3Processor.from_pretrained("facebook/sam3")
+def overlay_masks(image: Image.Image, masks: torch.Tensor) -> Image.Image:
+    """
+    Overlay segmentation masks on the input image using rainbow colormap.
+    """
+    image = image.convert("RGBA")
+    masks = 255 * masks.cpu().numpy().astype(np.uint8)
+    n_masks = masks.shape[0]
+    if n_masks == 0:
+        return image.convert("RGB")
+    cmap = matplotlib.colormaps.get_cmap("rainbow").resampled(n_masks)
+    colors = [
+        tuple(int(c * 255) for c in cmap(i)[:3])
+        for i in range(n_masks)
+    ]
+    for mask, color in zip(masks, colors):
+        mask_img = Image.fromarray(mask)
+        overlay = Image.new("RGBA", image.size, color + (0,))
+        alpha = mask_img.point(lambda v: int(v * 0.5))
+        overlay.putalpha(alpha)
+        image = Image.alpha_composite(image, overlay)
+    return image
+def segment(image: Image.Image, text: str, threshold: float, mask_threshold: float):
+    """
+    Perform promptable concept segmentation using SAM3.
+    """
+    if image is None:
+        return None, "❌ Please upload an image."
+    try:
+        inputs = processor(images=image, text=text.strip(), return_tensors="pt").to(device)
+        with torch.no_grad():
+            outputs = model(**inputs)
+        results = processor.post_process_instance_segmentation(
+            outputs,
+            threshold=threshold,
+            mask_threshold=mask_threshold,
+            target_sizes=inputs.get("original_sizes").tolist()
+        )[0]
+        n_masks = len(results['masks'])
+        if n_masks == 0:
+            return image, f"❌ No objects found matching '{text}' (try adjusting thresholds or changing prompt)."
+        overlaid_image = overlay_masks(image, results["masks"])
+        scores_text = ", ".join([f"{s:.2f}" for s in results['scores'].cpu().numpy()[:5]])  # Top 5 scores
+        info = f"✅ Found **{n_masks}** objects matching **'{text}'**\nConfidence scores: {scores_text}{'...' if n_masks > 5 else ''}"
+        return overlaid_image, info
+    except Exception as e:
+        return image, f"❌ Error during segmentation: {str(e)}"
+# Gradio Interface
+with gr.Blocks(
+    theme=gr.themes.Soft(),
+    title="SAM3 - Promptable Concept Segmentation",
+    css="""
+    .gradio-container {max-width: 1400px !important;}
+    """
+) as demo:
+    gr.Markdown(
+        """
+        # SAM3 - Promptable Concept Segmentation (PCS)
+        **SAM3** performs zero-shot instance segmentation using natural language prompts on images.
+        Upload an image, enter a text prompt (e.g., "person", "car", "dog"), and get segmentation masks for all matching objects.
+        Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
+        """
+    )
+    gr.Markdown("### Inputs")
+    with gr.Row(variant="panel"):
+        image_input = gr.Image(
+            label="Input Image",
+            type="pil",
+            height=400,
+            sources=["upload", "url"],
+            info="Upload or paste image URL"
+        )
+        image_output = gr.Image(
+            label="Output (Segmented Image)",
+            height=400,
+            interactive=False
+        )
+    with gr.Row():
+        text_input = gr.Textbox(
+            label="Text Prompt",
+            placeholder="e.g., a person, ear, cat, bicycle...",
+            scale=3
+        )
+        gr.Button("🔍 Clear", size="sm", variant="secondary").click(
+            fn=lambda: (None, "", None, 0.5, 0.5), outputs=[image_output, text_input, image_input, thresh_slider, mask_thresh_slider]
+        )
+    with gr.Row():
+        thresh_slider = gr.Slider(
+            minimum=0.0,
+            maximum=1.0,
+            value=0.5,
+            step=0.01,
+            label="Detection Threshold",
+            info="Higher values = fewer detections (objectness confidence)"
+        )
+        mask_thresh_slider = gr.Slider(
+            minimum=0.0,
+            maximum=1.0,
+            value=0.5,
+            step=0.01,
+            label="Mask Threshold",
+            info="Higher values = sharper masks"
+        )
+    info_output = gr.Markdown(
+        value="📝 Enter a prompt and click **Segment** to start.",
+        label="Info / Results"
+    )
+    segment_btn = gr.Button("🎯 Segment", variant="primary", size="lg")
+    # Event
+    segment_btn.click(
+        fn=segment,
+        inputs=[image_input, text_input, thresh_slider, mask_thresh_slider],
+        outputs=[image_output, info_output]
+    ).then(
+        fn=lambda: gr.Info("Segmentation complete!"),
+        _js="() => {}"
+    )
+    # Examples
+    gr.Markdown("### Examples")
+    examples = [
+        [
+            "http://images.cocodataset.org/val2017/000000077595.jpg",
+            "ear"
+        ],
+        [
+            "http://images.cocodataset.org/val2017/000000039769.jpg",
+            "cat"
+        ],
+        [
+            "http://images.cocodataset.org/val2017/000000001247.jpg",
+            "person"
+        ],
+        [
+            "http://images.cocodataset.org/val2017/000000521315.jpg",
+            "bicycle"
+        ],
+        [
+            "http://images.cocodataset.org/val2017/000000029369.jpg",
+            "dog"
+        ]
+    ]
+    gr.Examples(
+        examples=examples,
+        inputs=[image_input, text_input],
+        fn=segment,
+        outputs=[image_output, info_output],
+        cache_examples=True,
+        examples_per_page=10,
+        label="Try these COCO examples (URLs auto-load)"
+    )
+    gr.Markdown(
+        """
+        ### Notes
+        - **Model**: [facebook/sam3](https://huggingface.co/facebook/sam3)
+        - Supports natural language prompts like "a red car" or simple nouns.
+        - GPU recommended for faster inference.
+        - Thresholds control detection sensitivity and mask quality.
+        """
+    )
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,17 @@

+git+https://github.com/huggingface/transformers
+torch
+torchvision
+torchaudio
+gradio
+matplotlib
+numpy
+Pillow
+accelerate
+tokenizers
+datasets
+requests
+opencv-python
+scipy
+pillow
+imageio
+scikit-image

utils.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ # No additional utility functions needed beyond what's in app.py
2	+ # All helpers (overlay_masks, segment) are defined in app.py for simplicity and global access.