Xenova HF staff committed on
Commit
1af09e8
1 Parent(s): 685050c

Upload 3 files

Browse files
Files changed (3) hide show
  1. index.css +119 -0
  2. index.html +41 -28
  3. index.js +325 -79
index.css ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/* Global reset: border-box sizing and one sans-serif font everywhere */
* {
    box-sizing: border-box;
    padding: 0;
    margin: 0;
    font-family: sans-serif;
}

html,
body {
    height: 100%;
}

body {
    padding: 16px 32px;
}

/* Centred column layout for the page, the image container and the upload button */
body,
#container,
#upload-button {
    display: flex;
    flex-direction: column;
    justify-content: center;
    align-items: center;
}

h1,
h3 {
    text-align: center;
}

/* Dashed drop-zone; the selected image is shown as its background */
#container {
    position: relative;
    width: 640px;
    height: 420px;
    max-width: 100%;
    max-height: 100%;
    border: 2px dashed #D1D5DB;
    border-radius: 0.75rem;
    overflow: hidden;
    cursor: pointer;
    margin-top: 1rem;
    background-size: 100% 100%;
    background-position: center;
    background-repeat: no-repeat;
}

/* Mask canvas overlays the image; clicks fall through to the container */
#mask-output {
    position: absolute;
    width: 100%;
    height: 100%;
    pointer-events: none;
}

#upload-button {
    gap: 0.4rem;
    font-size: 18px;
    cursor: pointer;
    opacity: 0.2; /* dimmed until the model has loaded (see index.js) */
}

/* Hidden native file input — triggered via its <label> */
#upload {
    display: none;
}

svg {
    pointer-events: none;
}

#example {
    font-size: 14px;
    text-decoration: underline;
    cursor: pointer;
    pointer-events: none; /* re-enabled from JS once the model is ready */
}

#example:hover {
    color: #2563EB;
}

canvas {
    position: absolute;
    width: 100%;
    height: 100%;
    opacity: 0.6;
}

#status {
    min-height: 16px;
    margin: 8px 0;
}

/* Star/cross point markers, centred on the clicked position */
.icon {
    height: 16px;
    width: 16px;
    position: absolute;
    transform: translate(-50%, -50%);
}

#controls>button {
    padding: 6px 12px;
    background-color: #3498db;
    color: white;
    border: 1px solid #2980b9;
    border-radius: 5px;
    cursor: pointer;
    font-size: 16px;
}

#controls>button:disabled {
    background-color: #d1d5db;
    color: #6b7280;
    border: 1px solid #9ca3af;
    cursor: not-allowed;
}

#information {
    margin-top: 0.25rem;
    font-size: 15px;
}
index.html CHANGED
@@ -1,29 +1,42 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
-
4
- <head>
5
- <meta charset="UTF-8" />
6
- <link rel="stylesheet" href="style.css" />
7
-
8
- <meta name="viewport" content="width=device-width, initial-scale=1.0" />
9
- <title>Transformers.js - Object Detection</title>
10
- </head>
11
-
12
- <body>
13
- <h1>Object Detection w/ 🤗 Transformers.js</h1>
14
- <label id="container" for="upload">
15
- <svg width="25" height="25" viewBox="0 0 25 25" fill="none" xmlns="http://www.w3.org/2000/svg">
16
- <path fill="#000"
17
- d="M3.5 24.3a3 3 0 0 1-1.9-.8c-.5-.5-.8-1.2-.8-1.9V2.9c0-.7.3-1.3.8-1.9.6-.5 1.2-.7 2-.7h18.6c.7 0 1.3.2 1.9.7.5.6.7 1.2.7 2v18.6c0 .7-.2 1.4-.7 1.9a3 3 0 0 1-2 .8H3.6Zm0-2.7h18.7V2.9H3.5v18.7Zm2.7-2.7h13.3c.3 0 .5 0 .6-.3v-.7l-3.7-5a.6.6 0 0 0-.6-.2c-.2 0-.4 0-.5.3l-3.5 4.6-2.4-3.3a.6.6 0 0 0-.6-.3c-.2 0-.4.1-.5.3l-2.7 3.6c-.1.2-.2.4 0 .7.1.2.3.3.6.3Z">
18
- </path>
19
- </svg>
20
- Click to upload image
21
- <label id="example">(or try example)</label>
22
- </label>
23
- <label id="status">Loading model...</label>
24
- <input id="upload" type="file" accept="image/*" />
25
-
26
- <script src="index.js" type="module"></script>
27
- </body>
28
-
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  </html>
 
<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8" />
    <link rel="stylesheet" href="index.css" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Transformers.js - Segment Anything WebGPU</title>
</head>

<body>
    <h1>Segment Anything WebGPU</h1>
    <h3>In-browser image segmentation w/ <a href="https://hf.co/docs/transformers.js" target="_blank">🤗
            Transformers.js</a></h3>

    <!-- Drop-zone: the upload label is hidden once an image is chosen, and the
         segmentation mask is drawn onto the overlaid canvas -->
    <div id="container">
        <label id="upload-button" for="upload">
            <svg width="25" height="25" viewBox="0 0 25 25" fill="none" xmlns="http://www.w3.org/2000/svg">
                <path fill="#000"
                    d="M3.5 24.3a3 3 0 0 1-1.9-.8c-.5-.5-.8-1.2-.8-1.9V2.9c0-.7.3-1.3.8-1.9.6-.5 1.2-.7 2-.7h18.6c.7 0 1.3.2 1.9.7.5.6.7 1.2.7 2v18.6c0 .7-.2 1.4-.7 1.9a3 3 0 0 1-2 .8H3.6Zm0-2.7h18.7V2.9H3.5v18.7Zm2.7-2.7h13.3c.3 0 .5 0 .6-.3v-.7l-3.7-5a.6.6 0 0 0-.6-.2c-.2 0-.4 0-.5.3l-3.5 4.6-2.4-3.3a.6.6 0 0 0-.6-.3c-.2 0-.4.1-.5.3l-2.7 3.6c-.1.2-.2.4 0 .7.1.2.3.3.6.3Z">
                </path>
            </svg>
            Click to upload image
            <label id="example">(or try example)</label>
        </label>
        <canvas id="mask-output"></canvas>
    </div>

    <label id="status"></label>

    <div id="controls">
        <button id="reset-image">Reset image</button>
        <button id="clear-points">Clear points</button>
        <button id="cut-mask" disabled>Cut mask</button>
    </div>

    <p id="information">
        Left click = positive points, right click = negative points.
    </p>

    <!-- Hidden file input; enabled from index.js once the model has loaded -->
    <input id="upload" type="file" accept="image/*" disabled />

    <script src="index.js" type="module"></script>
</body>

</html>
index.js CHANGED
@@ -1,79 +1,325 @@
1
- import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.10.1';
2
-
3
- // Since we will download the model from the Hugging Face Hub, we can skip the local model check
4
- env.allowLocalModels = false;
5
-
6
- // Reference the elements that we will need
7
- const status = document.getElementById('status');
8
- const fileUpload = document.getElementById('upload');
9
- const imageContainer = document.getElementById('container');
10
- const example = document.getElementById('example');
11
-
12
- const EXAMPLE_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/city-streets.jpg';
13
-
14
- // Create a new object detection pipeline
15
- status.textContent = 'Loading model...';
16
- const detector = await pipeline('object-detection', 'Xenova/detr-resnet-50');
17
- status.textContent = 'Ready';
18
-
19
- example.addEventListener('click', (e) => {
20
- e.preventDefault();
21
- detect(EXAMPLE_URL);
22
- });
23
-
24
- fileUpload.addEventListener('change', function (e) {
25
- const file = e.target.files[0];
26
- if (!file) {
27
- return;
28
- }
29
-
30
- const reader = new FileReader();
31
-
32
- // Set up a callback when the file is loaded
33
- reader.onload = e2 => detect(e2.target.result);
34
-
35
- reader.readAsDataURL(file);
36
- });
37
-
38
-
39
- // Detect objects in the image
40
- async function detect(img) {
41
- imageContainer.innerHTML = '';
42
- imageContainer.style.backgroundImage = `url(${img})`;
43
-
44
- status.textContent = 'Analysing...';
45
- const output = await detector(img, {
46
- threshold: 0.5,
47
- percentage: true,
48
- });
49
- status.textContent = '';
50
- output.forEach(renderBox);
51
- }
52
-
53
- // Render a bounding box and label on the image
54
- function renderBox({ box, label }) {
55
- const { xmax, xmin, ymax, ymin } = box;
56
-
57
- // Generate a random color for the box
58
- const color = '#' + Math.floor(Math.random() * 0xFFFFFF).toString(16).padStart(6, 0);
59
-
60
- // Draw the box
61
- const boxElement = document.createElement('div');
62
- boxElement.className = 'bounding-box';
63
- Object.assign(boxElement.style, {
64
- borderColor: color,
65
- left: 100 * xmin + '%',
66
- top: 100 * ymin + '%',
67
- width: 100 * (xmax - xmin) + '%',
68
- height: 100 * (ymax - ymin) + '%',
69
- })
70
-
71
- // Draw label
72
- const labelElement = document.createElement('span');
73
- labelElement.textContent = label;
74
- labelElement.className = 'bounding-box-label';
75
- labelElement.style.backgroundColor = color;
76
-
77
- boxElement.appendChild(labelElement);
78
- imageContainer.appendChild(boxElement);
79
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import { SamModel, AutoProcessor, RawImage, Tensor } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.4';

// UI element references
const statusLabel = document.getElementById('status');
const fileUpload = document.getElementById('upload');
const imageContainer = document.getElementById('container');
const example = document.getElementById('example');
const maskCanvas = document.getElementById('mask-output');
const uploadButton = document.getElementById('upload-button');
const resetButton = document.getElementById('reset-image');
const clearButton = document.getElementById('clear-points');
const cutButton = document.getElementById('cut-mask');

// Mutable application state
let lastPoints = null;       // prompt points used by the next decode()
let isDecoding = false;      // a decode() call is currently awaiting the model
let isMultiMaskMode = false; // true after the first click (point-by-point mode)
let imageDataURI = null;     // source (data URI or URL) of the loaded image
let imageInputs = null;      // processor outputs for the loaded image
let imageEmbeddings = null;  // cached encoder outputs for the loaded image

// Remote assets
const BASE_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/';
const EXAMPLE_URL = BASE_URL + 'corgi.jpg';

// Preload the star (positive) and cross (negative) marker images so the
// first click does not stall on a network fetch.
const star = new Image();
star.src = BASE_URL + 'star-icon.png';
star.className = 'icon';

const cross = new Image();
cross.src = BASE_URL + 'cross-icon.png';
cross.className = 'icon';
// Decode the current prompt points into a segmentation mask and draw the
// best-scoring candidate onto the mask canvas. Requires that segment() has
// already populated `imageInputs` and `imageEmbeddings`.
async function decode() {
    if (!imageInputs || !imageEmbeddings) {
        return;
    }
    isDecoding = true;

    // Convert normalised [0, 1] points into pixel coordinates of the
    // reshaped model input (reshaped = [height, width]).
    const reshaped = imageInputs.reshaped_input_sizes[0];
    const points = lastPoints.map(x => [x.point[0] * reshaped[1], x.point[1] * reshaped[0]]);
    const labels = lastPoints.map(x => BigInt(x.label));

    const input_points = new Tensor(
        'float32',
        points.flat(Infinity),
        [1, 1, points.length, 2],
    );
    const input_labels = new Tensor(
        'int64',
        labels.flat(Infinity),
        [1, 1, labels.length],
    );

    // Generate the mask
    const { pred_masks, iou_scores } = await model({
        ...imageEmbeddings,
        input_points,
        input_labels,
    });

    // Post-process the mask back to the original image resolution
    const masks = await processor.post_process_masks(
        pred_masks,
        imageInputs.original_sizes,
        imageInputs.reshaped_input_sizes,
    );

    const data = {
        mask: RawImage.fromTensor(masks[0][0]),
        scores: iou_scores.data,
    };
    isDecoding = false;

    if (!isMultiMaskMode && lastPoints) {
        // Hover mode: the mouse may have moved while this decode was in
        // flight, so kick off another decode with the latest point, then
        // clear it so the chain terminates.
        decode();
        lastPoints = null;
    }

    const { mask, scores } = data;

    // Update canvas dimensions (if different)
    if (maskCanvas.width !== mask.width || maskCanvas.height !== mask.height) {
        maskCanvas.width = mask.width;
        maskCanvas.height = mask.height;
    }

    // Create context and allocate buffer for pixel data
    const context = maskCanvas.getContext('2d');
    const imageData = context.createImageData(maskCanvas.width, maskCanvas.height);

    // Select the candidate mask with the highest IoU score.
    // `mask` stores `numMasks` interleaved channels per pixel.
    const numMasks = scores.length; // 3
    let bestIndex = 0;
    for (let i = 1; i < numMasks; ++i) {
        if (scores[i] > scores[bestIndex]) {
            bestIndex = i;
        }
    }
    statusLabel.textContent = `Segment score: ${scores[bestIndex].toFixed(2)}`;

    // Fill the selected mask with a translucent blue.
    // FIX: iterate over pixels (length / 4), not RGBA bytes — the previous
    // bound read past the end of `mask.data` and did 4x the necessary work
    // (out-of-range reads were `undefined`, so the output was unchanged).
    const pixelData = imageData.data;
    const numPixels = pixelData.length / 4;
    for (let i = 0; i < numPixels; ++i) {
        if (mask.data[numMasks * i + bestIndex] === 1) {
            const offset = 4 * i;
            pixelData[offset] = 0;       // red
            pixelData[offset + 1] = 114; // green
            pixelData[offset + 2] = 189; // blue
            pixelData[offset + 3] = 255; // alpha
        }
    }

    // Draw image data to context
    context.putImageData(imageData, 0, 0);
}
// Remove all prompt points and erase the mask overlay, returning the app
// to hover (single-point preview) mode.
function clearPointsAndMask() {
    // Back to hover mode with no stored points
    isMultiMaskMode = false;
    lastPoints = null;

    // Drop any star/cross markers left over from the previous mask
    for (const el of document.querySelectorAll('.icon')) {
        el.remove();
    }

    // Nothing left to cut
    cutButton.disabled = true;

    // Wipe the mask canvas
    const ctx = maskCanvas.getContext('2d');
    ctx.clearRect(0, 0, maskCanvas.width, maskCanvas.height);
}
clearButton.addEventListener('click', clearPointsAndMask);
// Reset the demo to its initial (no image loaded) state.
// (Deduplicated: the original assigned `imageEmbeddings = null` twice in
// two separate "reset state" stanzas.)
resetButton.addEventListener('click', () => {
    // Reset state
    imageDataURI = null;
    imageInputs = null;
    imageEmbeddings = null;
    isDecoding = false;

    // Clear points and mask (if present)
    clearPointsAndMask();

    // Update UI
    cutButton.disabled = true;
    imageContainer.style.backgroundImage = 'none';
    uploadButton.style.display = 'flex';
    statusLabel.textContent = 'Ready';
});
// Load an image (data URI or URL), display it, and compute + cache its
// embeddings so that subsequent decode() calls are cheap.
async function segment(data) {
    statusLabel.textContent = 'Extracting image embedding...';

    // Update state
    imageEmbeddings = null;
    imageDataURI = data;

    // Update UI
    imageContainer.style.backgroundImage = `url(${data})`;
    uploadButton.style.display = 'none';
    cutButton.disabled = true;

    try {
        // Read the image and recompute image embeddings
        const image = await RawImage.read(data);
        imageInputs = await processor(image);
        imageEmbeddings = await model.get_image_embeddings(imageInputs);
        statusLabel.textContent = 'Embedding extracted!';
    } catch (err) {
        // FIX: surface failures (bad file, network error) instead of
        // leaving the UI stuck on the "Extracting..." message forever.
        statusLabel.textContent = 'Failed to load image.';
        uploadButton.style.display = 'flex';
        console.error(err);
    }
}
// Handle file selection: read the chosen file as a data URI, then encode it.
fileUpload.addEventListener('change', (e) => {
    const [file] = e.target.files;
    if (!file) {
        return;
    }

    const reader = new FileReader();
    reader.onload = (loadEvent) => segment(loadEvent.target.result);
    reader.readAsDataURL(file);
});

// Run the demo on the bundled example image instead of an upload.
example.addEventListener('click', (e) => {
    e.preventDefault();
    segment(EXAMPLE_URL);
});
// Place a star (positive) or cross (negative) marker at a normalised
// [0, 1] point inside the image container.
function addIcon({ point, label }) {
    const template = label === 1 ? star : cross;
    const icon = template.cloneNode();
    const [x, y] = point;
    icon.style.left = `${x * 100}%`;
    icon.style.top = `${y * 100}%`;
    imageContainer.appendChild(icon);
}
// Add a prompt point on left (positive) or right (negative) click.
// (The original comment said "hover event" — this is the click handler.)
imageContainer.addEventListener('mousedown', e => {
    const isHandledButton = e.button === 0 || e.button === 2;
    if (!isHandledButton) {
        return; // Ignore other buttons
    }
    if (!imageEmbeddings) {
        return; // Ignore if not encoded yet
    }

    // The first click switches from hover preview to multi-point mode
    if (!isMultiMaskMode) {
        lastPoints = [];
        isMultiMaskMode = true;
        cutButton.disabled = false;
    }

    const point = getPoint(e);
    lastPoints.push(point);

    // Show a marker for the new point, then re-run the decoder
    addIcon(point);
    decode();
});
// Clamp a value inside a range [min, max]
function clamp(x, min = 0, max = 1) {
    return Math.min(Math.max(x, min), max);
}

// Convert a mouse event into a prompt point: coordinates normalised to
// [0, 1] within the image container, labelled 1 (positive, left click)
// or 0 (negative, right click).
function getPoint(e) {
    const bb = imageContainer.getBoundingClientRect();

    // Mouse position relative to the container, clamped into [0, 1]
    const mouseX = clamp((e.clientX - bb.left) / bb.width);
    const mouseY = clamp((e.clientY - bb.top) / bb.height);

    const isRightClick = e.button === 2;
    return {
        point: [mouseX, mouseY],
        label: isRightClick ? 0 : 1,
    };
}
// Suppress the browser context menu so right-click can add negative points.
imageContainer.addEventListener('contextmenu', e => {
    e.preventDefault();
});

// Hover preview: before any point has been clicked, continuously decode the
// mask under the cursor.
imageContainer.addEventListener('mousemove', e => {
    if (!imageEmbeddings || isMultiMaskMode) {
        // Skip until the image is encoded, and once in multi-point mode
        return;
    }
    lastPoints = [getPoint(e)];

    // Only start a decode if one is not already in flight
    if (!isDecoding) {
        decode();
    }
});
// Handle cut button click: copy the masked region of the image onto a
// transparent canvas and download it as a PNG.
cutButton.addEventListener('click', () => {
    const [w, h] = [maskCanvas.width, maskCanvas.height];

    // Get the mask pixel data
    const maskContext = maskCanvas.getContext('2d');
    const maskPixelData = maskContext.getImageData(0, 0, w, h);

    // Load the image
    const image = new Image();
    image.crossOrigin = 'anonymous';
    image.onload = async () => {
        // Draw the source image at mask resolution
        const imageCanvas = new OffscreenCanvas(w, h);
        const imageContext = imageCanvas.getContext('2d');
        imageContext.drawImage(image, 0, 0, w, h);
        const imagePixelData = imageContext.getImageData(0, 0, w, h);

        // Create a new canvas to hold the cut-out
        const cutCanvas = new OffscreenCanvas(w, h);
        const cutContext = cutCanvas.getContext('2d');
        const cutPixelData = cutContext.getImageData(0, 0, w, h);

        // Copy image pixels wherever the mask's alpha channel is set
        // (i walks the alpha bytes; i-3..i are the RGBA bytes of one pixel).
        for (let i = 3; i < maskPixelData.data.length; i += 4) {
            if (maskPixelData.data[i] > 0) {
                for (let j = 0; j < 4; ++j) {
                    const offset = i - j;
                    cutPixelData.data[offset] = imagePixelData.data[offset];
                }
            }
        }
        cutContext.putImageData(cutPixelData, 0, 0);

        // Trigger the download
        const link = document.createElement('a');
        link.download = 'image.png';
        link.href = URL.createObjectURL(await cutCanvas.convertToBlob());
        link.click();
        link.remove();
        // FIX: release the object URL — the original leaked one blob per cut
        URL.revokeObjectURL(link.href);
    };
    image.src = imageDataURI;
});
// Load the SlimSAM model and its processor (fp16 on WebGPU), then enable
// the user interface.
const model_id = 'Xenova/slimsam-77-uniform';
statusLabel.textContent = 'Loading model...';

let model;
let processor;
try {
    model = await SamModel.from_pretrained(model_id, {
        dtype: 'fp16',
        device: 'webgpu',
    });
    processor = await AutoProcessor.from_pretrained(model_id);
} catch (err) {
    // FIX: surface load failures (e.g. browsers without WebGPU) instead of
    // leaving the page stuck on "Loading model...".
    statusLabel.textContent = 'Failed to load model. Does your browser support WebGPU?';
    throw err;
}
statusLabel.textContent = 'Ready';

// Enable the user interface
fileUpload.disabled = false;
uploadButton.style.opacity = 1;
example.style.pointerEvents = 'auto';