Spaces:
Running
Running
// Judgy Reachy No Phone - Browser Demo
// Uses Transformers.js for YOLO detection in the browser
// WebGPU implementation inspired by: https://huggingface.co/spaces/webml-community/YOLO26-WebGPU
| import { AutoModel, AutoProcessor, RawImage } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.8.1'; | |
// DOM elements (looked up once at module load by their element ids)

// Camera preview and the canvas that detection results are drawn on
const video = document.getElementById('webcam');
const canvas = document.getElementById('canvas');
const ctx = canvas.getContext('2d', { willReadFrequently: true });

// Robot illustration whose `data` attribute and CSS classes are changed on reactions
const robotSvg = document.getElementById('robot-svg');

// Camera open/close button and its icon/label
const cameraBtn = document.getElementById('camera-btn');
const cameraIcon = document.getElementById('camera-icon');
const cameraText = document.getElementById('camera-text');

// Monitoring start/stop button and its icon/label
const startBtn = document.getElementById('start-btn');
const btnIcon = document.getElementById('btn-icon');
const btnText = document.getElementById('btn-text');

// Status line, FPS readout and reaction text
const statusIndicator = document.getElementById('status-indicator');
const statusText = document.getElementById('status-text');
const fpsEl = document.getElementById('fps');
const responseText = document.getElementById('response-text');

// Loading overlay and its message
const loader = document.getElementById('loader');
const loaderText = document.getElementById('loader-text');
// Loader helpers

/**
 * Show the loading overlay with a message.
 * @param {string} text - Message written into the loader text element.
 */
const showLoader = (text) => {
  loaderText.textContent = text;
  loader.classList.add('visible');
};

/** Hide the loading overlay by removing its `visible` class. */
const hideLoader = () => {
  loader.classList.remove('visible');
};
// Demo defaults (hardcoded - Pure Reachy mode)
const DEMO_COOLDOWN = 10; // Minimum number of seconds between two phone reactions
const DEMO_PRAISE_ENABLED = true; // Play a praise reaction when the phone is put down
// State
let model = null; // Detection model, assigned by init()
let processor = null; // Matching image processor, assigned by init()
let isRunning = false; // True while the camera is open and the render loop is active
let isMonitoring = false; // True while phone detection is enabled
let isProcessing = false; // Prevents overlapping detections
let animationId = null; // Handle returned by requestAnimationFrame
let stream = null; // Active camera MediaStream, if any

// Detection state
let phoneVisible = false; // Debounced "a phone is currently being held" flag
let consecutivePhone = 0; // Frames with a phone, compared against PICKUP_THRESHOLD
let consecutiveNoPhone = 0; // Frames without a phone, compared against PUTDOWN_THRESHOLD
let phoneCount = 0; // Number of shame reactions triggered so far
let lastReactionTime = 0; // Date.now() timestamp of the last reaction (0 = none)

// Tracking state
let lastPhoneBox = null; // Last known phone position
let framesWithoutDetection = 0; // Count frames without detection
// Offscreen canvases for processing (created once and reused on every frame
// so that no new canvas is allocated per detection)

// Full-resolution copy of the current video frame
const offscreen = document.createElement('canvas');
const offscreenCtx = offscreen.getContext('2d', { willReadFrequently: true });

// Downscaled frame that is handed to YOLO inference
const smallCanvas = document.createElement('canvas');
const smallCtx = smallCanvas.getContext('2d', { willReadFrequently: true });
// Constants
const PHONE_CLASS_ID = 67; // Cell phone in COCO dataset
const PICKUP_THRESHOLD = 3; // Phone frames needed before a pickup is registered
const PUTDOWN_THRESHOLD = 15; // Phone-free frames needed before a putdown is registered
const DETECTION_CONFIDENCE = 0.5; // Minimum score to start tracking a phone (0-1)
const TRACKING_CONFIDENCE = 0.2; // Lower threshold when tracking existing phone
const TRACKING_PERSIST_FRAMES = 3; // Keep tracking for N frames after losing detection
// Pure Reachy Mode - Robot emotion sounds from HuggingFace.
// Each name is the basename of a `.wav` file below EMOTIONS_BASE_URL.

// Candidates played when a phone is picked up
const SHAME_EMOTIONS = [
  'disgusted1',
  'resigned1',
  'displeased1',
  'displeased2',
  'rage1',
  'no1',
  'reprimand1',
  'reprimand3',
  'dying1',
  'surprised1',
  'surprised2',
];

// Candidates played when the phone is put down
const PRAISE_EMOTIONS = [
  'welcoming2',
  'inquiring1',
  'inquiring2',
  'proud1',
  'proud3',
  'success1',
  'success2',
  'enthusiastic1',
  'enthusiastic2',
  'grateful1',
  'yes1',
  'cheerful1',
];

const EMOTIONS_BASE_URL = 'https://huggingface.co/datasets/pollen-robotics/reachy-mini-emotions-library/resolve/main';
// Initialize
/**
 * Load the YOLO model and its processor, then enable the camera button.
 *
 * Both buttons are disabled while loading. On mobile user agents nothing is
 * loaded and the buttons stay disabled (the page is expected to present an
 * alternative there). Load failures are caught, logged, and shown in the
 * loader and status line; this function does not reject on a failed load.
 *
 * @returns {Promise<void>}
 */
async function init() {
  try {
    // Disable buttons while loading
    cameraBtn.disabled = true;
    startBtn.disabled = true;

    // Don't load the model on mobile
    const isMobile = /iPhone|iPad|iPod|Android/i.test(navigator.userAgent);
    if (isMobile) {
      console.log('Mobile detected - skipping model load');
      hideLoader();
      return;
    }

    // Only the desktop model is ever loaded, so no per-device selection is needed
    const modelId = 'onnx-community/yolo26m-ONNX';

    showLoader('Loading YOLO26m model...');
    statusText.textContent = 'Loading AI model...';
    statusIndicator.className = 'status-dot loading';

    // Load YOLO model with WebGPU
    model = await AutoModel.from_pretrained(modelId, {
      device: 'webgpu',
      dtype: 'fp16',
    });

    showLoader('Loading processor...');
    processor = await AutoProcessor.from_pretrained(modelId);

    hideLoader();
    statusText.textContent = 'Model ready! Open camera to begin';
    statusIndicator.className = 'status-dot ready';
    cameraBtn.disabled = false;
    console.log('YOLO model loaded successfully');
  } catch (error) {
    console.error('Failed to load model:', error);
    // The loader stays visible so the failure message remains on screen
    showLoader(`Failed to load model: ${error.message}`);
    statusText.textContent = 'Error loading model';
    statusIndicator.className = 'status-dot error';
  }
}
// Start webcam
/**
 * Open the user-facing camera, size the canvases to the video and start the
 * render loop.
 *
 * On success `isRunning` is true. On any failure the error is logged, an
 * alert is shown, a stream that was already acquired is released again and
 * `isRunning` is false; the function does not reject.
 *
 * @returns {Promise<void>}
 */
async function startCamera() {
  try {
    stream = await navigator.mediaDevices.getUserMedia({
      video: {
        width: 640,
        height: 480,
        facingMode: 'user',
      },
    });
    video.srcObject = stream;
    await video.play();

    // Show video and canvas
    video.style.display = 'block';
    canvas.style.display = 'block';

    // Match both canvases to the actual video resolution
    canvas.width = offscreen.width = video.videoWidth;
    canvas.height = offscreen.height = video.videoHeight;

    isRunning = true;
    loop(); // Start the loop
    statusIndicator.className = isMonitoring ? 'status-dot monitoring' : 'status-dot ready';
  } catch (error) {
    console.error('Camera error:', error);

    // getUserMedia may have succeeded before a later step failed; stop the
    // tracks so the camera does not stay switched on.
    if (stream) {
      stream.getTracks().forEach((track) => track.stop());
      stream = null;
    }
    video.srcObject = null;
    isRunning = false;

    alert('Could not access webcam. Please allow camera permissions.');
  }
}
// Stop webcam
/**
 * Stop the camera stream and the render loop, then clear and hide the
 * video and canvas. Safe to call when the camera is already stopped.
 */
function stopCamera() {
  isRunning = false;

  if (stream) {
    stream.getTracks().forEach((track) => track.stop());
    stream = null;
  }
  // Detach the stopped stream from the element so it is not kept referenced
  video.srcObject = null;

  if (animationId !== null) {
    cancelAnimationFrame(animationId);
    animationId = null;
  }

  // Clear and hide video/canvas when closed
  ctx.clearRect(0, 0, canvas.width, canvas.height);
  video.style.display = 'none';
  canvas.style.display = 'none';
}
// Main loop (like YOLO26-WebGPU)
/**
 * One animation-frame tick. While the camera is running it reschedules
 * itself; while monitoring it starts a detection pass unless one is still in
 * flight, so inference never blocks the frame callback.
 */
function loop() {
  if (!isRunning) return;

  if (!isMonitoring) {
    // Nothing is drawn while paused, so keep the overlay empty
    ctx.clearRect(0, 0, canvas.width, canvas.height);
  } else if (!isProcessing) {
    // Run detection if ready (non-blocking)
    isProcessing = true;
    const startTime = performance.now();
    detectAndProcess()
      .then(() => {
        fpsEl.textContent = Math.round(1000 / (performance.now() - startTime));
      })
      .catch((error) => {
        // Without this handler a failed pass would be an unhandled rejection
        console.error('Detection pass failed:', error);
      })
      .finally(() => {
        isProcessing = false;
      });
  }

  animationId = requestAnimationFrame(loop);
}
// Combined detection and processing
/**
 * Run one detection pass and advance the pickup/putdown state machine.
 *
 * - A pickup is registered after PICKUP_THRESHOLD consecutive phone frames.
 * - While the phone stays in frame, a shame reaction fires on pickup and then
 *   again each time DEMO_COOLDOWN seconds have passed since the last one.
 * - A putdown is registered after PUTDOWN_THRESHOLD consecutive phone-free
 *   frames; it resets the cooldown and, if enabled, triggers praise.
 *
 * Results that arrive after monitoring was paused or the camera was closed
 * are discarded so they cannot overwrite the freshly reset state.
 *
 * @returns {Promise<void>}
 */
async function detectAndProcess() {
  const detections = await detectPhoneAndGetBoxes();

  // Inference is asynchronous: the user may have paused or closed the camera
  // in the meantime, in which case this result is stale.
  if (!isRunning || !isMonitoring) return;

  const phoneInFrame = detections.length > 0;

  // Update consecutive counters (each one resets when its streak is broken)
  if (phoneInFrame) {
    consecutivePhone++;
    consecutiveNoPhone = 0;
  } else {
    consecutiveNoPhone++;
    consecutivePhone = 0;
  }

  // Phone pickup
  if (!phoneVisible && consecutivePhone >= PICKUP_THRESHOLD) {
    phoneVisible = true;
    consecutiveNoPhone = 0;
  }

  // React on pickup and periodically while the phone is still being held
  if (phoneVisible && phoneInFrame) {
    const now = Date.now();
    if (now - lastReactionTime >= DEMO_COOLDOWN * 1000) {
      phoneCount++;
      lastReactionTime = now;
      // Fire and forget: the reaction must not delay the detection loop
      handlePhonePickup().catch((error) => console.error('Pickup reaction failed:', error));
    }
  }

  // Phone putdown
  if (phoneVisible && consecutiveNoPhone >= PUTDOWN_THRESHOLD) {
    phoneVisible = false;
    consecutivePhone = 0;
    lastReactionTime = 0; // Next pickup reacts immediately, without cooldown
    if (DEMO_PRAISE_ENABLED) {
      handlePhonePutdown().catch((error) => console.error('Putdown reaction failed:', error));
    }
  }

  // Update status
  if (phoneVisible) {
    statusText.textContent = '📱 PHONE DETECTED!';
    statusIndicator.className = 'status-dot detected';
  } else {
    statusText.textContent = '✅ Phone-free';
    statusIndicator.className = 'status-dot monitoring';
  }

  draw(detections);
}
// Detect phone and return detection boxes (like YOLO26-WebGPU)
/**
 * Run YOLO on a downscaled copy of the current video frame and return the
 * phone box to display for this frame.
 *
 * The best-scoring candidate whose top class is PHONE_CLASS_ID is kept. While
 * a phone is being tracked the lower TRACKING_CONFIDENCE threshold applies,
 * and after a miss the last known box is repeated (with reduced confidence)
 * for up to TRACKING_PERSIST_FRAMES frames before tracking is dropped.
 *
 * @returns {Promise<Array<{x1: number, y1: number, x2: number, y2: number,
 *   confidence: number, class: string}>>} Zero or one box in canvas pixel
 *   coordinates. Any error is logged and yields an empty array.
 */
async function detectPhoneAndGetBoxes() {
  try {
    // Resize for faster inference (trade accuracy for speed)
    const targetWidth = 256;
    const targetHeight = Math.round((targetWidth / offscreen.width) * offscreen.height);

    // Resize small canvas if needed (only on first run or video size change)
    if (smallCanvas.width !== targetWidth || smallCanvas.height !== targetHeight) {
      smallCanvas.width = targetWidth;
      smallCanvas.height = targetHeight;
    }

    // Draw resized image (reuse existing canvases)
    offscreenCtx.drawImage(video, 0, 0);
    smallCtx.drawImage(offscreen, 0, 0, targetWidth, targetHeight);
    const image = RawImage.fromCanvas(smallCanvas);

    // Run YOLO detection
    const inputs = await processor(image);
    const output = await model(inputs);

    // Process detections - YOLO26 format: flat per-candidate class scores and
    // flat (cx, cy, w, h) boxes. Counts are derived from the data instead of
    // being hard-coded so other head sizes work too.
    const scores = output.logits.sigmoid().data;
    const boxes = output.pred_boxes.data;
    const candidateCount = Math.floor(boxes.length / 4);
    const classCount = candidateCount > 0 ? Math.floor(scores.length / candidateCount) : 0;

    // Adaptive confidence: lower threshold when tracking existing phone
    const confidenceThreshold = lastPhoneBox ? TRACKING_CONFIDENCE : DETECTION_CONFIDENCE;

    let bestPhone = null;
    let bestScore = 0;

    for (let i = 0; i < candidateCount; i++) {
      // Find the top class and its score for this candidate
      let maxScore = 0;
      let maxClass = 0;
      for (let j = 0; j < classCount; j++) {
        const score = scores[i * classCount + j];
        if (score > maxScore) {
          maxScore = score;
          maxClass = j;
        }
      }

      if (maxClass !== PHONE_CLASS_ID || maxScore < confidenceThreshold) continue;
      if (maxScore <= bestScore) continue;

      // Box is (cx, cy, w, h), treated as normalized 0-1; convert to corner
      // coordinates in canvas pixels.
      const cx = boxes[i * 4];
      const cy = boxes[i * 4 + 1];
      const w = boxes[i * 4 + 2];
      const h = boxes[i * 4 + 3];
      bestScore = maxScore;
      bestPhone = {
        x1: (cx - w / 2) * canvas.width,
        y1: (cy - h / 2) * canvas.height,
        x2: (cx + w / 2) * canvas.width,
        y2: (cy + h / 2) * canvas.height,
        confidence: maxScore,
        class: 'cell phone',
      };
    }

    // Tracking logic: exactly one of the three branches runs per frame
    const newDetections = [];
    if (bestPhone) {
      // Phone detected - update tracking
      lastPhoneBox = bestPhone;
      framesWithoutDetection = 0;
      newDetections.push(bestPhone);
    } else if (lastPhoneBox && framesWithoutDetection < TRACKING_PERSIST_FRAMES) {
      // No detection but still tracking - persist last known box
      framesWithoutDetection++;
      newDetections.push({
        ...lastPhoneBox,
        confidence: lastPhoneBox.confidence * 0.9, // Shown with reduced confidence
      });
    } else {
      // Lost tracking completely
      lastPhoneBox = null;
      framesWithoutDetection = 0;
    }

    return newDetections;
  } catch (error) {
    console.error('Detection error:', error);
    return [];
  }
}
// Draw (like YOLO26-WebGPU - clear and redraw every time)
/**
 * Repaint the canvas with the current video frame and outline each detection
 * with a labelled green box.
 *
 * @param {Array<{x1: number, y1: number, x2: number, y2: number,
 *   confidence: number, class: string}>} detections - Boxes in canvas pixels.
 */
function draw(detections) {
  ctx.clearRect(0, 0, canvas.width, canvas.height);
  ctx.drawImage(video, 0, 0, canvas.width, canvas.height);

  // Same styling for every box, so set it once
  ctx.strokeStyle = '#00ff00';
  ctx.lineWidth = 3;
  ctx.fillStyle = '#00ff00';
  ctx.font = '16px Arial';

  for (const det of detections) {
    ctx.strokeRect(det.x1, det.y1, det.x2 - det.x1, det.y2 - det.y1);

    // Label sits above the box; keep its baseline at least one font height
    // from the top so it is not clipped for boxes near the upper edge.
    const label = `${det.class} ${(det.confidence * 100).toFixed(0)}%`;
    ctx.fillText(label, det.x1, Math.max(det.y1 - 10, 16));
  }
}
// Play Reachy emotion sound (Pure Reachy mode)
/**
 * Play a randomly chosen emotion sound from the given list.
 *
 * @param {string[]} emotionList - Emotion names; each maps to
 *   `${EMOTIONS_BASE_URL}/<name>.wav`.
 * @returns {Promise<string|null>} The name of the emotion whose playback
 *   started, or null if the list is empty or playback failed (the failure is
 *   logged as a warning, never thrown).
 */
async function playReachyEmotion(emotionList) {
  // Nothing to choose from; avoid requesting ".../undefined.wav"
  if (emotionList.length === 0) return null;

  const emotionName = emotionList[Math.floor(Math.random() * emotionList.length)];
  const audioUrl = `${EMOTIONS_BASE_URL}/${emotionName}.wav`;

  try {
    const audio = new Audio(audioUrl);
    await audio.play();
    return emotionName;
  } catch (error) {
    console.warn(`Failed to play emotion ${emotionName}:`, error);
    return null;
  }
}
// Handle phone pickup - Pure Reachy mode
/**
 * React to a phone being picked up: switch the robot to its mad image, shake
 * it, and play a random shame sound.
 *
 * @returns {Promise<void>} Resolves once playback has started (or failed).
 */
async function handlePhonePickup() {
  // Change to the mad robot and start the shake animation
  robotSvg.setAttribute('data', 'reachy-mad.svg');
  robotSvg.classList.add('shaking');

  // Play random shame emotion sound and show which one played
  const emotionName = await playReachyEmotion(SHAME_EMOTIONS);
  if (emotionName) {
    responseText.textContent = `😡 *${emotionName}*`;
  }

  // Stop shaking after 2 s; the mad image itself is left in place
  setTimeout(() => {
    robotSvg.classList.remove('shaking');
  }, 2000);
}
// Handle phone putdown - Pure Reachy mode
/**
 * React to the phone being put down: switch the robot to its happy image,
 * make it nod, and play a random praise sound.
 *
 * @returns {Promise<void>} Resolves once playback has started (or failed).
 */
async function handlePhonePutdown() {
  // Start the nod animation and show the happy robot (it stays happy)
  robotSvg.classList.add('nodding');
  robotSvg.setAttribute('data', 'reachy-happy.svg');

  // Play random praise emotion sound and show which one played
  const emotionName = await playReachyEmotion(PRAISE_EMOTIONS);
  if (emotionName) {
    responseText.textContent = `✨ *${emotionName}*`;
  }

  // Stop nodding after 1.5 s
  setTimeout(() => {
    robotSvg.classList.remove('nodding');
  }, 1500);
}
// Stats display intentionally removed - not needed for the demo

// Event handlers

// Camera button: opens the camera and starts monitoring, or closes the camera
// and resets all detection state.
cameraBtn.addEventListener('click', async () => {
  if (!isRunning) {
    // Open camera. The button is disabled while the camera is opening so a
    // second click cannot request a second stream.
    cameraBtn.disabled = true;
    try {
      await startCamera();
    } finally {
      cameraBtn.disabled = false;
    }

    // startCamera() reports failures itself and leaves isRunning false; in
    // that case the UI must stay in its "camera closed" state.
    if (!isRunning) return;

    cameraIcon.textContent = '🎥';
    cameraText.textContent = 'Close Camera';
    startBtn.disabled = false;

    // Monitoring starts automatically with the camera
    isMonitoring = true;
    btnIcon.textContent = '🛑';
    btnText.textContent = 'Stop Monitoring';
    statusIndicator.className = 'status-dot monitoring';
  } else {
    // Close camera
    isMonitoring = false;
    stopCamera();
    cameraIcon.textContent = '📹';
    cameraText.textContent = 'Open Camera';
    startBtn.disabled = true;
    btnIcon.textContent = '▶️';
    btnText.textContent = 'Start Monitoring';
    statusText.textContent = 'Camera closed';
    statusIndicator.className = 'status-dot ready';
    robotSvg.setAttribute('data', 'reachy-happy.svg');
    ctx.clearRect(0, 0, canvas.width, canvas.height);

    // Reset tracking
    phoneVisible = false;
    consecutivePhone = 0;
    consecutiveNoPhone = 0;
    lastPhoneBox = null;
    framesWithoutDetection = 0;
    lastReactionTime = 0;
  }
});
// Monitoring button: toggles phone detection while the camera stays open.
startBtn.addEventListener('click', () => {
  isMonitoring = !isMonitoring;

  if (isMonitoring) {
    btnIcon.textContent = '🛑';
    btnText.textContent = 'Stop Monitoring';
    statusIndicator.className = 'status-dot monitoring';
    return;
  }

  // Paused: restore the idle UI and forget everything about the last phone
  btnIcon.textContent = '▶️';
  btnText.textContent = 'Start Monitoring';
  statusText.textContent = 'Paused';
  statusIndicator.className = 'status-dot ready';
  ctx.clearRect(0, 0, canvas.width, canvas.height);
  robotSvg.setAttribute('data', 'reachy-happy.svg');

  // Reset tracking
  phoneVisible = false;
  consecutivePhone = 0;
  consecutiveNoPhone = 0;
  lastPhoneBox = null;
  framesWithoutDetection = 0;
  lastReactionTime = 0;
});
// Initialize on load: start loading the model as soon as the module runs
init();