"use client";

import { motion } from "framer-motion";
import { COLORS, VizFrame } from "./common";

/**
 * Side-by-side comparison: a fully-connected layer between an image and a
 * single output neuron versus a small convolution sliding the same kernel
 * across all locations. The point is to make the parameter count and the
 * weight-sharing visible.
 *
 * Both panels iterate over an N × N grid of cells (N = 6, so 36 cells). The
 * dense panel additionally computes one line start point per cell (the cell
 * centre) for the "lines from each pixel" layer, and its caption prints the
 * per-output parameter count as `N * N`. The convolution panel maps over
 * three [row, col] kernel positions and a 3 × 3 index grid for the kernel
 * label.
 *
 * Props:
 * - `width`  (default 980): feeds the per-panel width `panelW`.
 * - `height` (default 380): feeds the per-panel height `panelH`.
 *
 * NOTE(review): in this view the JSX element tags are not visible — only the
 * `{…}` expressions, JSX comments and caption text remain inside the
 * `return ( … )` regions. How `motion`, `COLORS`, `VizFrame`, `panelW`,
 * `panelH` and the computed `r`/`c`/`x1`/`y1` values are consumed therefore
 * cannot be confirmed from here; verify against the full file.
 */
export function ConvIntuition({
  width = 980,
  height = 380,
}: {
  width?: number;
  height?: number;
}) {
  const N = 6; // image side, in cells; the grid has N * N cells
  // Edge length of one grid cell, in the same units as the x1/y1 arithmetic below.
  const cellPx = 18;
  // Horizontal spacing unit; three of these are subtracted from `width` below.
  const padX = 30;
  // Width left for each of the two panels after removing 3 × padX
  // (presumably left margin, centre gap and right margin — TODO confirm).
  const panelW = (width - padX * 3) / 2;
  // Panel height: the full height minus a fixed 30 (purpose of the 30 is not
  // visible here — presumably vertical padding; TODO confirm).
  const panelH = height - 30;
  return (
    {/* Left — dense */}
    dense layer
    {/* Image grid */}
    {Array.from({ length: N * N }, (_, k) => {
      // Flat index k → (row, column) in row-major order.
      const r = Math.floor(k / N);
      const c = k % N;
      return ( );
    })}
    {/* Output neuron */}
    y
    {/* Lines from each pixel */}
    {Array.from({ length: N * N }, (_, k) => {
      // Flat index k → (row, column) in row-major order.
      const r = Math.floor(k / N);
      const c = k % N;
      // Centre of cell (r, c): offset + index * cell size + half a cell.
      // NOTE(review): 28 and 60 look like the grid's top-left origin inside
      // the panel; they are magic numbers here — confirm they match the
      // offsets used by the grid markup.
      const x1 = 28 + c * cellPx + cellPx / 2;
      const y1 = 60 + r * cellPx + cellPx / 2;
      return ( );
    })}
    params per output = H · W · C = {N * N}
    no spatial bias — must learn translation from scratch
    for an HD image: millions of params per neuron
    {/* Right — convolution */}
    convolution
    {/* Image grid with one 3x3 kernel highlighted at three positions */}
    {Array.from({ length: N * N }, (_, k) => {
      // Flat index k → (row, column) in row-major order.
      const r = Math.floor(k / N);
      const c = k % N;
      return ( );
    })}
    {/* kernel highlights */}
    {[
      // [row, col] of each highlighted kernel placement; with a 3 × 3 kernel
      // on the 6 × 6 grid, the largest entry (3) still ends at index 5.
      [0, 0],
      [1, 2],
      [3, 3],
    ].map(([r, c], i) => ( ))}
    {/* Kernel label */}
    same 3×3 kernel applied everywhere
    {[0, 1, 2].map((r) => (
      {[0, 1, 2].map((c) => ( ))}
    ))}
    {/* NOTE(review): the 9 below is hard-coded, whereas the dense caption computes {N * N}; */}
    {/* the "~10⁵×" figure presumably refers to the HD-image case, not this 6 × 6 demo — confirm. */}
    params per filter = K · K · C = 9
    translation-equivariant — pattern detected anywhere
    ~10⁵× fewer parameters than the dense equivalent
  );
}