Spaces:

SupraLabs
/

Research-SupraLabs

Running

App Files Files Community

LH-Tech-AI committed on 14 days ago

Commit

2b06042

verified ·

1 Parent(s): 39282ec

Create epochs-vs-data.html

Browse files

Files changed (1) hide show

epochs-vs-data.html +332 -0

epochs-vs-data.html ADDED Viewed

	@@ -0,0 +1,332 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>SupraLabs | Satiating the Latent Space: Unique Tokens vs. Cycles</title>
+    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
+    <style>
+        /* Design tokens: the colour palette and monospace font stack that the
+           rules below read through var(). */
+        :root {
+            --bg: #0f0f0f;
+            --surface: #1a1a1a;
+            --border: #333;
+            --text: #e0e0e0;
+            --accent: #536bfe; /* Supra Blue */
+            --muted: #888;
+            --success: #00e676;
+            --warning: #ffb300;
+            --font-mono: 'JetBrains Mono', 'Fira Code', monospace;
+        }
+        /* Minimal reset: strip default spacing and size every box by its border edge. */
+        * { margin: 0; padding: 0; box-sizing: border-box; }
+        /* Page base. NOTE(review): no @font-face or font <link> appears in this
+           document's head, so 'Inter' and the mono fonts presumably apply only
+           when installed locally; otherwise the listed fallbacks are used. */
+        body {
+            background-color: var(--bg);
+            color: var(--text);
+            font-family: 'Inter', -apple-system, sans-serif;
+            line-height: 1.6;
+            padding: 2rem;
+        }
+        code, pre, .mono { font-family: var(--font-mono); }
+        /* Centred content column, capped at 1000px. */
+        .container { max-width: 1000px; margin: 0 auto; }
+        /* Page header: logo on the left, nav links on the right, bottom-aligned. */
+        header {
+            border-bottom: 2px solid var(--border);
+            padding-bottom: 2rem;
+            margin-bottom: 3rem;
+            display: flex;
+            justify-content: space-between;
+            align-items: flex-end;
+        }
+        .logo-area a { text-decoration: none; color: inherit; }
+        /* Flex row so the logo image and the wordmark sit on one line with a 10px gap. */
+        .logo-area h1 {
+            font-size: 1.2rem;
+            text-transform: uppercase;
+            letter-spacing: 2px;
+            color: var(--accent);
+            line-height: 1;
+            display: flex;
+            align-items: center;
+            gap: 10px;
+        }
+        /* The transparent border reserves 1px so the hover underline does not shift the link. */
+        nav a {
+            color: var(--text);
+            text-decoration: none;
+            margin-left: 1.5rem;
+            font-size: 0.9rem;
+            border-bottom: 1px solid transparent;
+        }
+        nav a:hover { border-bottom: 1px solid var(--accent); }
+        /* Hero: experiment title and lead paragraph. */
+        .hero { margin-bottom: 4rem; }
+        .hero h2 { font-size: 3rem; line-height: 1.1; margin-bottom: 1.5rem; font-weight: 800; }
+        .hero p { font-size: 1.2rem; color: var(--muted); max-width: 750px; }
+        /* Small uppercase monospace label placed above each content section. */
+        .section-label {
+            display: block;
+            font-family: var(--font-mono);
+            color: var(--accent);
+            font-size: 0.8rem;
+            margin-top: 3rem;
+            margin-bottom: 1rem;
+            text-transform: uppercase;
+        }
+        /* Bordered content panel and the text elements used inside it. */
+        .card { background: var(--surface); border: 1px solid var(--border); padding: 2.5rem; margin-bottom: 2rem; }
+        h3 { font-size: 1.6rem; margin-bottom: 1rem; font-weight: 700; }
+        p { margin-bottom: 1rem; color: #ccc; }
+        ul { margin-left: 1.5rem; margin-bottom: 1.5rem; }
+        li { margin-bottom: 0.5rem; }
+        /* Marks the best value in a results row. */
+        .winner-badge { color: var(--success); font-weight: bold; font-family: var(--font-mono); }
+        /* Results table; the wrapper scrolls horizontally when the table overflows it. */
+        .table-container { overflow-x: auto; margin: 2rem 0; border: 1px solid var(--border); }
+        table { width: 100%; border-collapse: collapse; text-align: left; font-size: 0.95rem; }
+        th, td { padding: 1rem; border-bottom: 1px solid var(--border); }
+        th { background-color: rgba(26, 26, 26, 0.8); font-family: var(--font-mono); color: var(--accent); font-size: 0.85rem; text-transform: uppercase; }
+        tr:hover { background-color: rgba(83, 107, 254, 0.05); }
+        /* Summary row: green left edge plus a faint green tint. */
+        .highlight-row { border-left: 4px solid var(--success); background-color: rgba(0, 230, 118, 0.02); }
+        /* Panel that wraps each chart canvas. */
+        .chart-box { background: var(--surface); border: 1px solid var(--border); padding: 2rem; margin-bottom: 2rem; }
+        /* Three equal columns of key facts shown above the footer. */
+        .stats-grid {
+            display: grid;
+            grid-template-columns: 1fr 1fr 1fr;
+            gap: 1rem;
+            margin-top: 4rem;
+            border-top: 1px solid var(--border);
+            padding-top: 2rem;
+        }
+        .stat-box { padding: 1rem; border-left: 2px solid var(--accent); }
+        .stat-box small { display: block; color: var(--muted); font-family: var(--font-mono); }
+        footer { margin-top: 6rem; padding-bottom: 2rem; font-size: 0.8rem; color: var(--muted); text-align: center; }
+        /* Viewports up to 768px wide: smaller hero title, stacked header,
+           nav spacing moved to the right of each link, single-column stats grid.
+           Must stay after the base rules it overrides. */
+        @media (max-width: 768px) {
+            .hero h2 { font-size: 2rem; }
+            header { flex-direction: column; align-items: flex-start; gap: 1rem; }
+            nav a { margin-left: 0; margin-right: 1rem; }
+            .stats-grid { grid-template-columns: 1fr; }
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <header>
+            <div class="logo-area" style="font-size: 1.5em;">
+                <a href="index.html"><h1><img src="./image.png" style="height: 2em" alt="Logo"> SupraLabs_</h1></a>
+            </div>
+            <!-- Primary navigation: three in-page anchors plus one external profile link. -->
+            <nav>
+                <a href="#summary">Core Learnings</a>
+                <a href="#benchmarks">Pretrain Matrix</a>
+                <a href="#charts">Visualizations</a>
+                <!-- Opens in a new tab; rel keeps the opened page from reaching window.opener and omits the Referer header. -->
+                <a href="https://huggingface.co/SupraLabs" target="_blank" rel="noopener noreferrer">HuggingFace</a>
+            </nav>
+        </header>
+        <section class="hero">
+            <h2>Experiment #6:<br>More Epochs vs. More Data for SLMs</h2>
+            <p>A rigorous, mathematical verification of information limits under a strict compute constraint. We held total token exposure fixed at <strong>200,000,000 processed steps</strong>, testing total unique data volume directly against looping recurrent cycles.</p>
+        </section>
+        <span class="section-label" id="summary">// Data_Entropy_&_Reasoning_Loss</span>
+        <div class="card">
+            <h3>Isolating Token Freshness in the Static Latent Block</h3>
+            <p>Chinchilla compute curves dictate linear resource scaling. Our targeted isolation runs reveal an asymmetric divergence between static loss optimization and objective downstream capability inside sub-10M environments:</p>
+            <ul>
+                <li><strong>The Logic Divergence Cliff (Run 1):</strong> Maximizing unique data exposure (200M unique steps × 1 Epoch) delivers the highest reasoning performance, claiming 33.42% accuracy on factual deduction (ARC-Easy). Fresh token entropy is essential for non-repetitive learning.</li>
+                <li><strong>The Perplexity Sweetspot (Run 2):</strong> Running a micro-cycle (100M unique steps × 2 Epochs) yields a slight boost in base linguistic perplexity (236.80). The immediate repetition helps tiny architectures reinforce core syntactic boundaries.</li>
+                <li><strong>The Overfitting Illusion (Run 5):</strong> Compressing unique data down to 25M while repeating for 8 full epochs drops training loss to its absolute minimum (4.196). However, this triggers semantic memorization, ruining factual reasoning properties.</li>
+            </ul>
+        </div>
+        <span class="section-label" id="benchmarks">// Information_Density_Matrix</span>
+        <div class="card" style="padding: 1.5rem;">
+            <h3>Symmetric Token Matrix Results</h3>
+            <p>Every configuration is locked to exactly 200M total token exposure steps. Validation loss tracking alone is deceptive due to language overfitting parameters.</p>
+            <div class="table-container">
+                <table>
+                    <thead>
+                        <tr>
+                            <th>Benchmark / Metric</th>
+                            <th style="color: var(--success)">Run 1: 200M × 1 (🏆 Facts Win)</th>
+                            <th style="color: var(--success)">Run 2: 100M × 2 (🏆 PPL Win)</th>
+                            <th>Run 3: 50M × 4</th>
+                            <th>Run 4: 40M × 5</th>
+                            <th>Run 5: 25M × 8</th>
+                        </tr>
+                    </thead>
+                    <tbody>
+                        <tr>
+                            <td class="mono">Unique Tokens Pool</td>
+                            <td>200,000,000</td>
+                            <td>100,000,000</td>
+                            <td>50,000,000</td>
+                            <td>40,000,000</td>
+                            <td>25,000,000</td>
+                        </tr>
+                        <tr>
+                            <td class="mono">Training Epochs Block</td>
+                            <td>1 Epoch</td>
+                            <td>2 Epochs</td>
+                            <td>4 Epochs</td>
+                            <td>5 Epochs</td>
+                            <td>8 Epochs</td>
+                        </tr>
+                        <tr>
+                            <td class="mono">Final Pretrain Loss (↓)</td>
+                            <td>3.789</td>
+                            <td>3.771</td>
+                            <td>3.785</td>
+                            <td>3.771</td>
+                            <td style="color: var(--warning)">3.719</td>
+                        </tr>
+                        <tr>
+                            <td class="mono">Final Pretrain Train Loss (↓)</td>
+                            <td>4.240</td>
+                            <td>4.229</td>
+                            <td>4.235</td>
+                            <td>4.225</td>
+                            <td style="color: var(--warning)">4.196</td>
+                        </tr>
+                        <tr>
+                            <td class="mono">ARC-Easy Zero-Shot (↑)</td>
+                            <td class="winner-badge">33.42%</td>
+                            <td>31.57%</td>
+                            <td>31.82%</td>
+                            <td>31.69%</td>
+                            <td>30.93%</td>
+                        </tr>
+                        <tr>
+                            <td class="mono">Wikitext Byte PPL (↓)</td>
+                            <td>1.4824</td>
+                            <td class="winner-badge">1.4750</td>
+                            <td>1.4851</td>
+                            <td>1.4918</td>
+                            <td>1.5017</td>
+                        </tr>
+                        <tr>
+                            <td class="mono">Wikitext Word PPL (↓)</td>
+                            <td>243.3377</td>
+                            <td class="winner-badge">236.8014</td>
+                            <td>245.8708</td>
+                            <td>252.0078</td>
+                            <td>261.4054</td>
+                        </tr>
+                        <tr class="highlight-row">
+                            <td style="font-weight: bold;">PRETRAIN ASSESSMENT</td>
+                            <td style="color: var(--success); font-weight: bold;">MAXIMUM KNOWLEDGE</td>
+                            <td style="color: var(--success); font-weight: bold;">SYNTAX ENHANCED</td>
+                            <td>Recycling Decay</td>
+                            <td>Memorization Masking</td>
+                            <td style="color: var(--warning)">Severe Overfitting</td>
+                        </tr>
+                    </tbody>
+                </table>
+            </div>
+        </div>
+        <span class="section-label" id="charts">// Plotting_The_Entropy_Divergence</span>
+        <!-- Chart panel for the ARC-Easy bar chart, which the inline script draws into the canvas by id. -->
+        <div class="chart-box">
+            <h3>Factual Logic Degradation vs. Looping Cycles</h3>
+            <div style="position: relative; height:350px; width:100%">
+                <!-- Canvas pixels are not exposed to assistive technology, so the same figures are given as a text alternative. -->
+                <canvas id="epochsArcChart" role="img" aria-label="Bar chart of ARC-Easy zero-shot accuracy per run: Run 1 (200M × 1) 33.42%, Run 2 (100M × 2) 31.57%, Run 3 (50M × 4) 31.82%, Run 4 (40M × 5) 31.69%, Run 5 (25M × 8) 30.93%"></canvas>
+            </div>
+        </div>
+        <!-- Chart panel for the dual-axis line chart, which the inline script draws into the canvas by id. -->
+        <div class="chart-box">
+            <h3>The Overfitting Paradox: True Language PPL vs. Apparent Training Loss</h3>
+            <p style="font-size: 0.85rem; color: var(--muted); margin-bottom: 1.5rem;">Crucial observation: While recycling data (increasing epochs) forces the loss curve downward artificially, true out-of-distribution Perplexity steadily degrades.</p>
+            <div style="position: relative; height:350px; width:100%">
+                <!-- Canvas pixels are not exposed to assistive technology, so the same figures are given as a text alternative. -->
+                <canvas id="epochsDivergenceChart" role="img" aria-label="Line chart across Runs 1 to 5 comparing final pretrain train loss (4.240, 4.229, 4.235, 4.225, 4.196) with Wikitext word perplexity (243.34, 236.80, 245.87, 252.01, 261.41)"></canvas>
+            </div>
+        </div>
+        <section class="stats-grid" id="hardware">
+            <div class="stat-box">
+                <small>CONSTANT MATRIX SIZE</small>
+                <strong>200M Processing Steps</strong>
+            </div>
+            <div class="stat-box">
+                <small>COMPUTE TOPOLOGY</small>
+                <strong>Shallow &amp; Wide SOTA Layout</strong>
+            </div>
+            <div class="stat-box">
+                <small>DATASET ROUTING ENGINE</small>
+                <strong>FineWeb-Edu Target Stream</strong>
+            </div>
+        </section>
+        <footer>
+            <p>&copy; 2026 SupraLabs. High performance. Small footprints. Proudly open-source.</p>
+        </footer>
+    </div>
+    <script>
+        // ARC Accuracy across runs
+        const ctxArc = document.getElementById('epochsArcChart').getContext('2d');
+        new Chart(ctxArc, {
+            type: 'bar',
+            data: {
+                labels: ['Run 1: 200M×1', 'Run 2: 100M×2', 'Run 3: 50M×4', 'Run 4: 40M×5', 'Run 5: 25M×8'],
+                datasets: [{
+                    label: 'ARC-Easy: Factual Knowledge Accuracy (%)',
+                    data: [33.42, 31.57, 31.82, 31.69, 30.93],
+                    backgroundColor: ['rgba(0, 230, 118, 0.3)', 'rgba(83, 107, 254, 0.2)', 'rgba(83, 107, 254, 0.2)', 'rgba(83, 107, 254, 0.2)', 'rgba(229, 57, 53, 0.2)'],
+                    borderColor: ['#00e676', '#536bfe', '#536bfe', '#536bfe', '#e53935'],
+                    borderWidth: 2
+                }]
+            },
+            options: {
+                responsive: true,
+                maintainAspectRatio: false,
+                plugins: { legend: { labels: { color: '#bbb' } } },
+                scales: {
+                    y: { grid: { color: '#222' }, ticks: { color: '#888' }, min: 28 },
+                    x: { grid: { display: false }, ticks: { color: '#aaa' } }
+                }
+            }
+        });
+        // Paradox Divergence Line
+        const ctxDiv = document.getElementById('epochsDivergenceChart').getContext('2d');
+        new Chart(ctxDiv, {
+            type: 'line',
+            data: {
+                labels: ['Run 1 (1 Ep)', 'Run 2 (2 Ep)', 'Run 3 (4 Ep)', 'Run 4 (5 Ep)', 'Run 5 (8 Ep)'],
+                datasets: [
+                    {
+                        label: 'Apparent Pretrain Loss (Deceptive Collapse ↓)',
+                        data: [4.240, 4.229, 4.235, 4.225, 4.196],
+                        borderColor: 'rgba(229, 57, 53, 0.6)',
+                        backgroundColor: 'transparent',
+                        yAxisID: 'yLoss',
+                        tension: 0.1,
+                        borderWidth: 2
+                    },
+                    {
+                        label: 'Wikitext Word Perplexity (True Evaluation Stagnation ↑)',
+                        data: [243.33, 236.80, 245.87, 252.00, 261.40],
+                        borderColor: '#536bfe',
+                        backgroundColor: 'transparent',
+                        yAxisID: 'yPpl',
+                        tension: 0.2,
+                        borderWidth: 3
+                    }
+                ]
+            },
+            options: {
+                responsive: true,
+                maintainAspectRatio: false,
+                plugins: { legend: { labels: { color: '#bbb' } } },
+                scales: {
+                    x: { grid: { display: false }, ticks: { color: '#aaa' } },
+                    yLoss: {
+                        type: 'linear',
+                        position: 'left',
+                        title: { display: true, text: 'Pretrain Loss Metric', color: 'rgba(229, 57, 53, 0.8)' },
+                        grid: { color: '#222' },
+                        ticks: { color: '#888' }
+                    },
+                    yPpl: {
+                        type: 'linear',
+                        position: 'right',
+                        title: { display: true, text: 'Word Perplexity Scale', color: '#536bfe' },
+                        grid: { display: false },
+                        ticks: { color: '#888' }
+                    }
+                }
+            }
+        });
+    </script>
+</body>
+</html>