// DDR-Bench Interactive Charts with Smooth Animations
// Using Plotly.js with animate for smooth transitions
//
// This script reads two globals that must be provided by the page:
//   - `Plotly`   : the plotly.js library
//   - `DDR_DATA` : chart data; the fields read here are `scaling`, `modelColors`,
//                  `ranking`, `turn`, `probing`, `probingColors` and `error`.
// Charts are rendered into elements with ids `scaling-<s>`, `ranking-<s>`,
// `turn-<s>`, `probing-<s>` (s in SCENARIO_IDS) and `error-chart`. A chart whose
// container is missing is skipped so that the remaining charts still render.

// Common Plotly layout settings for dark theme
const darkLayout = {
  paper_bgcolor: 'rgba(30, 41, 59, 0)',
  plot_bgcolor: 'rgba(30, 41, 59, 0)',
  font: { family: 'Inter, sans-serif', color: '#e2e8f0', size: 11 },
  xaxis: {
    gridcolor: 'rgba(148, 163, 184, 0.12)',
    linecolor: 'rgba(148, 163, 184, 0.2)',
    tickfont: { color: '#94a3b8', size: 10 },
    title: { font: { color: '#e2e8f0', size: 11 } }
  },
  yaxis: {
    gridcolor: 'rgba(148, 163, 184, 0.12)',
    linecolor: 'rgba(148, 163, 184, 0.2)',
    tickfont: { color: '#94a3b8', size: 10 },
    title: { font: { color: '#e2e8f0', size: 11 } }
  },
  legend: {
    bgcolor: 'rgba(30, 41, 59, 0.9)',
    bordercolor: 'rgba(148, 163, 184, 0.2)',
    borderwidth: 1,
    font: { color: '#e2e8f0', size: 10 },
    orientation: 'h',
    y: -0.2,
    x: 0.5,
    xanchor: 'center'
  },
  hoverlabel: {
    bgcolor: '#1e293b',
    bordercolor: '#6366f1',
    font: { color: '#e2e8f0', size: 11 }
  },
  margin: { t: 20, r: 15, b: 60, l: 50 }
};

const plotlyConfig = {
  displayModeBar: true,
  responsive: true,
  modeBarButtonsToRemove: ['lasso2d', 'select2d', 'autoScale2d'],
  displaylogo: false
};

// Animation settings for smooth transitions
const animationSettings = {
  transition: { duration: 500, easing: 'cubic-in-out' },
  frame: { duration: 500 }
};

// Scenario suffixes shared by every per-scenario chart id.
const SCENARIO_IDS = ['mimic', '10k', 'globem'];

// Current state
let currentScalingDim = 'turn';
let currentProbingMode = 'byTurn';
let currentRankingMode = 'novelty';

// Handle of the pending debounced resize (see handleWindowResize).
let resizeTimeout;

// ============================================================================
// SHARED DOM HELPERS
// ============================================================================

/** Returns true when an element with the given id exists in the document. */
function chartExists(id) {
  return document.getElementById(id) !== null;
}

/**
 * Moves the `active` class to `activeEl` within the set matched by `selector`.
 * The selector is re-queried on every call, as the original click handlers did.
 */
function setActive(selector, activeEl) {
  document.querySelectorAll(selector).forEach(el => el.classList.remove('active'));
  activeEl.classList.add('active');
}

// ============================================================================
// SCALING ANALYSIS - Normalized Coordinate System for Smooth Animation
// ============================================================================

/**
 * Normalizes values to [0, 1].
 *
 * - 'log': positive values are mapped by log10 between the smallest and largest
 *   positive value; non-positive values map to 0.
 * - anything else: linear scale that always starts at 0 and ends at the maximum.
 *
 * @param {number[]} values - Raw values.
 * @param {string} type - 'log' or 'linear'.
 * @returns {{normalized: number[], min: number, max: number}} Normalized values
 *   plus the raw bounds that were mapped to 0 and 1.
 */
function normalizeData(values, type) {
  if (values.length === 0) return { normalized: [], min: 0, max: 1 };

  if (type === 'log') {
    // Only positive values can be placed on a log scale.
    const positiveValues = values.filter(v => v > 0);
    if (positiveValues.length === 0) {
      // Without this guard min/max would be Infinity/-Infinity and every tick
      // label derived from them would be NaN.
      return { normalized: values.map(() => 0), min: 1, max: 1 };
    }
    const min = Math.min(...positiveValues);
    const max = Math.max(...positiveValues);
    const logMin = Math.log10(min);
    const range = Math.log10(max) - logMin || 1; // avoid dividing by 0 when min === max
    const normalized = values.map(v => (v > 0 ? (Math.log10(v) - logMin) / range : 0));
    return { normalized, min, max };
  }

  const min = 0; // Always start linear scales at 0 for this use case
  const max = Math.max(...values);
  const range = max - min || 1;
  return { normalized: values.map(v => (v - min) / range), min, max };
}

/**
 * Generates tick positions and labels for a normalized [0, 1] axis.
 *
 * @param {number} min - Raw value shown at normalized position 0.
 * @param {number} max - Raw value shown at normalized position 1.
 * @param {string} type - 'log' (labels formatted as currency) or 'linear'.
 * @returns {{tickVals: number[], tickText: string[]}}
 */
function generateTicks(min, max, type) {
  const tickVals = [0, 0.2, 0.4, 0.6, 0.8, 1.0];

  if (type === 'log') {
    const logMin = Math.log10(min);
    const range = Math.log10(max) - logMin;
    const tickText = tickVals.map(v => {
      const val = Math.pow(10, logMin + v * range);
      // More precision for small costs; the log axis is labelled as currency.
      return '$' + (val >= 1 ? val.toFixed(1) : val.toFixed(3));
    });
    return { tickVals, tickText };
  }

  const range = max - min;
  const tickText = tickVals.map(v => {
    const val = min + v * range;
    return val >= 1000 ? (val / 1000).toFixed(0) + 'k' : val.toFixed(0);
  });
  return { tickVals, tickText };
}

// Exact axis ranges from Python scripts
const SCALING_Y_RANGES = {
  'mimic': [5, 40],  // Python: y_min=5, y_max=40
  '10k': [0, 85],    // Python: y_min=0, y_max=85
  'globem': [0, 50]  // Python: y_min=0, y_max=50
};

// Which per-model array supplies the X values for each scaling dimension.
const SCALING_X_FIELDS = { turn: 'turns', token: 'tokens', cost: 'costs' };

// X-axis titles per scaling dimension.
const SCALING_X_LABELS = {
  turn: 'Number of Interaction Turns',
  token: 'Total Costed Tokens',
  cost: 'Inference Cost ($)'
};

// Hover label per dimension used after a dimension switch.
const SCALING_HOVER_LABELS = { turn: 'Turns', token: 'Tokens', cost: 'Cost' };

/**
 * Returns the raw X values of one model for a scaling dimension.
 *
 * @param {Object} modelData - Per-model record with `turns`, `tokens`, `costs`.
 * @param {string} dimension - 'turn', 'token' or 'cost'.
 * @returns {number[]} The matching array, or [] for an unknown dimension/field.
 */
function getScalingXValues(modelData, dimension) {
  const field = SCALING_X_FIELDS[dimension];
  return (field && modelData[field]) || [];
}

/**
 * Builds complete scaling traces (one per model) on the shared normalized X axis.
 *
 * All models are normalized together so they share one axis. Each model's slice
 * is cut using the length of the array that is actually plotted (the original
 * always used `turns.length`, which misaligns series if the arrays differ in
 * length). Traces always carry name/line/marker so they can be handed to
 * Plotly.react without losing model names and colours.
 *
 * @param {Object} data - Map of model name to { turns, tokens, costs, accuracy }.
 * @param {string} dimension - 'turn', 'token' or 'cost'.
 * @param {string} hoverLabel - Label shown before the raw X value on hover.
 * @returns {{traces: Object[], tickVals: number[], tickText: string[]}}
 */
function buildScalingTraces(data, dimension, hoverLabel) {
  const models = Object.keys(data);
  const rawByModel = models.map(model => getScalingXValues(data[model], dimension));
  const scaleType = dimension === 'cost' ? 'log' : 'linear';
  const { normalized, min, max } = normalizeData(rawByModel.flat(), scaleType);
  const { tickVals, tickText } = generateTicks(min, max, scaleType);

  let offset = 0;
  const traces = models.map((model, i) => {
    const raw = rawByModel[i];
    const x = normalized.slice(offset, offset + raw.length);
    offset += raw.length;
    const color = DDR_DATA.modelColors[model] || '#888';
    return {
      x,
      y: data[model].accuracy,
      mode: 'lines+markers',
      name: model,
      line: { color, width: 2 },
      marker: { size: 6, color },
      // <br> is Plotly's line-break tag for hover text.
      hovertemplate: `${model}<br>${hoverLabel}: %{customdata}<br>Accuracy: %{y:.2f}%`,
      customdata: raw // Store real values for hover
    };
  });

  return { traces, tickVals, tickText };
}

/** Renders the initial scaling charts (dimension 'turn') for every scenario. */
function initScalingCharts() {
  SCENARIO_IDS.forEach(scenario => {
    const data = DDR_DATA.scaling[scenario];
    if (!data || !chartExists(`scaling-${scenario}`)) return;

    const { traces, tickVals, tickText } = buildScalingTraces(data, 'turn', 'Turn');
    const yRange = SCALING_Y_RANGES[scenario] || [0, 100];

    const layout = {
      ...darkLayout,
      xaxis: {
        ...darkLayout.xaxis,
        title: { text: SCALING_X_LABELS.turn, font: { size: 11, color: '#e2e8f0' } },
        type: 'linear',        // ALWAYS LINEAR: X is pre-normalized to [0, 1]
        range: [-0.05, 1.05],  // FIXED RANGE so switching dimensions only moves points
        tickmode: 'array',
        tickvals: tickVals,
        ticktext: tickText,
        zeroline: false
      },
      yaxis: {
        ...darkLayout.yaxis,
        title: { text: 'Accuracy (%)', font: { size: 11, color: '#e2e8f0' } },
        dtick: 5,
        range: yRange
      },
      showlegend: true
    };

    Plotly.newPlot(`scaling-${scenario}`, traces, layout, plotlyConfig);
  });
}

/** Injects the CSS used by the line drawing animation. */
function injectLineAnimationStyle() {
  const style = document.createElement('style');
  style.textContent = `
    .js-line path {
      transition: stroke-dashoffset 1s ease-out;
    }
  `;
  document.head.appendChild(style);
}

// Selectors tried in order to locate the rendered scatter line paths.
const LINE_PATH_SELECTORS = [
  '.scatterlayer .js-line path',
  '.js-line path',
  'path.js-line',
  '.scatter path'
];

/** Returns the paths matched by the first selector that matches anything. */
function findLinePaths(graphDiv) {
  for (const selector of LINE_PATH_SELECTORS) {
    const paths = graphDiv.querySelectorAll(selector);
    if (paths.length > 0) return paths;
  }
  return [];
}

/**
 * Animates the chart's line paths as if they were being drawn, by animating
 * stroke-dashoffset from the full path length down to 0.
 */
function animateLineDrawing(graphDiv) {
  // Give the browser time to render: wait two animation frames.
  requestAnimationFrame(() => {
    requestAnimationFrame(() => {
      findLinePaths(graphDiv).forEach(path => {
        const len = path.getTotalLength();
        if (len <= 0) return;

        // Reset any previous animation
        path.style.transition = 'none';
        path.style.strokeDasharray = len + ' ' + len;
        path.style.strokeDashoffset = len;

        // Force reflow so the reset above is applied before the transition starts
        path.getBoundingClientRect();

        // Start animation after a tiny delay
        setTimeout(() => {
          path.style.transition = 'stroke-dashoffset 0.8s ease-out';
          path.style.strokeDashoffset = '0';
        }, 10);
      });
    });
  });
}

/**
 * Switches every scaling chart to another X dimension with a two-phase
 * animation: markers glide to their new positions, then lines are redrawn.
 *
 * @param {string} dimension - 'turn', 'token' or 'cost'; anything else is
 *   ignored (previously it blanked the charts).
 */
function updateScalingCharts(dimension) {
  if (!Object.hasOwn(SCALING_X_FIELDS, dimension)) return;

  SCENARIO_IDS.forEach(scenario => {
    const chartId = `scaling-${scenario}`;
    const data = DDR_DATA.scaling[scenario];
    const graphDiv = document.getElementById(chartId);
    if (!data || !graphDiv) return;

    const { traces, tickVals, tickText } =
      buildScalingTraces(data, dimension, SCALING_HOVER_LABELS[dimension]);

    // Phase 1: markers only, so no line is stretched across the plot mid-flight.
    const markersOnlyTraces = traces.map(trace => ({ ...trace, mode: 'markers' }));

    // Update axis title and ticks
    Plotly.relayout(chartId, {
      'xaxis.title.text': SCALING_X_LABELS[dimension],
      'xaxis.tickvals': tickVals,
      'xaxis.ticktext': tickText
    });

    // Animate points to new positions (no lines)
    Plotly.animate(chartId, {
      data: markersOnlyTraces,
      traces: traces.map((_, i) => i)
    }, {
      transition: animationSettings.transition,
      frame: { ...animationSettings.frame, redraw: true }
    })
      // Phase 2: add lines back, then animate them drawing
      .then(() => Plotly.react(chartId, traces, { ...graphDiv.layout }, plotlyConfig))
      .then(() => animateLineDrawing(graphDiv))
      .catch(err => {
        // The chain previously had no rejection handler; report instead of
        // leaving an unhandled rejection (e.g. when clicks come in quick succession).
        console.warn(`Scaling chart animation for "${scenario}" did not complete:`, err);
      });
  });
}

// ============================================================================
// RANKING COMPARISON - 3 Charts with mode switching (novelty vs accuracy)
// ============================================================================

/**
 * Renders the ranking charts. Models are sorted by the primary rank and the top
 * 12 are shown, each with a primary marker, a secondary marker and a dotted
 * connector between the two.
 *
 * @param {string} mode - 'novelty' ranks by `bt_rank`; any other value ranks by
 *   `acc_rank`.
 */
function renderRankingCharts(mode) {
  const scenarios = [
    { key: 'MIMIC', id: 'mimic' },
    { key: '10K', id: '10k' },
    { key: 'GLOBEM', id: 'globem' }
  ];

  const isNovelty = mode === 'novelty';
  const primaryKey = isNovelty ? 'bt_rank' : 'acc_rank';
  const secondaryKey = isNovelty ? 'acc_rank' : 'bt_rank';
  const primaryColor = isNovelty ? '#8B5CF6' : '#22C55E';
  const secondaryColor = isNovelty ? '#22C55E' : '#8B5CF6';
  const primaryLabel = isNovelty ? 'Novelty Rank' : 'Accuracy Rank';
  const secondaryLabel = isNovelty ? 'Accuracy Rank' : 'Novelty Rank';

  // Hover text builders (<br> is Plotly's line-break tag).
  const noveltyText = m => `${m.model}<br>Novelty: #${m.bt_rank}<br>Win Rate: ${m.win_rate}%`;
  const accuracyText = m => `${m.model}<br>Accuracy: #${m.acc_rank}<br>${m.accuracy}%`;
  const primaryText = isNovelty ? noveltyText : accuracyText;
  const secondaryText = isNovelty ? accuracyText : noveltyText;

  scenarios.forEach(({ key, id }) => {
    const rawData = DDR_DATA.ranking[key];
    if (!rawData || !chartExists(`ranking-${id}`)) return;

    // Sort a copy by the primary ranking and take the top 12 for display
    const models = [...rawData].sort((a, b) => a[primaryKey] - b[primaryKey]).slice(0, 12);
    const rows = models.map((_, i) => i);

    // Connection lines (dotted) from primary to secondary
    const traces = models.map((m, i) => ({
      x: [m[primaryKey], m[secondaryKey]],
      y: [i, i],
      mode: 'lines',
      line: { color: 'rgba(148, 163, 184, 0.4)', width: 1.5, dash: 'dot' },
      showlegend: false,
      hoverinfo: 'skip'
    }));

    // Primary rank points (filled circles)
    traces.push({
      x: models.map(m => m[primaryKey]),
      y: rows,
      mode: 'markers',
      name: primaryLabel,
      marker: {
        size: 11,
        symbol: 'circle',
        color: primaryColor,
        line: { color: '#fff', width: 1.5 }
      },
      text: models.map(primaryText),
      hovertemplate: '%{text}'
    });

    // Secondary rank points (diamond outline)
    traces.push({
      x: models.map(m => m[secondaryKey]),
      y: rows,
      mode: 'markers',
      name: secondaryLabel,
      marker: {
        size: 9,
        symbol: 'diamond-open',
        color: secondaryColor,
        line: { width: 2 }
      },
      text: models.map(secondaryText),
      hovertemplate: '%{text}'
    });

    const layout = {
      ...darkLayout,
      xaxis: {
        ...darkLayout.xaxis,
        title: { text: 'Rank', font: { size: 11, color: '#e2e8f0' } },
        range: [23, 0], // Fixed, reversed range for all charts
        dtick: 5,
        tick0: 0
      },
      yaxis: {
        ...darkLayout.yaxis,
        tickmode: 'array',
        tickvals: rows,
        ticktext: models.map(m =>
          (m.model.length > 16 ? m.model.substring(0, 14) + '...' : m.model)),
        automargin: true,
        range: [-0.5, models.length - 0.5]
      },
      showlegend: true,
      legend: {
        ...darkLayout.legend,
        y: -0.18,
        orientation: 'h',
        x: 0.5,
        xanchor: 'center'
      },
      margin: { t: 20, r: 15, b: 65, l: 120 }
    };

    Plotly.react(`ranking-${id}`, traces, layout, plotlyConfig);
  });
}

/** Renders the ranking charts in their initial 'novelty' mode. */
function initRankingCharts() {
  renderRankingCharts('novelty');
}

// ============================================================================
// TURN DISTRIBUTION - 3 Charts (Ridgeline style)
// ============================================================================

// Family colors, matched by substring against the lower-cased model name.
const TURN_FAMILY_COLORS = {
  'claude': '#FF6D00',
  'gpt': '#00C853',
  'gemini': '#2196F3',
  'deepseek': '#E91E63',
  'glm': '#9C27B0',
  'kimi': '#FFA500',
  'minimax': '#20B2AA',
  'qwen': '#0EA5E9',
  'llama': '#F59E0B'
};

/**
 * Returns the colour of the first family whose name occurs in `modelName`
 * (case-insensitive), or '#888' when no family matches.
 */
function getModelFamilyColor(modelName) {
  const lower = modelName.toLowerCase();
  const match = Object.entries(TURN_FAMILY_COLORS).find(([family]) => lower.includes(family));
  return match ? match[1] : '#888';
}

/** Renders the turn-distribution charts: one row per model, sorted by median. */
function initTurnCharts() {
  const binLabels = ['0-10', '10-20', '20-30', '30-40', '40-50',
    '50-60', '60-70', '70-80', '80-90', '90-100'];
  const binCenters = [5, 15, 25, 35, 45, 55, 65, 75, 85, 95];

  SCENARIO_IDS.forEach(scenario => {
    const data = DDR_DATA.turn[scenario];
    if (!data || !chartExists(`turn-${scenario}`)) return;

    // Sort a copy by median descending and limit to 15 models for readability
    const displayData = [...data].sort((a, b) => b.median - a.median).slice(0, 15);

    const traces = [];

    // Create ridgeline traces (area charts stacked vertically)
    displayData.forEach((model, idx) => {
      const color = getModelFamilyColor(model.model);
      const yOffset = idx;

      // Scale the distribution so its peak is 0.7 of a row height
      const maxDist = Math.max(...model.distribution) || 1;
      const scaledDist = model.distribution.map(d => d / maxDist * 0.7);

      // Filled area trace
      traces.push({
        x: binCenters,
        y: scaledDist.map(d => yOffset + d),
        mode: 'lines',
        fill: 'toself',
        fillcolor: color + '40', // hex alpha 0x40 (~25% opacity)
        line: { color: color, width: 1.5 },
        name: model.model,
        text: model.distribution.map((d, i) =>
          `${model.model}<br>${binLabels[i]} turns: ${d.toFixed(1)}%<br>Median: ${model.median}`),
        hovertemplate: '%{text}',
        showlegend: false
      });

      // Baseline of the row
      traces.push({
        x: [0, 100],
        y: [yOffset, yOffset],
        mode: 'lines',
        line: { color: 'rgba(148, 163, 184, 0.2)', width: 0.5 },
        hoverinfo: 'skip',
        showlegend: false
      });
    });

    const layout = {
      ...darkLayout,
      xaxis: {
        ...darkLayout.xaxis,
        title: { text: 'Number of Turns', font: { size: 11, color: '#e2e8f0' } },
        range: [0, 100],
        dtick: 20
      },
      yaxis: {
        ...darkLayout.yaxis,
        tickmode: 'array',
        tickvals: displayData.map((_, i) => i),
        ticktext: displayData.map(m =>
          (m.model.length > 20 ? m.model.substring(0, 18) + '...' : m.model)),
        automargin: true,
        range: [-0.5, displayData.length]
      },
      margin: { ...darkLayout.margin, l: 140 },
      showlegend: false
    };

    Plotly.newPlot(`turn-${scenario}`, traces, layout, plotlyConfig);
  });
}

// ============================================================================
// PROBING RESULTS - 3 Charts with mode switching
// ============================================================================

/** Renders the probing charts in their initial 'byTurn' mode. */
function initProbingCharts() {
  renderProbingCharts('byTurn');
}

/**
 * Renders the probing charts: one line per model plus, when `sem` is present,
 * a shaded band of logprob +/- sem.
 *
 * @param {string} mode - Key into DDR_DATA.probing. 'byTurn' plots against
 *   `turns`; any other mode plots against `progress`.
 */
function renderProbingCharts(mode) {
  const isByTurn = mode === 'byTurn';
  const xKey = isByTurn ? 'turns' : 'progress';
  const xLabel = isByTurn ? 'Turn' : 'Progress (%)';

  SCENARIO_IDS.forEach(scenario => {
    const data = DDR_DATA.probing[mode]?.[scenario];
    if (!data || !chartExists(`probing-${scenario}`)) return;

    const traces = [];

    Object.keys(data).forEach(model => {
      const modelData = data[model];
      const color = DDR_DATA.probingColors[model] || '#888';
      const xs = modelData[xKey];

      // Main line
      traces.push({
        x: xs,
        y: modelData.logprob,
        mode: 'lines+markers',
        name: model,
        line: { color, width: 2 },
        marker: { size: 4, color },
        hovertemplate: `${model}<br>${xLabel}: %{x}<br>Log Prob: %{y:.2f}`
      });

      // Error band: upper edge left-to-right, then lower edge right-to-left,
      // forming one closed polygon.
      if (modelData.sem) {
        const upper = modelData.logprob.map((v, i) => v + modelData.sem[i]);
        const lower = modelData.logprob.map((v, i) => v - modelData.sem[i]);

        traces.push({
          x: [...xs, ...[...xs].reverse()],
          y: [...upper, ...lower.reverse()],
          fill: 'toself',
          fillcolor: color + '25',
          line: { width: 0 },
          showlegend: false,
          hoverinfo: 'skip'
        });
      }
    });

    const layout = {
      ...darkLayout,
      xaxis: {
        ...darkLayout.xaxis,
        title: {
          text: isByTurn ? 'Turn' : 'Interaction Progress (%)',
          font: { size: 11, color: '#e2e8f0' }
        }
      },
      yaxis: {
        ...darkLayout.yaxis,
        title: { text: 'Avg Log Probability', font: { size: 11, color: '#e2e8f0' } }
      },
      showlegend: true
    };

    // Plotly.react creates the plot on first use and updates it in place on
    // later mode switches instead of rebuilding it from scratch.
    Plotly.react(`probing-${scenario}`, traces, layout, plotlyConfig);
  });
}

/** Adds or removes the `chart-updating` class on every existing probing chart. */
function setProbingChartsUpdating(isUpdating) {
  SCENARIO_IDS.forEach(s => {
    const el = document.getElementById(`probing-${s}`);
    if (el) el.classList.toggle('chart-updating', isUpdating);
  });
}

// ============================================================================
// ERROR ANALYSIS - Hierarchical Bar Chart
// ============================================================================

/**
 * Renders the error-analysis bar chart: one bar per subcategory, with a label
 * for each main category centred above the index span of its bars.
 */
function initErrorChart() {
  const data = DDR_DATA.error;
  if (!data || data.length === 0 || !chartExists('error-chart')) return;

  // Group by main category, remembering the first and last bar index of each
  const categoryGroups = {};
  data.forEach((item, idx) => {
    if (!categoryGroups[item.main_category]) {
      categoryGroups[item.main_category] = { start: idx, end: idx, items: [] };
    }
    categoryGroups[item.main_category].end = idx;
    categoryGroups[item.main_category].items.push(item);
  });

  const traces = [{
    x: data.map(d => d.subcategory),
    y: data.map(d => d.percentage),
    type: 'bar',
    marker: {
      color: data.map(d => d.color),
      line: { color: '#fff', width: 0.5 }
    },
    text: data.map(d => `${d.percentage}%`),
    textposition: 'outside',
    textfont: { size: 11, color: '#e2e8f0' },
    hovertemplate: '%{x}<br>%{y:.1f}%<br>Count: %{customdata}',
    customdata: data.map(d => d.count),
    showlegend: false
  }];

  const maxPct = Math.max(...data.map(d => d.percentage));

  // Annotations for main category labels, placed above the tallest bar
  const annotations = Object.entries(categoryGroups).map(([catName, group]) => ({
    x: (group.start + group.end) / 2,
    y: maxPct * 1.15,
    text: `${catName}`,
    showarrow: false,
    font: { size: 10, color: '#e2e8f0' },
    xanchor: 'center',
    yanchor: 'bottom'
  }));

  const layout = {
    ...darkLayout,
    xaxis: {
      ...darkLayout.xaxis,
      tickangle: -30,
      tickfont: { size: 10, color: '#94a3b8' }
    },
    yaxis: {
      ...darkLayout.yaxis,
      title: { text: 'Percentage (%)', font: { size: 11, color: '#e2e8f0' } },
      range: [0, maxPct * 1.25] // headroom for bar labels and category annotations
    },
    annotations: annotations,
    margin: { t: 50, r: 20, b: 100, l: 50 }
  };

  Plotly.newPlot('error-chart', traces, layout, plotlyConfig);
}

// ============================================================================
// EVENT WIRING
// ============================================================================

/** Tab navigation: activates the clicked tab and the section it points to. */
function bindTabNavigation() {
  document.querySelectorAll('.nav-tab').forEach(tab => {
    tab.addEventListener('click', () => {
      setActive('.nav-tab', tab);

      document.querySelectorAll('.section').forEach(s => s.classList.remove('active'));
      const section = document.getElementById(tab.dataset.section);
      if (section) section.classList.add('active');

      // Resize plots on tab change
      setTimeout(() => window.dispatchEvent(new Event('resize')), 100);
    });
  });
}

/** Scaling dimension toggle (every .dim-btn that is not a probing button). */
function bindScalingDimensionToggle() {
  const selector = '.dim-btn:not(.probing-dim)';
  document.querySelectorAll(selector).forEach(btn => {
    btn.addEventListener('click', () => {
      setActive(selector, btn);
      const dimension = btn.dataset.dim;
      currentScalingDim = dimension;
      updateScalingCharts(dimension);
    });
  });
}

/** Ranking mode toggle. */
function bindRankingModeToggle() {
  document.querySelectorAll('.ranking-dim').forEach(btn => {
    btn.addEventListener('click', () => {
      setActive('.ranking-dim', btn);
      const mode = btn.dataset.mode;
      currentRankingMode = mode;
      renderRankingCharts(mode);
    });
  });
}

/** Probing mode toggle, with a short `chart-updating` phase as visual feedback. */
function bindProbingModeToggle() {
  document.querySelectorAll('.probing-dim').forEach(btn => {
    btn.addEventListener('click', () => {
      setActive('.probing-dim', btn);
      const mode = btn.dataset.mode;
      currentProbingMode = mode;

      setProbingChartsUpdating(true);
      setTimeout(() => {
        renderProbingCharts(mode);
        setProbingChartsUpdating(false);
      }, 150);
    });
  });
}

/** Asks Plotly to resize one chart; charts without a container are skipped. */
function resizeChartIfPresent(id) {
  if (!chartExists(id)) return;
  Promise.resolve(Plotly.Plots.resize(id)).catch(err => {
    // Best effort: a chart that cannot be resized right now (presumably one in
    // a hidden tab) must not prevent the others from being resized.
    console.debug(`Skipped resizing "${id}":`, err);
  });
}

/** Debounced window resize handler (100 ms). */
function handleWindowResize() {
  clearTimeout(resizeTimeout);
  resizeTimeout = setTimeout(() => {
    SCENARIO_IDS.forEach(s => {
      ['scaling', 'ranking', 'turn', 'probing'].forEach(prefix => {
        resizeChartIfPresent(`${prefix}-${s}`);
      });
    });
    resizeChartIfPresent('error-chart');
  }, 100);
}

// ============================================================================
// INITIALIZE ALL CHARTS
// ============================================================================

/** Renders every chart once. */
function initAllCharts() {
  initScalingCharts();
  initRankingCharts();
  initTurnCharts();
  initErrorChart();
  initProbingCharts();
}

/** Installs styles and event listeners and schedules the initial render. */
function bootstrapDashboard() {
  injectLineAnimationStyle();
  bindTabNavigation();
  bindScalingDimensionToggle();
  bindRankingModeToggle();
  bindProbingModeToggle();

  if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', initAllCharts);
  } else {
    // DOMContentLoaded has already fired (e.g. the script was injected late).
    initAllCharts();
  }

  window.addEventListener('resize', handleWindowResize);
}

// Browser-only wiring; skipped when no DOM is available so that the pure
// helpers above can be loaded on their own.
if (typeof document !== 'undefined') {
  bootstrapDashboard();
}