// DDR-Bench Interactive Charts with Smooth Animations
// Using Plotly.js with animate for smooth transitions
// Shared Plotly layout defaults for the dark theme. The chart builders in this
// file spread this object into their own layout and override individual entries.
const darkLayout = (() => {
  const textColor = '#e2e8f0';
  const mutedTextColor = '#94a3b8';
  const transparentPanel = 'rgba(30, 41, 59, 0)';
  const subtleBorder = 'rgba(148, 163, 184, 0.2)';

  // The x and y axes are styled identically; every call returns a fresh object
  // so the two axes never share nested references.
  const makeAxisStyle = () => ({
    gridcolor: 'rgba(148, 163, 184, 0.12)',
    linecolor: subtleBorder,
    tickfont: { color: mutedTextColor, size: 10 },
    title: { font: { color: textColor, size: 11 } },
  });

  return {
    paper_bgcolor: transparentPanel,
    plot_bgcolor: transparentPanel,
    font: { family: 'Inter, sans-serif', color: textColor, size: 11 },
    xaxis: makeAxisStyle(),
    yaxis: makeAxisStyle(),
    legend: {
      bgcolor: 'rgba(30, 41, 59, 0.9)',
      bordercolor: subtleBorder,
      borderwidth: 1,
      font: { color: textColor, size: 10 },
      // Horizontal legend, centered, positioned below the plot area.
      orientation: 'h',
      y: -0.2,
      x: 0.5,
      xanchor: 'center',
    },
    hoverlabel: {
      bgcolor: '#1e293b',
      bordercolor: '#6366f1',
      font: { color: textColor, size: 11 },
    },
    margin: { t: 20, r: 15, b: 60, l: 50 },
  };
})();
// Options handed to the Plotly.newPlot / Plotly.react calls in this file.
const plotlyConfig = (() => {
  // Mode-bar buttons that are removed from every chart.
  const hiddenModeBarButtons = ['lasso2d', 'select2d', 'autoScale2d'];
  return {
    displayModeBar: true,
    responsive: true,
    modeBarButtonsToRemove: hiddenModeBarButtons,
    displaylogo: false,
  };
})();

// Timing for smooth animated transitions; the transition and the frame share
// one duration (in milliseconds).
const animationSettings = (() => {
  const durationMs = 500;
  return {
    transition: { duration: durationMs, easing: 'cubic-in-out' },
    frame: { duration: durationMs },
  };
})();

// Toggle state: the scaling x-axis dimension and the probing mode that were
// selected most recently.
let currentScalingDim = 'turn';
let currentProbingMode = 'byTurn';
// ============================================================================
// SCALING ANALYSIS - 3 charts with animated dimension switching, plotted on a
// normalized coordinate system for smooth animation
// ============================================================================
/**
 * Map raw axis values onto the normalized [0, 1] coordinate system.
 *
 * Linear scales are anchored at 0 (min is always 0 for this use case). Log
 * scales span the smallest to the largest positive value; non-positive entries
 * have no logarithm and are pinned to 0.
 *
 * @param {number[]} values - Raw values to normalize.
 * @param {string} type - 'log' for a log10 scale; anything else is linear.
 * @returns {{normalized: number[], min: number, max: number}} Normalized values
 *   (same order and length as the input) and the bounds of the scale.
 */
function normalizeData(values, type) {
  if (values.length === 0) return { normalized: [], min: 0, max: 1 };

  // Fold with Math.min / Math.max instead of spreading the array into a single
  // call, so very large inputs cannot exceed the engine's argument limit.
  const smallest = (nums) => nums.reduce((acc, v) => Math.min(acc, v), Infinity);
  const largest = (nums) => nums.reduce((acc, v) => Math.max(acc, v), -Infinity);

  if (type === 'log') {
    const positiveValues = values.filter((v) => v > 0);
    if (positiveValues.length === 0) {
      // Nothing can be placed on a log scale. Report finite bounds (1 = 10^0)
      // rather than Infinity / -Infinity so later log10 calls stay finite.
      return { normalized: values.map(() => 0), min: 1, max: 1 };
    }
    const min = smallest(positiveValues);
    const max = largest(positiveValues);
    const logMin = Math.log10(min);
    // A zero-width span (all positive values equal) falls back to 1 to avoid 0/0.
    const range = Math.log10(max) - logMin || 1;
    const normalized = values.map((v) => (v > 0 ? (Math.log10(v) - logMin) / range : 0));
    return { normalized, min, max };
  }

  const min = 0; // Always start linear scales at 0 for this use case
  const max = largest(values);
  const range = max - min || 1;
  const normalized = values.map((v) => (v - min) / range);
  return { normalized, min, max };
}
/**
 * Build tick positions and labels for an axis drawn on the normalized [0, 1] scale.
 *
 * Ticks sit at six evenly spaced normalized positions; each label shows the
 * real value that the position corresponds to between min and max.
 *
 * @param {number} min - Real value at normalized position 0.
 * @param {number} max - Real value at normalized position 1.
 * @param {string} type - 'log' labels are dollar amounts interpolated in log10
 *   space; any other type is interpolated linearly and rounded to an integer.
 * @returns {{tickVals: number[], tickText: string[]}}
 */
function generateTicks(min, max, type) {
  const tickVals = [0, 0.2, 0.4, 0.6, 0.8, 1.0];

  if (type === 'log') {
    const logMin = Math.log10(min);
    const logSpan = Math.log10(max) - logMin;
    const tickText = tickVals.map((fraction) => {
      const amount = Math.pow(10, logMin + (fraction * logSpan));
      // Amounts below 1 get three decimals (more precision for small costs).
      const digits = amount >= 1 ? 1 : 3;
      return `$${amount.toFixed(digits)}`;
    });
    return { tickVals, tickText };
  }

  const span = max - min;
  const tickText = tickVals.map((fraction) => {
    const value = min + (fraction * span);
    // Thousands are abbreviated with a 'k' suffix.
    return value >= 1000 ? `${(value / 1000).toFixed(0)}k` : value.toFixed(0);
  });
  return { tickVals, tickText };
}
// Accuracy-axis [y_min, y_max] per scenario, matching the Python scripts exactly.
const SCALING_Y_RANGES = {
  mimic: [5, 40], // y_min=5, y_max=40
  '10k': [0, 85], // y_min=0, y_max=85
  globem: [0, 50], // y_min=0, y_max=50
};
/**
 * Draw the initial scaling chart (accuracy vs. interaction turns) for each scenario.
 *
 * The x values of all models are normalized together onto [0, 1] so the axis
 * can stay linear with a fixed range; tick labels carry the real turn counts
 * and each trace keeps its real values in customdata for the hover text.
 * Scenarios without data in DDR_DATA.scaling are skipped.
 */
function initScalingCharts() {
  const axisTitle = (text) => ({ text, font: { size: 11, color: '#e2e8f0' } });

  for (const scenario of ['mimic', '10k', 'globem']) {
    const scenarioData = DDR_DATA.scaling[scenario];
    if (!scenarioData) continue;

    const modelNames = Object.keys(scenarioData);

    // Initial dimension is 'turn': pool every model's turns for one shared scale.
    const pooledTurns = modelNames.flatMap((name) => scenarioData[name].turns);
    const turnScale = normalizeData(pooledTurns, 'linear');
    const ticks = generateTicks(turnScale.min, turnScale.max, 'linear');

    // Walk the pooled array model by model to recover each trace's x values.
    const traces = [];
    let cursor = 0;
    for (const name of modelNames) {
      const series = scenarioData[name];
      const next = cursor + series.turns.length;
      const color = DDR_DATA.modelColors[name] || '#888';
      traces.push({
        x: turnScale.normalized.slice(cursor, next),
        y: series.accuracy,
        mode: 'lines+markers',
        name,
        line: { color, width: 2 },
        marker: { size: 6, color },
        hovertemplate: `${name}\nTurn: %{customdata}\nAccuracy: %{y:.2f}%`,
        customdata: series.turns, // real values, shown on hover
      });
      cursor = next;
    }

    const layout = {
      ...darkLayout,
      xaxis: {
        ...darkLayout.xaxis,
        title: axisTitle('Number of Interaction Turns'),
        type: 'linear', // the axis is always linear; scaling lives in the data
        range: [-0.05, 1.05], // fixed range around the normalized [0, 1] span
        tickmode: 'array',
        tickvals: ticks.tickVals,
        ticktext: ticks.tickText,
        zeroline: false,
      },
      yaxis: {
        ...darkLayout.yaxis,
        title: axisTitle('Accuracy (%)'),
        dtick: 5,
        range: SCALING_Y_RANGES[scenario] || [0, 100],
      },
      showlegend: true,
    };

    Plotly.newPlot(`scaling-${scenario}`, traces, layout, plotlyConfig);
  }
}
// Add a stylesheet to the document head with a stroke-dashoffset transition
// rule, used for the line drawing animation.
const style = document.createElement('style');
style.textContent = [
  '',
  '.js-line path {',
  'transition: stroke-dashoffset 1s ease-out;',
  '}',
  '',
].join('\n');
document.head.appendChild(style);
/**
 * Switch the x-axis dimension of the three scaling charts and animate the change.
 *
 * The x values of all models are normalized together (normalizeData) onto the
 * fixed [0, 1] axis, and the tick labels (generateTicks) show the real values.
 * The transition runs in two phases: markers animate to their new positions,
 * then the lines are restored and revealed with a stroke-dash drawing effect.
 *
 * Scenarios without data or without a chart container are skipped, and an
 * unrecognized dimension is ignored instead of blanking the charts.
 *
 * @param {string} dimension - 'turn', 'token' or 'cost'.
 */
function updateScalingCharts(dimension) {
  // Everything that depends on the dimension, kept in one place.
  const dimensionSpecs = {
    turn: { key: 'turns', axisTitle: 'Number of Interaction Turns', hoverLabel: 'Turns', scale: 'linear' },
    token: { key: 'tokens', axisTitle: 'Total Costed Tokens', hoverLabel: 'Tokens', scale: 'linear' },
    cost: { key: 'costs', axisTitle: 'Inference Cost ($)', hoverLabel: 'Cost', scale: 'log' },
  };
  if (!Object.prototype.hasOwnProperty.call(dimensionSpecs, dimension)) {
    console.warn(`updateScalingCharts: unknown dimension "${dimension}"`);
    return;
  }
  const spec = dimensionSpecs[dimension];

  // Reveal freshly rendered line paths by animating stroke-dashoffset down to 0.
  const animateLineDrawing = (graphDiv) => {
    // Wait two frames to give the browser time to render the new paths.
    requestAnimationFrame(() => {
      requestAnimationFrame(() => {
        // Try several selectors until one of them finds the line paths.
        const selectors = [
          '.scatterlayer .js-line path',
          '.js-line path',
          'path.js-line',
          '.scatter path',
        ];
        let paths = [];
        for (const selector of selectors) {
          paths = graphDiv.querySelectorAll(selector);
          if (paths.length > 0) break;
        }

        paths.forEach((path) => {
          const len = path.getTotalLength();
          if (len <= 0) return;
          // Reset any previous animation and hide the whole stroke.
          path.style.transition = 'none';
          path.style.strokeDasharray = len + ' ' + len;
          path.style.strokeDashoffset = len;
          // Force a reflow so the hidden state is applied before transitioning.
          path.getBoundingClientRect();
          // Start the animation after a tiny delay.
          setTimeout(() => {
            path.style.transition = 'stroke-dashoffset 0.8s ease-out';
            path.style.strokeDashoffset = '0';
          }, 10);
        });
      });
    });
  };

  ['mimic', '10k', 'globem'].forEach((scenario) => {
    const data = DDR_DATA.scaling[scenario];
    if (!data) return;

    const chartId = `scaling-${scenario}`;
    const graphDiv = document.getElementById(chartId);
    if (!graphDiv) return; // chart container is not on this page

    const models = Object.keys(data);

    // 1. Collect the raw x values of every model for a shared normalization.
    const rawByModel = models.map((model) => data[model][spec.key]);
    const allRawX = rawByModel.flat();

    // 2. Normalize and build tick labels carrying the real values.
    const { normalized: allNormX, min: minX, max: maxX } = normalizeData(allRawX, spec.scale);
    const { tickVals, tickText } = generateTicks(minX, maxX, spec.scale);

    // 3. Split the normalized values back into one trace per model. The slice
    //    length comes from the array of THIS dimension, so models stay aligned
    //    even if it differs in length from the turns array.
    const newTraces = [];
    let offset = 0;
    models.forEach((model, idx) => {
      const rawValues = rawByModel[idx];
      const end = offset + rawValues.length;
      newTraces.push({
        x: allNormX.slice(offset, end),
        y: data[model].accuracy,
        customdata: rawValues, // real values, shown on hover
        mode: 'lines+markers',
        hovertemplate: `${model}\n${spec.hoverLabel}: %{customdata}\nAccuracy: %{y:.2f}%`,
      });
      offset = end;
    });

    // 4. Two-phase animation. Phase 1 moves the points without any lines.
    const markersOnlyTraces = newTraces.map((trace) => ({ ...trace, mode: 'markers' }));

    const relayoutDone = Plotly.relayout(chartId, {
      'xaxis.title.text': spec.axisTitle,
      'xaxis.tickvals': tickVals,
      'xaxis.ticktext': tickText,
    });

    const linesRedrawn = Plotly.animate(chartId, {
      data: markersOnlyTraces,
      traces: models.map((_, i) => i),
    }, {
      // Shared timing from animationSettings, plus a full redraw per frame.
      ...animationSettings,
      frame: { ...animationSettings.frame, redraw: true },
    })
      // Phase 2: put the lines back, then animate them drawing in.
      .then(() => Plotly.react(chartId, newTraces, { ...graphDiv.layout }, plotlyConfig))
      .then(() => animateLineDrawing(graphDiv));

    // Report a failed or interrupted transition instead of leaving the
    // rejection unhandled.
    Promise.all([relayoutDone, linesRedrawn]).catch((err) => {
      console.warn(`updateScalingCharts: transition for ${chartId} did not complete`, err);
    });
  });
}
// Scaling dimension toggle: clicking a button marks it as the only active one,
// records its data-dim value and re-plots the scaling charts for it.
{
  const scalingButtonSelector = '.dim-btn:not(.probing-dim)';

  const onScalingDimClick = (event) => {
    const clicked = event.currentTarget;
    for (const other of document.querySelectorAll(scalingButtonSelector)) {
      other.classList.remove('active');
    }
    clicked.classList.add('active');
    currentScalingDim = clicked.dataset.dim;
    updateScalingCharts(currentScalingDim);
  };

  for (const button of document.querySelectorAll(scalingButtonSelector)) {
    button.addEventListener('click', onScalingDimClick);
  }
}
// ============================================================================
// RANKING COMPARISON - 3 Charts with mode switching (novelty vs accuracy)
// ============================================================================
// Ranking mode selected most recently ('novelty' until a toggle changes it).
let currentRankingMode = 'novelty';

/**
 * Render the three ranking comparison charts.
 *
 * Each chart lists the top 12 models, ordered by the primary ranking, one per
 * row. Every row shows the primary rank (filled circle), the secondary rank
 * (open diamond) and a dotted connector between the two.
 *
 * @param {string} mode - 'novelty' makes the novelty rank (bt_rank) primary;
 *   any other value makes the accuracy rank (acc_rank) primary.
 */
function renderRankingCharts(mode) {
  // Both rankings described once; the mode only decides which one is primary.
  const noveltyRanking = {
    rankKey: 'bt_rank',
    color: '#8B5CF6',
    label: 'Novelty Rank',
    describe: (m) => `${m.model}\nNovelty: #${m.bt_rank}\nWin Rate: ${m.win_rate}%`,
  };
  const accuracyRanking = {
    rankKey: 'acc_rank',
    color: '#22C55E',
    label: 'Accuracy Rank',
    describe: (m) => `${m.model}\nAccuracy: #${m.acc_rank}\n${m.accuracy}%`,
  };
  const noveltyFirst = mode === 'novelty';
  const primary = noveltyFirst ? noveltyRanking : accuracyRanking;
  const secondary = noveltyFirst ? accuracyRanking : noveltyRanking;

  const charts = [
    ['MIMIC', 'mimic'],
    ['10K', '10k'],
    ['GLOBEM', 'globem'],
  ];

  for (const [dataKey, chartSuffix] of charts) {
    const rawData = DDR_DATA.ranking[dataKey];
    if (!rawData) continue;

    // Order by the primary ranking and keep the top 12 for display.
    const models = [...rawData]
      .sort((a, b) => a[primary.rankKey] - b[primary.rankKey])
      .slice(0, 12);
    const rowIndices = models.map((_, row) => row);

    // Dotted connector from the primary to the secondary rank, one per row.
    const connectors = models.map((m, row) => ({
      x: [m[primary.rankKey], m[secondary.rankKey]],
      y: [row, row],
      mode: 'lines',
      line: {
        color: 'rgba(148, 163, 184, 0.4)',
        width: 1.5,
        dash: 'dot',
      },
      showlegend: false,
      hoverinfo: 'skip',
    }));

    // Primary rank points (filled circles).
    const primaryPoints = {
      x: models.map((m) => m[primary.rankKey]),
      y: rowIndices,
      mode: 'markers',
      name: primary.label,
      marker: {
        size: 11,
        symbol: 'circle',
        color: primary.color,
        line: { color: '#fff', width: 1.5 },
      },
      text: models.map(primary.describe),
      hovertemplate: '%{text}',
    };

    // Secondary rank points (diamond outline).
    const secondaryPoints = {
      x: models.map((m) => m[secondary.rankKey]),
      y: models.map((_, row) => row),
      mode: 'markers',
      name: secondary.label,
      marker: {
        size: 9,
        symbol: 'diamond-open',
        color: secondary.color,
        line: { width: 2 },
      },
      text: models.map(secondary.describe),
      hovertemplate: '%{text}',
    };

    // Long model names are shortened for the row labels.
    const shortName = (name) => (name.length > 16 ? name.substring(0, 14) + '...' : name);

    const layout = {
      ...darkLayout,
      xaxis: {
        ...darkLayout.xaxis,
        title: { text: 'Rank', font: { size: 11, color: '#e2e8f0' } },
        range: [23, 0], // fixed range for all charts
        dtick: 5,
        tick0: 0,
      },
      yaxis: {
        ...darkLayout.yaxis,
        tickmode: 'array',
        tickvals: models.map((_, row) => row),
        ticktext: models.map((m) => shortName(m.model)),
        automargin: true,
        range: [-0.5, models.length - 0.5],
      },
      showlegend: true,
      legend: {
        ...darkLayout.legend,
        y: -0.18,
        orientation: 'h',
        x: 0.5,
        xanchor: 'center',
      },
      margin: { t: 20, r: 15, b: 65, l: 120 },
    };

    Plotly.react(
      `ranking-${chartSuffix}`,
      [...connectors, primaryPoints, secondaryPoints],
      layout,
      plotlyConfig,
    );
  }
}
/** Draw the ranking charts in their initial mode, with the novelty rank as primary. */
function initRankingCharts() {
  const initialMode = 'novelty';
  renderRankingCharts(initialMode);
}
// Ranking mode toggle: clicking a button marks it as the only active one,
// records its data-mode value and re-renders the ranking charts in that mode.
{
  const rankingButtonSelector = '.ranking-dim';

  const onRankingModeClick = (event) => {
    const clicked = event.currentTarget;
    for (const other of document.querySelectorAll(rankingButtonSelector)) {
      other.classList.remove('active');
    }
    clicked.classList.add('active');
    currentRankingMode = clicked.dataset.mode;
    renderRankingCharts(currentRankingMode);
  };

  for (const button of document.querySelectorAll(rankingButtonSelector)) {
    button.addEventListener('click', onRankingModeClick);
  }
}
// ============================================================================
// TURN DISTRIBUTION - 3 Charts (Ridgeline style)
// ============================================================================
/**
 * Draw the three turn-distribution charts in a ridgeline style.
 *
 * For each scenario the models are sorted by median (descending) and the first
 * 15 are shown, one row each. A row holds the model's distribution over ten
 * turn bins, scaled so its highest bin reaches 0.7 of the row height, plus a
 * thin baseline. Scenarios without data in DDR_DATA.turn are skipped.
 */
function initTurnCharts() {
  const scenarios = ['mimic', '10k', 'globem'];

  // Family colors, matched by substring against the lower-cased model name.
  const familyColors = {
    'claude': '#FF6D00',
    'gpt': '#00C853',
    'gemini': '#2196F3',
    'deepseek': '#E91E63',
    'glm': '#9C27B0',
    'kimi': '#FFA500',
    'minimax': '#20B2AA',
    'qwen': '#0EA5E9',
    'llama': '#F59E0B'
  };

  function getModelColor(modelName) {
    const lower = modelName.toLowerCase();
    for (const [family, color] of Object.entries(familyColors)) {
      if (lower.includes(family)) return color;
    }
    return '#888';
  }

  // Append a two-digit hex alpha to a hex color. The #rgb shorthand is expanded
  // to #rrggbb first: appending to the shorthand would give a 5-digit value
  // (e.g. '#88840' for the '#888' fallback), which is not a valid color.
  function withAlpha(hexColor, alphaHex) {
    const expanded = /^#[0-9a-f]{3}$/i.test(hexColor)
      ? '#' + hexColor.slice(1).replace(/./g, '$&$&')
      : hexColor;
    return expanded + alphaHex;
  }

  scenarios.forEach(scenario => {
    const data = DDR_DATA.turn[scenario];
    if (!data) return;

    // Sort by median descending, then limit to 15 models for readability.
    const sortedData = [...data].sort((a, b) => b.median - a.median);
    const displayData = sortedData.slice(0, 15);

    const traces = [];
    const binLabels = ['0-10', '10-20', '20-30', '30-40', '40-50', '50-60', '60-70', '70-80', '80-90', '90-100'];
    const binCenters = [5, 15, 25, 35, 45, 55, 65, 75, 85, 95];

    // Create ridgeline traces (area charts stacked vertically).
    displayData.forEach((model, idx) => {
      const color = getModelColor(model.model);
      const yOffset = idx;

      // Scale the distribution to fit in the row; `|| 1` avoids dividing by 0
      // when every bin is empty.
      const maxDist = Math.max(...model.distribution) || 1;
      const scaledDist = model.distribution.map(d => d / maxDist * 0.7);

      // Filled area trace.
      traces.push({
        x: binCenters,
        y: scaledDist.map(d => yOffset + d),
        mode: 'lines',
        fill: 'toself',
        fillcolor: withAlpha(color, '40'), // 25% opacity
        line: { color: color, width: 1.5 },
        name: model.model,
        text: model.distribution.map((d, i) =>
          `${model.model}\n${binLabels[i]} turns: ${d.toFixed(1)}%\nMedian: ${model.median}`
        ),
        hovertemplate: '%{text}',
        showlegend: false
      });

      // Baseline of the row.
      traces.push({
        x: [0, 100],
        y: [yOffset, yOffset],
        mode: 'lines',
        line: { color: 'rgba(148, 163, 184, 0.2)', width: 0.5 },
        hoverinfo: 'skip',
        showlegend: false
      });
    });

    const layout = {
      ...darkLayout,
      xaxis: {
        ...darkLayout.xaxis,
        title: { text: 'Number of Turns', font: { size: 11, color: '#e2e8f0' } },
        range: [0, 100],
        dtick: 20
      },
      yaxis: {
        ...darkLayout.yaxis,
        tickmode: 'array',
        tickvals: displayData.map((_, i) => i),
        ticktext: displayData.map(m => m.model.length > 20 ? m.model.substring(0, 18) + '...' : m.model),
        automargin: true,
        range: [-0.5, displayData.length]
      },
      margin: { ...darkLayout.margin, l: 140 },
      showlegend: false
    };

    Plotly.newPlot(`turn-${scenario}`, traces, layout, plotlyConfig);
  });
}
// ============================================================================
// PROBING RESULTS - 3 Charts with animated mode switching
// ============================================================================
/** Draw the probing charts in their initial mode, 'byTurn'. */
function initProbingCharts() {
  const initialMode = 'byTurn';
  renderProbingCharts(initialMode);
}
/**
 * Render the three probing charts (average log probability per model).
 *
 * Each model contributes a line with markers and, when standard errors (sem)
 * are present, a filled band of logprob +/- sem. Scenarios without data for the
 * requested mode are skipped.
 *
 * @param {string} mode - Key into DDR_DATA.probing. 'byTurn' plots against
 *   turns; any other mode plots against progress.
 */
function renderProbingCharts(mode) {
  const scenarios = ['mimic', 'globem', '10k'];

  // These depend only on the mode, so they are resolved once up front.
  const byTurn = mode === 'byTurn';
  const xKey = byTurn ? 'turns' : 'progress';
  const xLabel = byTurn ? 'Turn' : 'Progress (%)';

  // Append a two-digit hex alpha to a hex color. The #rgb shorthand is expanded
  // to #rrggbb first: appending to the shorthand would give a 5-digit value
  // (e.g. '#88825' for the '#888' fallback), which is not a valid color.
  const withAlpha = (hexColor, alphaHex) => {
    const expanded = /^#[0-9a-f]{3}$/i.test(hexColor)
      ? '#' + hexColor.slice(1).replace(/./g, '$&$&')
      : hexColor;
    return expanded + alphaHex;
  };

  scenarios.forEach(scenario => {
    const data = DDR_DATA.probing[mode]?.[scenario];
    if (!data) return;

    const traces = [];
    Object.keys(data).forEach(model => {
      const modelData = data[model];
      const color = DDR_DATA.probingColors[model] || '#888';

      // Main line
      traces.push({
        x: modelData[xKey],
        y: modelData.logprob,
        mode: 'lines+markers',
        name: model,
        line: { color, width: 2 },
        marker: { size: 4, color },
        hovertemplate: `${model}\n${xLabel}: %{x}\nLog Prob: %{y:.2f}`
      });

      // Error band: upper edge left to right, then lower edge right to left,
      // so the points form one closed outline.
      if (modelData.sem) {
        const upper = modelData.logprob.map((v, i) => v + modelData.sem[i]);
        const lower = modelData.logprob.map((v, i) => v - modelData.sem[i]);
        traces.push({
          x: [...modelData[xKey], ...modelData[xKey].slice().reverse()],
          y: [...upper, ...lower.slice().reverse()],
          fill: 'toself',
          fillcolor: withAlpha(color, '25'),
          line: { width: 0 },
          showlegend: false,
          hoverinfo: 'skip'
        });
      }
    });

    const layout = {
      ...darkLayout,
      xaxis: {
        ...darkLayout.xaxis,
        title: { text: byTurn ? 'Turn' : 'Interaction Progress (%)', font: { size: 11, color: '#e2e8f0' } }
      },
      yaxis: {
        ...darkLayout.yaxis,
        title: { text: 'Avg Log Probability', font: { size: 11, color: '#e2e8f0' } }
      },
      showlegend: true
    };

    Plotly.newPlot(`probing-${scenario}`, traces, layout, plotlyConfig);
  });
}
// Probing dimension toggle: clicking a button marks it as the only active one,
// records its data-mode value and re-renders the probing charts after a short
// delay, with the 'chart-updating' class applied in between as visual feedback.
document.querySelectorAll('.probing-dim').forEach((btn) => {
  btn.addEventListener('click', () => {
    document.querySelectorAll('.probing-dim').forEach((b) => b.classList.remove('active'));
    btn.classList.add('active');

    const mode = btn.dataset.mode;
    currentProbingMode = mode;

    // Only the containers that exist on this page get the feedback class; a
    // missing one must not make the click handler throw.
    const containers = ['mimic', 'globem', '10k']
      .map((s) => document.getElementById(`probing-${s}`))
      .filter(Boolean);
    containers.forEach((el) => el.classList.add('chart-updating'));

    setTimeout(() => {
      try {
        renderProbingCharts(mode);
      } finally {
        // Always clear the feedback class, even when rendering throws, so the
        // charts are never left stuck in the updating state.
        containers.forEach((el) => el.classList.remove('chart-updating'));
      }
    }, 150);
  });
});
// ============================================================================
// ERROR ANALYSIS - Hierarchical Bar Chart
// ============================================================================
/**
 * Draw the error-analysis bar chart: one bar per subcategory, with the name of
 * each main category annotated above the middle of its group of bars.
 *
 * Does nothing when DDR_DATA.error is missing or empty, or when the
 * 'error-chart' container is not on the page.
 */
function initErrorChart() {
  const data = DDR_DATA.error;
  if (!data || data.length === 0) return;

  // The resize handler in this file treats this container as optional, so a
  // page without it must not make chart initialization throw here.
  if (!document.getElementById('error-chart')) return;

  // First and last bar index of every main category, in order of appearance.
  // A Map keeps arbitrary category names from colliding with object built-ins.
  const categorySpans = new Map();
  data.forEach((item, idx) => {
    const span = categorySpans.get(item.main_category);
    if (span) {
      span.end = idx;
    } else {
      categorySpans.set(item.main_category, { start: idx, end: idx });
    }
  });

  const traces = [{
    x: data.map(d => d.subcategory),
    y: data.map(d => d.percentage),
    type: 'bar',
    marker: {
      color: data.map(d => d.color),
      line: { color: '#fff', width: 0.5 }
    },
    text: data.map(d => `${d.percentage}%`),
    textposition: 'outside',
    textfont: { size: 11, color: '#e2e8f0' },
    // Three hover lines separated by <br>, Plotly's line-break tag. The literal
    // previously contained raw line breaks inside single quotes, which is a
    // syntax error.
    hovertemplate: '%{x}<br>%{y:.1f}%<br>Count: %{customdata}',
    customdata: data.map(d => d.count),
    showlegend: false
  }];

  const maxPct = Math.max(...data.map(d => d.percentage));

  // One label per main category, centered over its bars and placed above the
  // tallest bar (x is the bar index, midway between the group's first and last).
  const annotations = [...categorySpans].map(([catName, { start, end }]) => ({
    x: (start + end) / 2,
    y: maxPct * 1.15,
    text: `${catName}`,
    showarrow: false,
    font: { size: 10, color: '#e2e8f0' },
    xanchor: 'center',
    yanchor: 'bottom'
  }));

  const layout = {
    ...darkLayout,
    xaxis: {
      ...darkLayout.xaxis,
      tickangle: -30,
      tickfont: { size: 10, color: '#94a3b8' }
    },
    yaxis: {
      ...darkLayout.yaxis,
      title: { text: 'Percentage (%)', font: { size: 11, color: '#e2e8f0' } },
      // Headroom above the tallest bar for the bar labels and annotations.
      range: [0, maxPct * 1.25]
    },
    annotations: annotations,
    margin: { t: 50, r: 20, b: 100, l: 50 }
  };

  Plotly.newPlot('error-chart', traces, layout, plotlyConfig);
}
// ============================================================================
// INITIALIZE ALL CHARTS
// ============================================================================
// Initialize every chart group once the DOM is ready. Each group runs in
// isolation: if one initializer throws (for example because its data or its
// container is missing), the error is logged and the remaining groups are
// still drawn.
document.addEventListener('DOMContentLoaded', () => {
  const initializers = [
    initScalingCharts,
    initRankingCharts,
    initTurnCharts,
    initErrorChart,
    initProbingCharts,
  ];

  for (const init of initializers) {
    try {
      init();
    } catch (err) {
      console.error(`Chart initialization failed in ${init.name}:`, err);
    }
  }
});
// Handle window resize: once resizing has paused for 100 ms, re-fit every
// chart whose container exists on the page.
let resizeTimeout;
window.addEventListener('resize', () => {
  clearTimeout(resizeTimeout);
  resizeTimeout = setTimeout(() => {
    const chartIds = ['mimic', '10k', 'globem'].flatMap((s) => [
      `scaling-${s}`,
      `ranking-${s}`,
      `turn-${s}`,
      `probing-${s}`,
    ]);
    chartIds.push('error-chart');

    chartIds.forEach((id) => {
      // Every container gets the existence check that previously only
      // 'error-chart' had, so one absent chart cannot abort the whole pass.
      if (!document.getElementById(id)) return;

      // Plotly.Plots.resize returns a promise; handle a rejection (presumably
      // raised for a chart that is not currently displayed) instead of leaving
      // it unhandled.
      Promise.resolve(Plotly.Plots.resize(id)).catch((err) => {
        console.debug(`Skipped resize of ${id}:`, err);
      });
    });
  }, 100);
});