# Evaluation result entry: one reported score for one benchmark task.
# NOTE(review): the original lines were wrapped in table pipes with their
# indentation lost; the nesting below is reconstructed from key grouping.
# Confirm against the consumer's schema.
- dataset:
    id: crosbylegal/RedlineBench
    task_id: redline_overall
  # Score on the 0-100 scale described in `notes`.
  value: 47.3
  # Quoted so the date stays a string instead of being parsed as a timestamp.
  date: "2026-06-17"
  source:
    url: https://intelligence.crosby.ai/benchmark/
    name: RedlineBench report
    user: crosbylegal
  notes: "agent=claude-code; 3-LLM judge panel (majority vote); turn-weighted weighted pass rate (0-100); published report figure"