# File: dots.mocr/.eval_results/mdpbench.yaml
# Last commit: "Add MDPBench evaluation results (#3)" (92a22a4), by ygfrancois
# MDPBench evaluation results: one sequence entry per benchmark task_id.
# Every entry shares the same dataset id, date, and source block; only
# `task_id` and `value` vary.
#
# NOTE(review): nesting restored on the assumption that `id`/`task_id` belong
# under `dataset`, `url`/`name`/`user` belong under `source`, and `value`/`date`
# are entry-level keys (Hugging Face eval-results layout) — confirm against the
# upstream file.

# Aggregate scores.
- dataset:
    id: Delores-Lin/MDPBench
    task_id: overall
  value: 80.5
  date: "2026-04-14"
  source:
    url: https://huggingface.co/datasets/Delores-Lin/MDPBench
    name: MDPBench leaderboard
    user: Delores-Lin
- dataset:
    id: Delores-Lin/MDPBench
    task_id: digital
  value: 90.5
  date: "2026-04-14"
  source:
    url: https://huggingface.co/datasets/Delores-Lin/MDPBench
    name: MDPBench leaderboard
    user: Delores-Lin
- dataset:
    id: Delores-Lin/MDPBench
    task_id: photographed
  value: 77.2
  date: "2026-04-14"
  source:
    url: https://huggingface.co/datasets/Delores-Lin/MDPBench
    name: MDPBench leaderboard
    user: Delores-Lin

# `latin` followed by what are presumably its per-language task ids — TODO
# confirm the grouping against the benchmark definition.
- dataset:
    id: Delores-Lin/MDPBench
    task_id: latin
  value: 81.7
  date: "2026-04-14"
  source:
    url: https://huggingface.co/datasets/Delores-Lin/MDPBench
    name: MDPBench leaderboard
    user: Delores-Lin
- dataset:
    id: Delores-Lin/MDPBench
    task_id: de
  value: 82.6
  date: "2026-04-14"
  source:
    url: https://huggingface.co/datasets/Delores-Lin/MDPBench
    name: MDPBench leaderboard
    user: Delores-Lin
- dataset:
    id: Delores-Lin/MDPBench
    task_id: en
  value: 87.4
  date: "2026-04-14"
  source:
    url: https://huggingface.co/datasets/Delores-Lin/MDPBench
    name: MDPBench leaderboard
    user: Delores-Lin
- dataset:
    id: Delores-Lin/MDPBench
    task_id: es
  value: 71.3
  date: "2026-04-14"
  source:
    url: https://huggingface.co/datasets/Delores-Lin/MDPBench
    name: MDPBench leaderboard
    user: Delores-Lin
- dataset:
    id: Delores-Lin/MDPBench
    task_id: fr
  value: 70.1
  date: "2026-04-14"
  source:
    url: https://huggingface.co/datasets/Delores-Lin/MDPBench
    name: MDPBench leaderboard
    user: Delores-Lin
- dataset:
    id: Delores-Lin/MDPBench
    task_id: id
  value: 84.5
  date: "2026-04-14"
  source:
    url: https://huggingface.co/datasets/Delores-Lin/MDPBench
    name: MDPBench leaderboard
    user: Delores-Lin
- dataset:
    id: Delores-Lin/MDPBench
    task_id: it
  value: 89.3
  date: "2026-04-14"
  source:
    url: https://huggingface.co/datasets/Delores-Lin/MDPBench
    name: MDPBench leaderboard
    user: Delores-Lin
- dataset:
    id: Delores-Lin/MDPBench
    task_id: nl
  value: 83.2
  date: "2026-04-14"
  source:
    url: https://huggingface.co/datasets/Delores-Lin/MDPBench
    name: MDPBench leaderboard
    user: Delores-Lin
- dataset:
    id: Delores-Lin/MDPBench
    task_id: pt
  value: 86.8
  date: "2026-04-14"
  source:
    url: https://huggingface.co/datasets/Delores-Lin/MDPBench
    name: MDPBench leaderboard
    user: Delores-Lin
- dataset:
    id: Delores-Lin/MDPBench
    task_id: vi
  value: 79.9
  date: "2026-04-14"
  source:
    url: https://huggingface.co/datasets/Delores-Lin/MDPBench
    name: MDPBench leaderboard
    user: Delores-Lin

# `non_latin` followed by what are presumably its per-language task ids — TODO
# confirm the grouping against the benchmark definition.
- dataset:
    id: Delores-Lin/MDPBench
    task_id: non_latin
  value: 79.2
  date: "2026-04-14"
  source:
    url: https://huggingface.co/datasets/Delores-Lin/MDPBench
    name: MDPBench leaderboard
    user: Delores-Lin
- dataset:
    id: Delores-Lin/MDPBench
    task_id: ar
  value: 83.3
  date: "2026-04-14"
  source:
    url: https://huggingface.co/datasets/Delores-Lin/MDPBench
    name: MDPBench leaderboard
    user: Delores-Lin
- dataset:
    id: Delores-Lin/MDPBench
    task_id: hi
  value: 83.6
  date: "2026-04-14"
  source:
    url: https://huggingface.co/datasets/Delores-Lin/MDPBench
    name: MDPBench leaderboard
    user: Delores-Lin
- dataset:
    id: Delores-Lin/MDPBench
    task_id: jp
  value: 75.0
  date: "2026-04-14"
  source:
    url: https://huggingface.co/datasets/Delores-Lin/MDPBench
    name: MDPBench leaderboard
    user: Delores-Lin
- dataset:
    id: Delores-Lin/MDPBench
    task_id: ko
  value: 78.7
  date: "2026-04-14"
  source:
    url: https://huggingface.co/datasets/Delores-Lin/MDPBench
    name: MDPBench leaderboard
    user: Delores-Lin
- dataset:
    id: Delores-Lin/MDPBench
    task_id: ru
  value: 71.2
  date: "2026-04-14"
  source:
    url: https://huggingface.co/datasets/Delores-Lin/MDPBench
    name: MDPBench leaderboard
    user: Delores-Lin
- dataset:
    id: Delores-Lin/MDPBench
    task_id: th
  value: 77.9
  date: "2026-04-14"
  source:
    url: https://huggingface.co/datasets/Delores-Lin/MDPBench
    name: MDPBench leaderboard
    user: Delores-Lin
- dataset:
    id: Delores-Lin/MDPBench
    task_id: zh
  value: 84.6
  date: "2026-04-14"
  source:
    url: https://huggingface.co/datasets/Delores-Lin/MDPBench
    name: MDPBench leaderboard
    user: Delores-Lin
- dataset:
    id: Delores-Lin/MDPBench
    task_id: zh_t
  value: 79.6
  date: "2026-04-14"
  source:
    url: https://huggingface.co/datasets/Delores-Lin/MDPBench
    name: MDPBench leaderboard
    user: Delores-Lin