| [ |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "mawps", |
| "ok": true, |
| "gpu": 7 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "math_counting_easy", |
| "ok": true, |
| "gpu": 7 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "mbpp_sanitized", |
| "ok": true, |
| "gpu": 7 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "arc_easy", |
| "ok": true, |
| "gpu": 7 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "mmlu_high_school_biology", |
| "ok": true, |
| "gpu": 7 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "math_algebra_medium", |
| "ok": true, |
| "gpu": 7 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "mmlu_college_chemistry", |
| "ok": true, |
| "gpu": 7 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "gsm8k", |
| "ok": true, |
| "gpu": 1 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "multiarith", |
| "ok": true, |
| "gpu": 1 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "mbpp", |
| "ok": true, |
| "gpu": 1 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "conala_curated", |
| "ok": true, |
| "gpu": 1 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "openbookqa", |
| "ok": true, |
| "gpu": 1 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "mmlu_high_school_physics", |
| "ok": true, |
| "gpu": 1 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "humaneval_plus", |
| "ok": true, |
| "gpu": 1 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "mawps", |
| "ok": true, |
| "gpu": 6 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "math_counting_easy", |
| "ok": true, |
| "gpu": 6 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "mbpp_sanitized", |
| "ok": true, |
| "gpu": 6 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "arc_easy", |
| "ok": true, |
| "gpu": 6 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "mmlu_high_school_biology", |
| "ok": true, |
| "gpu": 6 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "math_algebra_medium", |
| "ok": true, |
| "gpu": 6 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "mmlu_college_chemistry", |
| "ok": true, |
| "gpu": 6 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "gsm8k", |
| "ok": true, |
| "gpu": 0 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "multiarith", |
| "ok": true, |
| "gpu": 0 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "mbpp", |
| "ok": true, |
| "gpu": 0 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "conala_curated", |
| "ok": true, |
| "gpu": 0 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "openbookqa", |
| "ok": true, |
| "gpu": 0 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "mmlu_high_school_physics", |
| "ok": true, |
| "gpu": 0 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "humaneval_plus", |
| "ok": true, |
| "gpu": 0 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "asdiv", |
| "ok": true, |
| "gpu": 5 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "math_algebra_easy", |
| "ok": true, |
| "gpu": 5 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "codealpaca_mini", |
| "ok": true, |
| "gpu": 5 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "sciq", |
| "ok": true, |
| "gpu": 5 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "mmlu_elementary_math", |
| "ok": true, |
| "gpu": 5 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "gsm_hard", |
| "ok": true, |
| "gpu": 5 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "arc_challenge", |
| "ok": true, |
| "gpu": 5 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "asdiv", |
| "ok": true, |
| "gpu": 4 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "math_algebra_easy", |
| "ok": true, |
| "gpu": 4 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "codealpaca_mini", |
| "ok": true, |
| "gpu": 4 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "sciq", |
| "ok": true, |
| "gpu": 4 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "mmlu_elementary_math", |
| "ok": true, |
| "gpu": 4 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "gsm_hard", |
| "ok": true, |
| "gpu": 4 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "arc_challenge", |
| "ok": true, |
| "gpu": 4 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "svamp", |
| "ok": true, |
| "gpu": 3 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "aqua_rat", |
| "ok": true, |
| "gpu": 3 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "humaneval", |
| "ok": true, |
| "gpu": 3 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "livecodebench_easy", |
| "ok": true, |
| "gpu": 3 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "medmcqa_easy", |
| "ok": true, |
| "gpu": 3 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "pubmedqa_pqal", |
| "ok": true, |
| "gpu": 3 |
| }, |
| { |
| "side": "Y", |
| "model": "meta-llama/Llama-3.2-3B-Instruct", |
| "task": "mbpp_plus", |
| "ok": true, |
| "gpu": 3 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "svamp", |
| "ok": true, |
| "gpu": 2 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "aqua_rat", |
| "ok": true, |
| "gpu": 2 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "humaneval", |
| "ok": true, |
| "gpu": 2 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "livecodebench_easy", |
| "ok": true, |
| "gpu": 2 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "medmcqa_easy", |
| "ok": true, |
| "gpu": 2 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "pubmedqa_pqal", |
| "ok": true, |
| "gpu": 2 |
| }, |
| { |
| "side": "X", |
| "model": "Qwen/Qwen2.5-3B-Instruct", |
| "task": "mbpp_plus", |
| "ok": true, |
| "gpu": 2 |
| } |
| ] |