Spaces:
Runtime error
Runtime error
update claude 4.5 flores results
Browse files
data/leaderboard_json/afrobench_lite.json
CHANGED
|
@@ -18,11 +18,11 @@
|
|
| 18 |
"GPT-4.1 (April)": 67.5,
|
| 19 |
"LLaMa 4 405B": 45.5,
|
| 20 |
"Lugha-Llama 8B": 36.7,
|
|
|
|
| 21 |
"Gemini-2.5 Flash": 69.9,
|
| 22 |
-
"Claude 4.0 Sonnet": 68.1,
|
| 23 |
-
"Claude 3.7 Sonnet": 59.8,
|
| 24 |
"Claude 4.5 Sonnet": 71.7,
|
| 25 |
-
"
|
|
|
|
| 26 |
}
|
| 27 |
},
|
| 28 |
"Intent": {
|
|
@@ -44,11 +44,11 @@
|
|
| 44 |
"GPT-4.1 (April)": 84.4,
|
| 45 |
"LLaMa 4 405B": 73.9,
|
| 46 |
"Lugha-Llama 8B": 4.1,
|
|
|
|
| 47 |
"Gemini-2.5 Flash": 87.9,
|
| 48 |
-
"Claude 4.0 Sonnet": 80.4,
|
| 49 |
-
"Claude 3.7 Sonnet": 73.4,
|
| 50 |
"Claude 4.5 Sonnet": 81.6,
|
| 51 |
-
"
|
|
|
|
| 52 |
}
|
| 53 |
},
|
| 54 |
"MT(en/fr-xx)": {
|
|
@@ -70,10 +70,11 @@
|
|
| 70 |
"GPT-4.1 (April)": 47.3,
|
| 71 |
"LLaMa 4 405B": 42.8,
|
| 72 |
"Lugha-Llama 8B": 22.1,
|
|
|
|
| 73 |
"Gemini-2.5 Flash": 46.5,
|
| 74 |
-
"Claude 4.
|
| 75 |
-
"Claude
|
| 76 |
-
"
|
| 77 |
}
|
| 78 |
},
|
| 79 |
"MMLU": {
|
|
@@ -95,11 +96,11 @@
|
|
| 95 |
"GPT-4.1 (April)": 60.2,
|
| 96 |
"LLaMa 4 405B": 15.8,
|
| 97 |
"Lugha-Llama 8B": 25.2,
|
|
|
|
| 98 |
"Gemini-2.5 Flash": 67.7,
|
| 99 |
-
"Claude 4.0 Sonnet": 75.5,
|
| 100 |
-
"Claude 3.7 Sonnet": 66.7,
|
| 101 |
"Claude 4.5 Sonnet": 58.6,
|
| 102 |
-
"
|
|
|
|
| 103 |
}
|
| 104 |
},
|
| 105 |
"Math": {
|
|
@@ -121,11 +122,11 @@
|
|
| 121 |
"GPT-4.1 (April)": 59.5,
|
| 122 |
"LLaMa 4 405B": 65.0,
|
| 123 |
"Lugha-Llama 8B": 1.8,
|
|
|
|
| 124 |
"Gemini-2.5 Flash": 70.6,
|
| 125 |
-
"Claude 4.0 Sonnet": 66.9,
|
| 126 |
-
"Claude 3.7 Sonnet": 35.2,
|
| 127 |
"Claude 4.5 Sonnet": 73.1,
|
| 128 |
-
"
|
|
|
|
| 129 |
}
|
| 130 |
},
|
| 131 |
"Topic": {
|
|
@@ -147,11 +148,11 @@
|
|
| 147 |
"GPT-4.1 (April)": 84.8,
|
| 148 |
"LLaMa 4 405B": 80.6,
|
| 149 |
"Lugha-Llama 8B": 34.1,
|
|
|
|
| 150 |
"Gemini-2.5 Flash": 87.2,
|
| 151 |
-
"Claude 4.0 Sonnet": 83.2,
|
| 152 |
-
"Claude 3.7 Sonnet": 84.9,
|
| 153 |
"Claude 4.5 Sonnet": 84.2,
|
| 154 |
-
"
|
|
|
|
| 155 |
}
|
| 156 |
},
|
| 157 |
"RC": {
|
|
@@ -173,11 +174,11 @@
|
|
| 173 |
"GPT-4.1 (April)": 64.8,
|
| 174 |
"LLaMa 4 405B": 24.6,
|
| 175 |
"Lugha-Llama 8B": 23.0,
|
|
|
|
| 176 |
"Gemini-2.5 Flash": 42.2,
|
| 177 |
-
"Claude 4.0 Sonnet": 76.2,
|
| 178 |
-
"Claude 3.7 Sonnet": 65.1,
|
| 179 |
"Claude 4.5 Sonnet": 74.8,
|
| 180 |
-
"
|
|
|
|
| 181 |
}
|
| 182 |
}
|
| 183 |
}
|
|
|
|
| 18 |
"GPT-4.1 (April)": 67.5,
|
| 19 |
"LLaMa 4 405B": 45.5,
|
| 20 |
"Lugha-Llama 8B": 36.7,
|
| 21 |
+
"Gemini-2.5 Pro": 72.7,
|
| 22 |
"Gemini-2.5 Flash": 69.9,
|
|
|
|
|
|
|
| 23 |
"Claude 4.5 Sonnet": 71.7,
|
| 24 |
+
"Claude 4 Sonnet": 68.1,
|
| 25 |
+
"Claude 3.7 Sonnet": 59.8
|
| 26 |
}
|
| 27 |
},
|
| 28 |
"Intent": {
|
|
|
|
| 44 |
"GPT-4.1 (April)": 84.4,
|
| 45 |
"LLaMa 4 405B": 73.9,
|
| 46 |
"Lugha-Llama 8B": 4.1,
|
| 47 |
+
"Gemini-2.5 Pro": 88.3,
|
| 48 |
"Gemini-2.5 Flash": 87.9,
|
|
|
|
|
|
|
| 49 |
"Claude 4.5 Sonnet": 81.6,
|
| 50 |
+
"Claude 4 Sonnet": 80.4,
|
| 51 |
+
"Claude 3.7 Sonnet": 73.4
|
| 52 |
}
|
| 53 |
},
|
| 54 |
"MT(en/fr-xx)": {
|
|
|
|
| 70 |
"GPT-4.1 (April)": 47.3,
|
| 71 |
"LLaMa 4 405B": 42.8,
|
| 72 |
"Lugha-Llama 8B": 22.1,
|
| 73 |
+
"Gemini-2.5 Pro": 47.4,
|
| 74 |
"Gemini-2.5 Flash": 46.5,
|
| 75 |
+
"Claude 4.5 Sonnet": 46.6,
|
| 76 |
+
"Claude 4 Sonnet": 46.0,
|
| 77 |
+
"Claude 3.7 Sonnet": 44.0
|
| 78 |
}
|
| 79 |
},
|
| 80 |
"MMLU": {
|
|
|
|
| 96 |
"GPT-4.1 (April)": 60.2,
|
| 97 |
"LLaMa 4 405B": 15.8,
|
| 98 |
"Lugha-Llama 8B": 25.2,
|
| 99 |
+
"Gemini-2.5 Pro": 78.2,
|
| 100 |
"Gemini-2.5 Flash": 67.7,
|
|
|
|
|
|
|
| 101 |
"Claude 4.5 Sonnet": 58.6,
|
| 102 |
+
"Claude 4 Sonnet": 75.5,
|
| 103 |
+
"Claude 3.7 Sonnet": 66.7
|
| 104 |
}
|
| 105 |
},
|
| 106 |
"Math": {
|
|
|
|
| 122 |
"GPT-4.1 (April)": 59.5,
|
| 123 |
"LLaMa 4 405B": 65.0,
|
| 124 |
"Lugha-Llama 8B": 1.8,
|
| 125 |
+
"Gemini-2.5 Pro": 74.4,
|
| 126 |
"Gemini-2.5 Flash": 70.6,
|
|
|
|
|
|
|
| 127 |
"Claude 4.5 Sonnet": 73.1,
|
| 128 |
+
"Claude 4 Sonnet": 66.9,
|
| 129 |
+
"Claude 3.7 Sonnet": 35.2
|
| 130 |
}
|
| 131 |
},
|
| 132 |
"Topic": {
|
|
|
|
| 148 |
"GPT-4.1 (April)": 84.8,
|
| 149 |
"LLaMa 4 405B": 80.6,
|
| 150 |
"Lugha-Llama 8B": 34.1,
|
| 151 |
+
"Gemini-2.5 Pro": 88.1,
|
| 152 |
"Gemini-2.5 Flash": 87.2,
|
|
|
|
|
|
|
| 153 |
"Claude 4.5 Sonnet": 84.2,
|
| 154 |
+
"Claude 4 Sonnet": 83.2,
|
| 155 |
+
"Claude 3.7 Sonnet": 84.9
|
| 156 |
}
|
| 157 |
},
|
| 158 |
"RC": {
|
|
|
|
| 174 |
"GPT-4.1 (April)": 64.8,
|
| 175 |
"LLaMa 4 405B": 24.6,
|
| 176 |
"Lugha-Llama 8B": 23.0,
|
| 177 |
+
"Gemini-2.5 Pro": 76.7,
|
| 178 |
"Gemini-2.5 Flash": 42.2,
|
|
|
|
|
|
|
| 179 |
"Claude 4.5 Sonnet": 74.8,
|
| 180 |
+
"Claude 4 Sonnet": 76.2,
|
| 181 |
+
"Claude 3.7 Sonnet": 65.1
|
| 182 |
}
|
| 183 |
}
|
| 184 |
}
|