Spaces:
Runtime error
Runtime error
| { | |
| "task": "Knowledge", | |
| "subtasks": { | |
| "MMLU": { | |
| "datasets": { | |
| "afrimmlu": { | |
| "languages": [ | |
| "amh", | |
| "ewe", | |
| "hau", | |
| "ibo", | |
| "kin", | |
| "lin", | |
| "lug", | |
| "orm", | |
| "sna", | |
| "sot", | |
| "swa", | |
| "twi", | |
| "wol", | |
| "xho", | |
| "yor", | |
| "zul" | |
| ], | |
| "scores": { | |
| "AfroLLaMa 8B": [ | |
| 23.9, | |
| 27.2, | |
| 28.4, | |
| 26.7, | |
| 23.0, | |
| 26.4, | |
| 24.5, | |
| 22.6, | |
| 25.1, | |
| 26.0, | |
| 29.4, | |
| 24.2, | |
| 24.1, | |
| 26.6, | |
| 27.5, | |
| 26.4 | |
| ], | |
| "Aya-101 13B": [ | |
| 31.6, | |
| 25.4, | |
| 33.4, | |
| 36.8, | |
| 30.8, | |
| 27.8, | |
| 28.0, | |
| 26.2, | |
| 28.2, | |
| 31.8, | |
| 32.2, | |
| 26.8, | |
| 25.2, | |
| 32.0, | |
| 28.4, | |
| 29.8 | |
| ], | |
| "Gemma1.1 7b": [ | |
| 24.8, | |
| 26.6, | |
| 28.8, | |
| 27.4, | |
| 25.4, | |
| 27.6, | |
| 29.2, | |
| 27.8, | |
| 27.6, | |
| 28.8, | |
| 28.4, | |
| 29.6, | |
| 25.4, | |
| 25.0, | |
| 27.4, | |
| 28.0 | |
| ], | |
| "LLaMa2 7b": [ | |
| 23.2, | |
| 24.6, | |
| 25.2, | |
| 29.4, | |
| 26.6, | |
| 25.8, | |
| 27.4, | |
| 24.4, | |
| 25.8, | |
| 26.8, | |
| 26.0, | |
| 30.0, | |
| 27.6, | |
| 25.4, | |
| 25.4, | |
| 23.4 | |
| ], | |
| "LLaMa3 8B": [ | |
| 29.4, | |
| 26.4, | |
| 27.8, | |
| 30.6, | |
| 30.4, | |
| 28.4, | |
| 25.0, | |
| 31.6, | |
| 26.6, | |
| 28.6, | |
| 32.0, | |
| 26.6, | |
| 26.4, | |
| 23.4, | |
| 29.0, | |
| 27.8 | |
| ], | |
| "LLaMAX3 8B": [ | |
| 30.2, | |
| 28.0, | |
| 33.4, | |
| 32.0, | |
| 27.8, | |
| 28.4, | |
| 25.2, | |
| 30.0, | |
| 31.6, | |
| 29.4, | |
| 35.6, | |
| 25.8, | |
| 27.0, | |
| 28.0, | |
| 29.8, | |
| 26.6 | |
| ], | |
| "Gemma2 9b": [ | |
| 41.3, | |
| 29.3, | |
| 37.9, | |
| 39.2, | |
| 31.8, | |
| 37.0, | |
| 30.5, | |
| 32.8, | |
| 39.7, | |
| 34.7, | |
| 48.9, | |
| 32.1, | |
| 28.7, | |
| 33.2, | |
| 33.2, | |
| 36.1 | |
| ], | |
| "Gemma2 27b": [ | |
| 40.6, | |
| 32.4, | |
| 43.2, | |
| 44.2, | |
| 40.2, | |
| 38.2, | |
| 32.6, | |
| 33.6, | |
| 44.6, | |
| 41.8, | |
| 56.0, | |
| 35.6, | |
| 30.4, | |
| 42.0, | |
| 41.0, | |
| 42.2 | |
| ], | |
| "LLaMa3.1 8B": [ | |
| 35.0, | |
| 28.8, | |
| 34.6, | |
| 32.6, | |
| 31.6, | |
| 30.6, | |
| 29.4, | |
| 30.4, | |
| 29.6, | |
| 31.8, | |
| 40.0, | |
| 27.2, | |
| 26.6, | |
| 26.6, | |
| 32.4, | |
| 31.8 | |
| ], | |
| "LLaMa3.1 70B": [ | |
| 41.6, | |
| 32.2, | |
| 47.6, | |
| 47.2, | |
| 38.6, | |
| 40.0, | |
| 34.4, | |
| 35.6, | |
| 41.6, | |
| 39.0, | |
| 55.8, | |
| 28.4, | |
| 31.6, | |
| 34.2, | |
| 41.4, | |
| 40.4 | |
| ], | |
| "GPT-4o (Aug)": [ | |
| 59.8, | |
| 33.6, | |
| 67.2, | |
| 67.2, | |
| 64.2, | |
| 61.0, | |
| 52.8, | |
| 61.0, | |
| 67.6, | |
| 67.4, | |
| 77.4, | |
| 43.2, | |
| 37.8, | |
| 70.2, | |
| 61.2, | |
| 68.2 | |
| ], | |
| "Gemini 1.5 pro": [ | |
| 68.0, | |
| 39.0, | |
| 71.2, | |
| 70.4, | |
| 65.0, | |
| 55.2, | |
| 53.0, | |
| 55.8, | |
| 66.8, | |
| 67.6, | |
| 78.4, | |
| 48.2, | |
| 32.0, | |
| 69.2, | |
| 57.4, | |
| 66.0 | |
| ] | |
| } | |
| }, | |
| "openaimmlu": { | |
| "languages": [ | |
| "ara", | |
| "swa", | |
| "yor" | |
| ], | |
| "scores": { | |
| "AfroLLaMa 8B": [ | |
| 24.3, | |
| 28.0, | |
| 26.7 | |
| ], | |
| "LLaMAX3 8B": [ | |
| 23.3, | |
| 23.4, | |
| 23.4 | |
| ], | |
| "LLaMa2 7b": [ | |
| 25.3, | |
| 24.3, | |
| 23.7 | |
| ], | |
| "LLaMa3.1 8B": [ | |
| 41.2, | |
| 35.4, | |
| 29.7 | |
| ], | |
| "LLaMa3.1 70B": [ | |
| 59.9, | |
| 45.9, | |
| 31.7 | |
| ], | |
| "LLaMa3 8B": [ | |
| 27.0, | |
| 25.7, | |
| 24.2 | |
| ], | |
| "Aya-101 13B": [ | |
| 39.1, | |
| 38.6, | |
| 33.3 | |
| ], | |
| "Gemma1.1 7b": [ | |
| 41.8, | |
| 36.4, | |
| 28.9 | |
| ], | |
| "Gemma2 27b": [ | |
| 58.5, | |
| 54.2, | |
| 31.7 | |
| ], | |
| "Gemma2 9b": [ | |
| 56.2, | |
| 50.9, | |
| 34.1 | |
| ], | |
| "Gemini 1.5 pro": [ | |
| 46.1, | |
| 52.8, | |
| 31.6 | |
| ], | |
| "GPT-4o (Aug)": [ | |
| 77.3, | |
| 69.4, | |
| 41.8 | |
| ] | |
| } | |
| } | |
| } | |
| }, | |
| "Arc-E": { | |
| "datasets": { | |
| "uhura": { | |
| "languages": [ | |
| "en", | |
| "am", | |
| "ha", | |
| "sw", | |
| "yo", | |
| "zu" | |
| ], | |
| "scores": { | |
| "AfroLLaMa 8B": [ | |
| 67.2, | |
| 22.0, | |
| 34.1, | |
| 43.4, | |
| 34.2, | |
| 52.3 | |
| ], | |
| "LLaMAX3 8B": [ | |
| 84.9, | |
| 32.0, | |
| 39.2, | |
| 49.4, | |
| 29.2, | |
| 49.6 | |
| ], | |
| "LLaMa2 7b": [ | |
| 76.4, | |
| 23.1, | |
| 26.6, | |
| 20.3, | |
| 21.2, | |
| 25.5 | |
| ], | |
| "LLaMa3.1 8B": [ | |
| 95.2, | |
| 30.3, | |
| 32.0, | |
| 46.8, | |
| 28.4, | |
| 26.4 | |
| ], | |
| "LLaMa3 8B": [ | |
| 92.7, | |
| 25.9, | |
| 34.0, | |
| 41.1, | |
| 29.2, | |
| 29.8 | |
| ], | |
| "Gemma1.1 7b": [ | |
| 92.2, | |
| 28.7, | |
| 23.8, | |
| 46.3, | |
| 26.3, | |
| 36.0 | |
| ], | |
| "Gemma2 27b": [ | |
| 98.6, | |
| 53.8, | |
| 55.3, | |
| 87.4, | |
| 33.6, | |
| 51.3 | |
| ], | |
| "Gemma2 9b": [ | |
| 97.1, | |
| 52.6, | |
| 49.5, | |
| 81.8, | |
| 30.7, | |
| 52.1 | |
| ], | |
| "Aya-101 13B": [ | |
| 84.8, | |
| 65.0, | |
| 58.2, | |
| 65.2, | |
| 41.7, | |
| 69.7 | |
| ], | |
| "LLaMa3.1 70B": [ | |
| 97.3, | |
| 54.2, | |
| 56.6, | |
| 81.7, | |
| 41.3, | |
| 53.7 | |
| ], | |
| "GPT-4o (Aug)": [ | |
| 91.6, | |
| 99.0, | |
| 82.5, | |
| 80.0, | |
| 93.5, | |
| 73.5 | |
| ], | |
| "Gemini 1.5 pro": [ | |
| 78.4, | |
| 99.2, | |
| 80.3, | |
| 79.2, | |
| 93.7, | |
| 71.5 | |
| ] | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } |