Spaces:
Running
Running
| { | |
| "results": { | |
| "hendrycksTest-miscellaneous": { | |
| "acc": 0.26181353767560667, | |
| "acc_stderr": 0.01572083867844526, | |
| "acc_norm": 0.24776500638569604, | |
| "acc_norm_stderr": 0.015438083080568965 | |
| }, | |
| "hendrycksTest-professional_accounting": { | |
| "acc": 0.2978723404255319, | |
| "acc_stderr": 0.027281608344469414, | |
| "acc_norm": 0.2695035460992908, | |
| "acc_norm_stderr": 0.02646903681859062 | |
| }, | |
| "hendrycksTest-moral_scenarios": { | |
| "acc": 0.2346368715083799, | |
| "acc_stderr": 0.014173044098303654, | |
| "acc_norm": 0.2569832402234637, | |
| "acc_norm_stderr": 0.014614465821966361 | |
| }, | |
| "sciq": { | |
| "acc": 0.633, | |
| "acc_stderr": 0.015249378464171749, | |
| "acc_norm": 0.552, | |
| "acc_norm_stderr": 0.01573351656634783 | |
| }, | |
| "hendrycksTest-nutrition": { | |
| "acc": 0.27450980392156865, | |
| "acc_stderr": 0.02555316999182651, | |
| "acc_norm": 0.3366013071895425, | |
| "acc_norm_stderr": 0.027057974624494382 | |
| }, | |
| "piqa": { | |
| "acc": 0.5973884657236126, | |
| "acc_stderr": 0.011442395233488698, | |
| "acc_norm": 0.5854189336235038, | |
| "acc_norm_stderr": 0.011494326682255158 | |
| }, | |
| "hendrycksTest-high_school_us_history": { | |
| "acc": 0.2549019607843137, | |
| "acc_stderr": 0.030587591351604243, | |
| "acc_norm": 0.27941176470588236, | |
| "acc_norm_stderr": 0.031493281045079556 | |
| }, | |
| "hendrycksTest-international_law": { | |
| "acc": 0.15702479338842976, | |
| "acc_stderr": 0.0332124484254713, | |
| "acc_norm": 0.4132231404958678, | |
| "acc_norm_stderr": 0.04495087843548408 | |
| }, | |
| "hendrycksTest-anatomy": { | |
| "acc": 0.2222222222222222, | |
| "acc_stderr": 0.035914440841969694, | |
| "acc_norm": 0.2740740740740741, | |
| "acc_norm_stderr": 0.03853254836552003 | |
| }, | |
| "crows_pairs_french_gender": { | |
| "likelihood_difference": 4.660533489096573, | |
| "likelihood_difference_stderr": 0.22532366484380598, | |
| "pct_stereotype": 0.5077881619937694, | |
| "pct_stereotype_stderr": 0.027947458769356347 | |
| }, | |
| "hendrycksTest-professional_medicine": { | |
| "acc": 0.2757352941176471, | |
| "acc_stderr": 0.027146271936625162, | |
| "acc_norm": 0.3125, | |
| "acc_norm_stderr": 0.02815637344037142 | |
| }, | |
| "hendrycksTest-high_school_psychology": { | |
| "acc": 0.27339449541284405, | |
| "acc_stderr": 0.0191092998460983, | |
| "acc_norm": 0.24403669724770644, | |
| "acc_norm_stderr": 0.018415286351416395 | |
| }, | |
| "hendrycksTest-astronomy": { | |
| "acc": 0.23684210526315788, | |
| "acc_stderr": 0.03459777606810535, | |
| "acc_norm": 0.3355263157894737, | |
| "acc_norm_stderr": 0.03842498559395268 | |
| }, | |
| "hendrycksTest-logical_fallacies": { | |
| "acc": 0.22699386503067484, | |
| "acc_stderr": 0.03291099578615769, | |
| "acc_norm": 0.26380368098159507, | |
| "acc_norm_stderr": 0.03462419931615623 | |
| }, | |
| "crows_pairs_french_disability": { | |
| "likelihood_difference": 6.6946022727272725, | |
| "likelihood_difference_stderr": 0.7491237826255029, | |
| "pct_stereotype": 0.5, | |
| "pct_stereotype_stderr": 0.06201736729460421 | |
| }, | |
| "hendrycksTest-high_school_chemistry": { | |
| "acc": 0.2315270935960591, | |
| "acc_stderr": 0.029678333141444455, | |
| "acc_norm": 0.3054187192118227, | |
| "acc_norm_stderr": 0.03240661565868408 | |
| }, | |
| "hendrycksTest-elementary_mathematics": { | |
| "acc": 0.25925925925925924, | |
| "acc_stderr": 0.022569897074918424, | |
| "acc_norm": 0.25925925925925924, | |
| "acc_norm_stderr": 0.022569897074918424 | |
| }, | |
| "hendrycksTest-human_sexuality": { | |
| "acc": 0.3053435114503817, | |
| "acc_stderr": 0.040393149787245605, | |
| "acc_norm": 0.22137404580152673, | |
| "acc_norm_stderr": 0.03641297081313729 | |
| }, | |
| "hendrycksTest-professional_psychology": { | |
| "acc": 0.25, | |
| "acc_stderr": 0.01751781884501444, | |
| "acc_norm": 0.2630718954248366, | |
| "acc_norm_stderr": 0.017812676542320653 | |
| }, | |
| "crows_pairs_french_religion": { | |
| "likelihood_difference": 4.175815217391304, | |
| "likelihood_difference_stderr": 0.5425080644657401, | |
| "pct_stereotype": 0.5130434782608696, | |
| "pct_stereotype_stderr": 0.04681335351503156 | |
| }, | |
| "hendrycksTest-college_computer_science": { | |
| "acc": 0.28, | |
| "acc_stderr": 0.045126085985421255, | |
| "acc_norm": 0.29, | |
| "acc_norm_stderr": 0.04560480215720684 | |
| }, | |
| "crows_pairs_english_autre": { | |
| "likelihood_difference": 5.3977272727272725, | |
| "likelihood_difference_stderr": 1.790491828842816, | |
| "pct_stereotype": 0.6363636363636364, | |
| "pct_stereotype_stderr": 0.15212000482437738 | |
| }, | |
| "hendrycksTest-econometrics": { | |
| "acc": 0.30701754385964913, | |
| "acc_stderr": 0.04339138322579861, | |
| "acc_norm": 0.2631578947368421, | |
| "acc_norm_stderr": 0.041424397194893624 | |
| }, | |
| "hendrycksTest-high_school_microeconomics": { | |
| "acc": 0.23529411764705882, | |
| "acc_stderr": 0.027553614467863825, | |
| "acc_norm": 0.3403361344537815, | |
| "acc_norm_stderr": 0.030778057422931673 | |
| }, | |
| "hendrycksTest-moral_disputes": { | |
| "acc": 0.2774566473988439, | |
| "acc_stderr": 0.024105712607754307, | |
| "acc_norm": 0.2947976878612717, | |
| "acc_norm_stderr": 0.024547617794803835 | |
| }, | |
| "hendrycksTest-machine_learning": { | |
| "acc": 0.33035714285714285, | |
| "acc_stderr": 0.04464285714285713, | |
| "acc_norm": 0.20535714285714285, | |
| "acc_norm_stderr": 0.03834241021419073 | |
| }, | |
| "hendrycksTest-management": { | |
| "acc": 0.2621359223300971, | |
| "acc_stderr": 0.043546310772605935, | |
| "acc_norm": 0.27184466019417475, | |
| "acc_norm_stderr": 0.044052680241409216 | |
| }, | |
| "crows_pairs_french_socioeconomic": { | |
| "likelihood_difference": 5.223692602040816, | |
| "likelihood_difference_stderr": 0.3716517632652829, | |
| "pct_stereotype": 0.5510204081632653, | |
| "pct_stereotype_stderr": 0.03561884533975955 | |
| }, | |
| "hendrycksTest-high_school_macroeconomics": { | |
| "acc": 0.2564102564102564, | |
| "acc_stderr": 0.022139081103971534, | |
| "acc_norm": 0.28205128205128205, | |
| "acc_norm_stderr": 0.022815813098896597 | |
| }, | |
| "hendrycksTest-security_studies": { | |
| "acc": 0.2897959183673469, | |
| "acc_stderr": 0.029043088683304345, | |
| "acc_norm": 0.2530612244897959, | |
| "acc_norm_stderr": 0.027833023871399683 | |
| }, | |
| "hendrycksTest-medical_genetics": { | |
| "acc": 0.25, | |
| "acc_stderr": 0.04351941398892446, | |
| "acc_norm": 0.31, | |
| "acc_norm_stderr": 0.04648231987117316 | |
| }, | |
| "hendrycksTest-high_school_statistics": { | |
| "acc": 0.2361111111111111, | |
| "acc_stderr": 0.028963702570791033, | |
| "acc_norm": 0.27314814814814814, | |
| "acc_norm_stderr": 0.03038805130167812 | |
| }, | |
| "crows_pairs_english": { | |
| "likelihood_difference": 3.675657796660704, | |
| "likelihood_difference_stderr": 0.10428478695252169, | |
| "pct_stereotype": 0.5438282647584973, | |
| "pct_stereotype_stderr": 0.012166287275376289 | |
| }, | |
| "hendrycksTest-high_school_physics": { | |
| "acc": 0.2185430463576159, | |
| "acc_stderr": 0.03374235550425694, | |
| "acc_norm": 0.25165562913907286, | |
| "acc_norm_stderr": 0.03543304234389985 | |
| }, | |
| "crows_pairs_english_religion": { | |
| "likelihood_difference": 3.5057713963963963, | |
| "likelihood_difference_stderr": 0.4253117969664197, | |
| "pct_stereotype": 0.6216216216216216, | |
| "pct_stereotype_stderr": 0.04624128233851482 | |
| }, | |
| "crows_pairs_english_sexual_orientation": { | |
| "likelihood_difference": 4.478158602150538, | |
| "likelihood_difference_stderr": 0.5463367427565824, | |
| "pct_stereotype": 0.7849462365591398, | |
| "pct_stereotype_stderr": 0.04283507835554755 | |
| }, | |
| "crows_pairs_english_socioeconomic": { | |
| "likelihood_difference": 4.158223684210526, | |
| "likelihood_difference_stderr": 0.2827099752616182, | |
| "pct_stereotype": 0.5842105263157895, | |
| "pct_stereotype_stderr": 0.0358501132552001 | |
| }, | |
| "crows_pairs_english_gender": { | |
| "likelihood_difference": 2.90234375, | |
| "likelihood_difference_stderr": 0.26743360486517015, | |
| "pct_stereotype": 0.5375, | |
| "pct_stereotype_stderr": 0.02791577963000663 | |
| }, | |
| "hendrycksTest-electrical_engineering": { | |
| "acc": 0.2413793103448276, | |
| "acc_stderr": 0.03565998174135303, | |
| "acc_norm": 0.27586206896551724, | |
| "acc_norm_stderr": 0.03724563619774632 | |
| }, | |
| "hendrycksTest-business_ethics": { | |
| "acc": 0.31, | |
| "acc_stderr": 0.04648231987117316, | |
| "acc_norm": 0.25, | |
| "acc_norm_stderr": 0.04351941398892446 | |
| }, | |
| "hendrycksTest-global_facts": { | |
| "acc": 0.19, | |
| "acc_stderr": 0.03942772444036625, | |
| "acc_norm": 0.24, | |
| "acc_norm_stderr": 0.042923469599092816 | |
| }, | |
| "hendrycksTest-public_relations": { | |
| "acc": 0.2545454545454545, | |
| "acc_stderr": 0.04172343038705383, | |
| "acc_norm": 0.20909090909090908, | |
| "acc_norm_stderr": 0.03895091015724137 | |
| }, | |
| "crows_pairs_french_age": { | |
| "likelihood_difference": 4.967708333333333, | |
| "likelihood_difference_stderr": 0.4550873657608913, | |
| "pct_stereotype": 0.43333333333333335, | |
| "pct_stereotype_stderr": 0.05252667118728807 | |
| }, | |
| "hendrycksTest-virology": { | |
| "acc": 0.25301204819277107, | |
| "acc_stderr": 0.033844291552331346, | |
| "acc_norm": 0.21686746987951808, | |
| "acc_norm_stderr": 0.03208284450356365 | |
| }, | |
| "crows_pairs_french_physical_appearance": { | |
| "likelihood_difference": 5.224392361111111, | |
| "likelihood_difference_stderr": 0.5949955425776441, | |
| "pct_stereotype": 0.4861111111111111, | |
| "pct_stereotype_stderr": 0.059316185327165566 | |
| }, | |
| "hendrycksTest-human_aging": { | |
| "acc": 0.26905829596412556, | |
| "acc_stderr": 0.02976377940687497, | |
| "acc_norm": 0.21524663677130046, | |
| "acc_norm_stderr": 0.027584066602208274 | |
| }, | |
| "hendrycksTest-high_school_government_and_politics": { | |
| "acc": 0.25906735751295334, | |
| "acc_stderr": 0.03161877917935409, | |
| "acc_norm": 0.3005181347150259, | |
| "acc_norm_stderr": 0.033088185944157515 | |
| }, | |
| "hendrycksTest-philosophy": { | |
| "acc": 0.24437299035369775, | |
| "acc_stderr": 0.024406162094668893, | |
| "acc_norm": 0.26688102893890675, | |
| "acc_norm_stderr": 0.025122637608816646 | |
| }, | |
| "lambada_openai": { | |
| "ppl": 124.26962204175287, | |
| "ppl_stderr": 5.363117769801199, | |
| "acc": 0.22627595575392975, | |
| "acc_stderr": 0.005829406265404375 | |
| }, | |
| "crows_pairs_english_physical_appearance": { | |
| "likelihood_difference": 3.8758680555555554, | |
| "likelihood_difference_stderr": 0.41377726625457284, | |
| "pct_stereotype": 0.625, | |
| "pct_stereotype_stderr": 0.05745481997211521 | |
| }, | |
| "winogrande": { | |
| "acc": 0.5193370165745856, | |
| "acc_stderr": 0.014041972733712972 | |
| }, | |
| "crows_pairs_english_nationality": { | |
| "likelihood_difference": 3.6435908564814814, | |
| "likelihood_difference_stderr": 0.26705840381438256, | |
| "pct_stereotype": 0.4305555555555556, | |
| "pct_stereotype_stderr": 0.03376922151252336 | |
| }, | |
| "hendrycksTest-college_physics": { | |
| "acc": 0.17647058823529413, | |
| "acc_stderr": 0.03793281185307809, | |
| "acc_norm": 0.23529411764705882, | |
| "acc_norm_stderr": 0.04220773659171453 | |
| }, | |
| "crows_pairs_english_race_color": { | |
| "likelihood_difference": 3.7424950787401574, | |
| "likelihood_difference_stderr": 0.18169346622004526, | |
| "pct_stereotype": 0.5059055118110236, | |
| "pct_stereotype_stderr": 0.02220423067397246 | |
| }, | |
| "hendrycksTest-conceptual_physics": { | |
| "acc": 0.2936170212765957, | |
| "acc_stderr": 0.02977164271249123, | |
| "acc_norm": 0.1829787234042553, | |
| "acc_norm_stderr": 0.025276041000449966 | |
| }, | |
| "hendrycksTest-clinical_knowledge": { | |
| "acc": 0.23773584905660378, | |
| "acc_stderr": 0.026199808807561915, | |
| "acc_norm": 0.3018867924528302, | |
| "acc_norm_stderr": 0.028254200344438662 | |
| }, | |
| "hendrycksTest-college_mathematics": { | |
| "acc": 0.18, | |
| "acc_stderr": 0.03861229196653697, | |
| "acc_norm": 0.2, | |
| "acc_norm_stderr": 0.04020151261036845 | |
| }, | |
| "hendrycksTest-abstract_algebra": { | |
| "acc": 0.23, | |
| "acc_stderr": 0.04229525846816505, | |
| "acc_norm": 0.3, | |
| "acc_norm_stderr": 0.046056618647183814 | |
| }, | |
| "hendrycksTest-computer_security": { | |
| "acc": 0.22, | |
| "acc_stderr": 0.041633319989322716, | |
| "acc_norm": 0.32, | |
| "acc_norm_stderr": 0.04688261722621503 | |
| }, | |
| "hendrycksTest-world_religions": { | |
| "acc": 0.23976608187134502, | |
| "acc_stderr": 0.03274485211946956, | |
| "acc_norm": 0.3157894736842105, | |
| "acc_norm_stderr": 0.03565079670708311 | |
| }, | |
| "hendrycksTest-sociology": { | |
| "acc": 0.24378109452736318, | |
| "acc_stderr": 0.030360490154014638, | |
| "acc_norm": 0.2835820895522388, | |
| "acc_norm_stderr": 0.03187187537919798 | |
| }, | |
| "hendrycksTest-college_chemistry": { | |
| "acc": 0.32, | |
| "acc_stderr": 0.04688261722621504, | |
| "acc_norm": 0.33, | |
| "acc_norm_stderr": 0.047258156262526045 | |
| }, | |
| "hendrycksTest-high_school_world_history": { | |
| "acc": 0.2109704641350211, | |
| "acc_stderr": 0.02655837250266192, | |
| "acc_norm": 0.2742616033755274, | |
| "acc_norm_stderr": 0.029041333510598046 | |
| }, | |
| "logiqa": { | |
| "acc": 0.23195084485407066, | |
| "acc_stderr": 0.0165552524979259, | |
| "acc_norm": 0.27035330261136714, | |
| "acc_norm_stderr": 0.01742069478339314 | |
| }, | |
| "wsc": { | |
| "acc": 0.36538461538461536, | |
| "acc_stderr": 0.0474473339327792 | |
| }, | |
| "hendrycksTest-high_school_computer_science": { | |
| "acc": 0.2, | |
| "acc_stderr": 0.04020151261036843, | |
| "acc_norm": 0.26, | |
| "acc_norm_stderr": 0.044084400227680814 | |
| }, | |
| "hendrycksTest-high_school_biology": { | |
| "acc": 0.2870967741935484, | |
| "acc_stderr": 0.025736542745594528, | |
| "acc_norm": 0.3, | |
| "acc_norm_stderr": 0.02606936229533513 | |
| }, | |
| "hendrycksTest-marketing": { | |
| "acc": 0.27350427350427353, | |
| "acc_stderr": 0.029202540153431177, | |
| "acc_norm": 0.2606837606837607, | |
| "acc_norm_stderr": 0.028760348956523414 | |
| }, | |
| "hendrycksTest-professional_law": { | |
| "acc": 0.24771838331160365, | |
| "acc_stderr": 0.011025499291443742, | |
| "acc_norm": 0.27444589308996087, | |
| "acc_norm_stderr": 0.011397043163078154 | |
| }, | |
| "crows_pairs_french_nationality": { | |
| "likelihood_difference": 7.527667984189724, | |
| "likelihood_difference_stderr": 0.4209795564667756, | |
| "pct_stereotype": 0.308300395256917, | |
| "pct_stereotype_stderr": 0.02909012143059231 | |
| }, | |
| "hendrycksTest-prehistory": { | |
| "acc": 0.26851851851851855, | |
| "acc_stderr": 0.024659685185967284, | |
| "acc_norm": 0.21296296296296297, | |
| "acc_norm_stderr": 0.0227797190887334 | |
| }, | |
| "crows_pairs_french_sexual_orientation": { | |
| "likelihood_difference": 7.548076923076923, | |
| "likelihood_difference_stderr": 0.5113727094452629, | |
| "pct_stereotype": 0.8131868131868132, | |
| "pct_stereotype_stderr": 0.04108446855035883 | |
| }, | |
| "hendrycksTest-high_school_mathematics": { | |
| "acc": 0.1814814814814815, | |
| "acc_stderr": 0.023499264669407292, | |
| "acc_norm": 0.22962962962962963, | |
| "acc_norm_stderr": 0.025644108639267613 | |
| }, | |
| "hendrycksTest-jurisprudence": { | |
| "acc": 0.2037037037037037, | |
| "acc_stderr": 0.038935425188248475, | |
| "acc_norm": 0.3611111111111111, | |
| "acc_norm_stderr": 0.04643454608906275 | |
| }, | |
| "crows_pairs_french_race_color": { | |
| "likelihood_difference": 4.847758152173913, | |
| "likelihood_difference_stderr": 0.2507391728199927, | |
| "pct_stereotype": 0.3239130434782609, | |
| "pct_stereotype_stderr": 0.021842842500532617 | |
| }, | |
| "hendrycksTest-us_foreign_policy": { | |
| "acc": 0.24, | |
| "acc_stderr": 0.04292346959909283, | |
| "acc_norm": 0.24, | |
| "acc_norm_stderr": 0.04292346959909283 | |
| }, | |
| "arc_easy": { | |
| "acc": 0.37415824915824913, | |
| "acc_stderr": 0.009929516948977625, | |
| "acc_norm": 0.3367003367003367, | |
| "acc_norm_stderr": 0.009697166595752477 | |
| }, | |
| "arc_challenge": { | |
| "acc": 0.18600682593856654, | |
| "acc_stderr": 0.011370940183266749, | |
| "acc_norm": 0.22440273037542663, | |
| "acc_norm_stderr": 0.012191404938603843 | |
| }, | |
| "crows_pairs_english_age": { | |
| "likelihood_difference": 2.8133585164835164, | |
| "likelihood_difference_stderr": 0.27309263450343635, | |
| "pct_stereotype": 0.4725274725274725, | |
| "pct_stereotype_stderr": 0.05262501097748859 | |
| }, | |
| "crows_pairs_english_disability": { | |
| "likelihood_difference": 5.492307692307692, | |
| "likelihood_difference_stderr": 0.571991498636384, | |
| "pct_stereotype": 0.6461538461538462, | |
| "pct_stereotype_stderr": 0.05977027026123099 | |
| }, | |
| "crows_pairs_french": { | |
| "likelihood_difference": 5.452854800238521, | |
| "likelihood_difference_stderr": 0.13262546821335017, | |
| "pct_stereotype": 0.4442456768038163, | |
| "pct_stereotype_stderr": 0.012137130534698507 | |
| }, | |
| "hendrycksTest-formal_logic": { | |
| "acc": 0.2777777777777778, | |
| "acc_stderr": 0.040061680838488774, | |
| "acc_norm": 0.29365079365079366, | |
| "acc_norm_stderr": 0.04073524322147125 | |
| }, | |
| "hendrycksTest-high_school_european_history": { | |
| "acc": 0.18181818181818182, | |
| "acc_stderr": 0.030117688929503585, | |
| "acc_norm": 0.2606060606060606, | |
| "acc_norm_stderr": 0.03427743175816524 | |
| }, | |
| "hendrycksTest-high_school_geography": { | |
| "acc": 0.2878787878787879, | |
| "acc_stderr": 0.03225883512300992, | |
| "acc_norm": 0.3181818181818182, | |
| "acc_norm_stderr": 0.03318477333845331 | |
| }, | |
| "hendrycksTest-college_medicine": { | |
| "acc": 0.24277456647398843, | |
| "acc_stderr": 0.0326926380614177, | |
| "acc_norm": 0.3063583815028902, | |
| "acc_norm_stderr": 0.03514942551267437 | |
| }, | |
| "crows_pairs_french_autre": { | |
| "likelihood_difference": 4.454326923076923, | |
| "likelihood_difference_stderr": 1.3817380041698064, | |
| "pct_stereotype": 0.5384615384615384, | |
| "pct_stereotype_stderr": 0.14390989949130545 | |
| }, | |
| "hendrycksTest-college_biology": { | |
| "acc": 0.2569444444444444, | |
| "acc_stderr": 0.03653946969442099, | |
| "acc_norm": 0.2777777777777778, | |
| "acc_norm_stderr": 0.037455547914624576 | |
| } | |
| }, | |
| "versions": { | |
| "hendrycksTest-miscellaneous": 0, | |
| "hendrycksTest-professional_accounting": 0, | |
| "hendrycksTest-moral_scenarios": 0, | |
| "sciq": 0, | |
| "hendrycksTest-nutrition": 0, | |
| "piqa": 0, | |
| "hendrycksTest-high_school_us_history": 0, | |
| "hendrycksTest-international_law": 0, | |
| "hendrycksTest-anatomy": 0, | |
| "crows_pairs_french_gender": 0, | |
| "hendrycksTest-professional_medicine": 0, | |
| "hendrycksTest-high_school_psychology": 0, | |
| "hendrycksTest-astronomy": 0, | |
| "hendrycksTest-logical_fallacies": 0, | |
| "crows_pairs_french_disability": 0, | |
| "hendrycksTest-high_school_chemistry": 0, | |
| "hendrycksTest-elementary_mathematics": 0, | |
| "hendrycksTest-human_sexuality": 0, | |
| "hendrycksTest-professional_psychology": 0, | |
| "crows_pairs_french_religion": 0, | |
| "hendrycksTest-college_computer_science": 0, | |
| "crows_pairs_english_autre": 0, | |
| "hendrycksTest-econometrics": 0, | |
| "hendrycksTest-high_school_microeconomics": 0, | |
| "hendrycksTest-moral_disputes": 0, | |
| "hendrycksTest-machine_learning": 0, | |
| "hendrycksTest-management": 0, | |
| "crows_pairs_french_socioeconomic": 0, | |
| "hendrycksTest-high_school_macroeconomics": 0, | |
| "hendrycksTest-security_studies": 0, | |
| "hendrycksTest-medical_genetics": 0, | |
| "hendrycksTest-high_school_statistics": 0, | |
| "crows_pairs_english": 0, | |
| "hendrycksTest-high_school_physics": 0, | |
| "crows_pairs_english_religion": 0, | |
| "crows_pairs_english_sexual_orientation": 0, | |
| "crows_pairs_english_socioeconomic": 0, | |
| "crows_pairs_english_gender": 0, | |
| "hendrycksTest-electrical_engineering": 0, | |
| "hendrycksTest-business_ethics": 0, | |
| "hendrycksTest-global_facts": 0, | |
| "hendrycksTest-public_relations": 0, | |
| "crows_pairs_french_age": 0, | |
| "hendrycksTest-virology": 0, | |
| "crows_pairs_french_physical_appearance": 0, | |
| "hendrycksTest-human_aging": 0, | |
| "hendrycksTest-high_school_government_and_politics": 0, | |
| "hendrycksTest-philosophy": 0, | |
| "lambada_openai": 0, | |
| "crows_pairs_english_physical_appearance": 0, | |
| "winogrande": 0, | |
| "crows_pairs_english_nationality": 0, | |
| "hendrycksTest-college_physics": 0, | |
| "crows_pairs_english_race_color": 0, | |
| "hendrycksTest-conceptual_physics": 0, | |
| "hendrycksTest-clinical_knowledge": 0, | |
| "hendrycksTest-college_mathematics": 0, | |
| "hendrycksTest-abstract_algebra": 0, | |
| "hendrycksTest-computer_security": 0, | |
| "hendrycksTest-world_religions": 0, | |
| "hendrycksTest-sociology": 0, | |
| "hendrycksTest-college_chemistry": 0, | |
| "hendrycksTest-high_school_world_history": 0, | |
| "logiqa": 0, | |
| "wsc": 0, | |
| "hendrycksTest-high_school_computer_science": 0, | |
| "hendrycksTest-high_school_biology": 0, | |
| "hendrycksTest-marketing": 0, | |
| "hendrycksTest-professional_law": 0, | |
| "crows_pairs_french_nationality": 0, | |
| "hendrycksTest-prehistory": 0, | |
| "crows_pairs_french_sexual_orientation": 0, | |
| "hendrycksTest-high_school_mathematics": 0, | |
| "hendrycksTest-jurisprudence": 0, | |
| "crows_pairs_french_race_color": 0, | |
| "hendrycksTest-us_foreign_policy": 0, | |
| "arc_easy": 0, | |
| "arc_challenge": 0, | |
| "crows_pairs_english_age": 0, | |
| "crows_pairs_english_disability": 0, | |
| "crows_pairs_french": 0, | |
| "hendrycksTest-formal_logic": 0, | |
| "hendrycksTest-high_school_european_history": 0, | |
| "hendrycksTest-high_school_geography": 0, | |
| "hendrycksTest-college_medicine": 0, | |
| "crows_pairs_french_autre": 0, | |
| "hendrycksTest-college_biology": 0 | |
| }, | |
| "config": { | |
| "model": "hf-causal", | |
| "model_args": "pretrained=EleutherAI/pythia-v1.1-70m,revision=step83000", | |
| "num_fewshot": 0, | |
| "batch_size": 16, | |
| "device": "cuda:0", | |
| "no_cache": true, | |
| "limit": null, | |
| "bootstrap_iters": 100000, | |
| "description_dict": {} | |
| } | |
| } |