Spaces:
Running
Running
| { | |
| "results": { | |
| "crows_pairs_french_gender": { | |
| "likelihood_difference": 5.687986760124611, | |
| "likelihood_difference_stderr": 0.2761406584883121, | |
| "pct_stereotype": 0.470404984423676, | |
| "pct_stereotype_stderr": 0.02790184442005117 | |
| }, | |
| "hendrycksTest-marketing": { | |
| "acc": 0.23076923076923078, | |
| "acc_stderr": 0.027601921381417604, | |
| "acc_norm": 0.23076923076923078, | |
| "acc_norm_stderr": 0.027601921381417604 | |
| }, | |
| "hendrycksTest-high_school_psychology": { | |
| "acc": 0.24587155963302754, | |
| "acc_stderr": 0.01846194096870845, | |
| "acc_norm": 0.26972477064220185, | |
| "acc_norm_stderr": 0.019028486711115445 | |
| }, | |
| "hendrycksTest-college_chemistry": { | |
| "acc": 0.23, | |
| "acc_stderr": 0.04229525846816507, | |
| "acc_norm": 0.23, | |
| "acc_norm_stderr": 0.042295258468165065 | |
| }, | |
| "hendrycksTest-abstract_algebra": { | |
| "acc": 0.21, | |
| "acc_stderr": 0.040936018074033256, | |
| "acc_norm": 0.14, | |
| "acc_norm_stderr": 0.03487350880197772 | |
| }, | |
| "hendrycksTest-high_school_chemistry": { | |
| "acc": 0.1921182266009852, | |
| "acc_stderr": 0.02771931570961477, | |
| "acc_norm": 0.22167487684729065, | |
| "acc_norm_stderr": 0.029225575892489614 | |
| }, | |
| "hendrycksTest-econometrics": { | |
| "acc": 0.2543859649122807, | |
| "acc_stderr": 0.040969851398436695, | |
| "acc_norm": 0.2719298245614035, | |
| "acc_norm_stderr": 0.04185774424022056 | |
| }, | |
| "crows_pairs_english_disability": { | |
| "likelihood_difference": 6.718269230769231, | |
| "likelihood_difference_stderr": 0.8596632745046646, | |
| "pct_stereotype": 0.4461538461538462, | |
| "pct_stereotype_stderr": 0.06213651700539812 | |
| }, | |
| "hendrycksTest-high_school_computer_science": { | |
| "acc": 0.19, | |
| "acc_stderr": 0.03942772444036623, | |
| "acc_norm": 0.29, | |
| "acc_norm_stderr": 0.04560480215720684 | |
| }, | |
| "crows_pairs_french_nationality": { | |
| "likelihood_difference": 6.856966403162056, | |
| "likelihood_difference_stderr": 0.32916109908316876, | |
| "pct_stereotype": 0.5454545454545454, | |
| "pct_stereotype_stderr": 0.0313666163337434 | |
| }, | |
| "hendrycksTest-public_relations": { | |
| "acc": 0.2818181818181818, | |
| "acc_stderr": 0.04309118709946458, | |
| "acc_norm": 0.18181818181818182, | |
| "acc_norm_stderr": 0.036942843353377997 | |
| }, | |
| "crows_pairs_english_age": { | |
| "likelihood_difference": 3.380837912087912, | |
| "likelihood_difference_stderr": 0.49983969692767516, | |
| "pct_stereotype": 0.5164835164835165, | |
| "pct_stereotype_stderr": 0.05267597952306975 | |
| }, | |
| "logiqa": { | |
| "acc": 0.21044546850998463, | |
| "acc_stderr": 0.015988369488888755, | |
| "acc_norm": 0.23348694316436253, | |
| "acc_norm_stderr": 0.016593362460570887 | |
| }, | |
| "hendrycksTest-human_aging": { | |
| "acc": 0.3273542600896861, | |
| "acc_stderr": 0.03149384670994131, | |
| "acc_norm": 0.29596412556053814, | |
| "acc_norm_stderr": 0.03063659134869981 | |
| }, | |
| "hendrycksTest-formal_logic": { | |
| "acc": 0.2698412698412698, | |
| "acc_stderr": 0.03970158273235172, | |
| "acc_norm": 0.30158730158730157, | |
| "acc_norm_stderr": 0.04104947269903394 | |
| }, | |
| "hendrycksTest-clinical_knowledge": { | |
| "acc": 0.18490566037735848, | |
| "acc_stderr": 0.023893351834464324, | |
| "acc_norm": 0.3169811320754717, | |
| "acc_norm_stderr": 0.02863723563980091 | |
| }, | |
| "crows_pairs_french_autre": { | |
| "likelihood_difference": 5.5, | |
| "likelihood_difference_stderr": 0.9504975080559196, | |
| "pct_stereotype": 0.5384615384615384, | |
| "pct_stereotype_stderr": 0.14390989949130545 | |
| }, | |
| "hendrycksTest-moral_disputes": { | |
| "acc": 0.2254335260115607, | |
| "acc_stderr": 0.02249723019096755, | |
| "acc_norm": 0.22832369942196531, | |
| "acc_norm_stderr": 0.022598703804321624 | |
| }, | |
| "crows_pairs_english_sexual_orientation": { | |
| "likelihood_difference": 5.677083333333333, | |
| "likelihood_difference_stderr": 0.6194240763408452, | |
| "pct_stereotype": 0.6451612903225806, | |
| "pct_stereotype_stderr": 0.049883363937668256 | |
| }, | |
| "hendrycksTest-professional_psychology": { | |
| "acc": 0.25326797385620914, | |
| "acc_stderr": 0.01759348689536683, | |
| "acc_norm": 0.272875816993464, | |
| "acc_norm_stderr": 0.018020474148393577 | |
| }, | |
| "hendrycksTest-high_school_microeconomics": { | |
| "acc": 0.18907563025210083, | |
| "acc_stderr": 0.025435119438105357, | |
| "acc_norm": 0.2773109243697479, | |
| "acc_norm_stderr": 0.02907937453948001 | |
| }, | |
| "hendrycksTest-high_school_statistics": { | |
| "acc": 0.2175925925925926, | |
| "acc_stderr": 0.028139689444859645, | |
| "acc_norm": 0.23148148148148148, | |
| "acc_norm_stderr": 0.028765111718046944 | |
| }, | |
| "crows_pairs_english_gender": { | |
| "likelihood_difference": 4.05078125, | |
| "likelihood_difference_stderr": 0.38152405690444796, | |
| "pct_stereotype": 0.5, | |
| "pct_stereotype_stderr": 0.027994625547792713 | |
| }, | |
| "wsc": { | |
| "acc": 0.6346153846153846, | |
| "acc_stderr": 0.0474473339327792 | |
| }, | |
| "hendrycksTest-high_school_us_history": { | |
| "acc": 0.18627450980392157, | |
| "acc_stderr": 0.027325470966716336, | |
| "acc_norm": 0.21568627450980393, | |
| "acc_norm_stderr": 0.028867431449849313 | |
| }, | |
| "crows_pairs_english_religion": { | |
| "likelihood_difference": 5.009853603603603, | |
| "likelihood_difference_stderr": 0.5228133914951523, | |
| "pct_stereotype": 0.5855855855855856, | |
| "pct_stereotype_stderr": 0.04696953631102271 | |
| }, | |
| "sciq": { | |
| "acc": 0.228, | |
| "acc_stderr": 0.013273740700804483, | |
| "acc_norm": 0.236, | |
| "acc_norm_stderr": 0.013434451402438685 | |
| }, | |
| "crows_pairs_english_physical_appearance": { | |
| "likelihood_difference": 4.72265625, | |
| "likelihood_difference_stderr": 0.5793499299137083, | |
| "pct_stereotype": 0.5555555555555556, | |
| "pct_stereotype_stderr": 0.05897165471491952 | |
| }, | |
| "hendrycksTest-machine_learning": { | |
| "acc": 0.2767857142857143, | |
| "acc_stderr": 0.042466243366976256, | |
| "acc_norm": 0.25, | |
| "acc_norm_stderr": 0.04109974682633932 | |
| }, | |
| "hendrycksTest-prehistory": { | |
| "acc": 0.28703703703703703, | |
| "acc_stderr": 0.02517104191530968, | |
| "acc_norm": 0.24382716049382716, | |
| "acc_norm_stderr": 0.023891879541959593 | |
| }, | |
| "hendrycksTest-sociology": { | |
| "acc": 0.23383084577114427, | |
| "acc_stderr": 0.029929415408348384, | |
| "acc_norm": 0.24875621890547264, | |
| "acc_norm_stderr": 0.03056767593891672 | |
| }, | |
| "hendrycksTest-global_facts": { | |
| "acc": 0.27, | |
| "acc_stderr": 0.044619604333847394, | |
| "acc_norm": 0.3, | |
| "acc_norm_stderr": 0.046056618647183814 | |
| }, | |
| "crows_pairs_french": { | |
| "likelihood_difference": 6.74689736135957, | |
| "likelihood_difference_stderr": 0.15103608824599826, | |
| "pct_stereotype": 0.5533691115086464, | |
| "pct_stereotype_stderr": 0.012143526564900555 | |
| }, | |
| "hendrycksTest-medical_genetics": { | |
| "acc": 0.29, | |
| "acc_stderr": 0.045604802157206845, | |
| "acc_norm": 0.28, | |
| "acc_norm_stderr": 0.045126085985421276 | |
| }, | |
| "hendrycksTest-high_school_mathematics": { | |
| "acc": 0.12222222222222222, | |
| "acc_stderr": 0.019970605780284603, | |
| "acc_norm": 0.1814814814814815, | |
| "acc_norm_stderr": 0.023499264669407282 | |
| }, | |
| "hendrycksTest-college_biology": { | |
| "acc": 0.2222222222222222, | |
| "acc_stderr": 0.034765901043041336, | |
| "acc_norm": 0.20833333333333334, | |
| "acc_norm_stderr": 0.03396116205845335 | |
| }, | |
| "hendrycksTest-conceptual_physics": { | |
| "acc": 0.251063829787234, | |
| "acc_stderr": 0.02834696377716246, | |
| "acc_norm": 0.2, | |
| "acc_norm_stderr": 0.0261488180184245 | |
| }, | |
| "hendrycksTest-moral_scenarios": { | |
| "acc": 0.23798882681564246, | |
| "acc_stderr": 0.014242630070574915, | |
| "acc_norm": 0.27262569832402234, | |
| "acc_norm_stderr": 0.014893391735249588 | |
| }, | |
| "hendrycksTest-jurisprudence": { | |
| "acc": 0.1574074074074074, | |
| "acc_stderr": 0.03520703990517965, | |
| "acc_norm": 0.21296296296296297, | |
| "acc_norm_stderr": 0.039578354719809805 | |
| }, | |
| "crows_pairs_english": { | |
| "likelihood_difference": 4.661393112701252, | |
| "likelihood_difference_stderr": 0.13998586074905606, | |
| "pct_stereotype": 0.456768038163387, | |
| "pct_stereotype_stderr": 0.012167560197793078 | |
| }, | |
| "crows_pairs_french_sexual_orientation": { | |
| "likelihood_difference": 13.163461538461538, | |
| "likelihood_difference_stderr": 0.8325716351947234, | |
| "pct_stereotype": 0.7912087912087912, | |
| "pct_stereotype_stderr": 0.042843052065094304 | |
| }, | |
| "hendrycksTest-management": { | |
| "acc": 0.23300970873786409, | |
| "acc_stderr": 0.04185832598928315, | |
| "acc_norm": 0.2815533980582524, | |
| "acc_norm_stderr": 0.04453254836326467 | |
| }, | |
| "crows_pairs_english_socioeconomic": { | |
| "likelihood_difference": 4.904440789473684, | |
| "likelihood_difference_stderr": 0.4062917141669697, | |
| "pct_stereotype": 0.48947368421052634, | |
| "pct_stereotype_stderr": 0.036361587723547695 | |
| }, | |
| "hendrycksTest-logical_fallacies": { | |
| "acc": 0.22085889570552147, | |
| "acc_stderr": 0.032591773927421776, | |
| "acc_norm": 0.3128834355828221, | |
| "acc_norm_stderr": 0.036429145782924055 | |
| }, | |
| "hendrycksTest-astronomy": { | |
| "acc": 0.20394736842105263, | |
| "acc_stderr": 0.032790004063100495, | |
| "acc_norm": 0.27631578947368424, | |
| "acc_norm_stderr": 0.03639057569952925 | |
| }, | |
| "crows_pairs_english_autre": { | |
| "likelihood_difference": 6.349431818181818, | |
| "likelihood_difference_stderr": 2.804745680840638, | |
| "pct_stereotype": 0.45454545454545453, | |
| "pct_stereotype_stderr": 0.15745916432444335 | |
| }, | |
| "hendrycksTest-high_school_world_history": { | |
| "acc": 0.17721518987341772, | |
| "acc_stderr": 0.02485636418450322, | |
| "acc_norm": 0.25738396624472576, | |
| "acc_norm_stderr": 0.028458820991460295 | |
| }, | |
| "hendrycksTest-professional_medicine": { | |
| "acc": 0.25, | |
| "acc_stderr": 0.026303648393696036, | |
| "acc_norm": 0.25, | |
| "acc_norm_stderr": 0.026303648393696036 | |
| }, | |
| "hendrycksTest-college_computer_science": { | |
| "acc": 0.25, | |
| "acc_stderr": 0.04351941398892446, | |
| "acc_norm": 0.21, | |
| "acc_norm_stderr": 0.040936018074033256 | |
| }, | |
| "lambada_openai": { | |
| "ppl": 705314.6370389248, | |
| "ppl_stderr": 50610.68705557734, | |
| "acc": 0.0, | |
| "acc_stderr": 0.0 | |
| }, | |
| "hendrycksTest-college_medicine": { | |
| "acc": 0.23699421965317918, | |
| "acc_stderr": 0.03242414757483098, | |
| "acc_norm": 0.2658959537572254, | |
| "acc_norm_stderr": 0.03368762932259432 | |
| }, | |
| "arc_easy": { | |
| "acc": 0.27441077441077444, | |
| "acc_stderr": 0.00915617712224453, | |
| "acc_norm": 0.2849326599326599, | |
| "acc_norm_stderr": 0.009262170695590658 | |
| }, | |
| "hendrycksTest-security_studies": { | |
| "acc": 0.3306122448979592, | |
| "acc_stderr": 0.030116426296540613, | |
| "acc_norm": 0.20408163265306123, | |
| "acc_norm_stderr": 0.025801283475090506 | |
| }, | |
| "winogrande": { | |
| "acc": 0.4925019731649566, | |
| "acc_stderr": 0.014050905521228577 | |
| }, | |
| "crows_pairs_english_nationality": { | |
| "likelihood_difference": 5.4428530092592595, | |
| "likelihood_difference_stderr": 0.3840752204417463, | |
| "pct_stereotype": 0.3333333333333333, | |
| "pct_stereotype_stderr": 0.03214952147802749 | |
| }, | |
| "arc_challenge": { | |
| "acc": 0.19965870307167236, | |
| "acc_stderr": 0.011681625756888669, | |
| "acc_norm": 0.24146757679180889, | |
| "acc_norm_stderr": 0.01250656483973943 | |
| }, | |
| "hendrycksTest-computer_security": { | |
| "acc": 0.22, | |
| "acc_stderr": 0.04163331998932268, | |
| "acc_norm": 0.27, | |
| "acc_norm_stderr": 0.044619604333847394 | |
| }, | |
| "hendrycksTest-world_religions": { | |
| "acc": 0.1695906432748538, | |
| "acc_stderr": 0.028782108105401712, | |
| "acc_norm": 0.25146198830409355, | |
| "acc_norm_stderr": 0.033275044238468436 | |
| }, | |
| "crows_pairs_french_age": { | |
| "likelihood_difference": 4.167361111111111, | |
| "likelihood_difference_stderr": 0.49130810000225555, | |
| "pct_stereotype": 0.4111111111111111, | |
| "pct_stereotype_stderr": 0.052155640611075534 | |
| }, | |
| "hendrycksTest-elementary_mathematics": { | |
| "acc": 0.2275132275132275, | |
| "acc_stderr": 0.021591269407823778, | |
| "acc_norm": 0.21164021164021163, | |
| "acc_norm_stderr": 0.02103733150526289 | |
| }, | |
| "hendrycksTest-international_law": { | |
| "acc": 0.10743801652892562, | |
| "acc_stderr": 0.02826881219254063, | |
| "acc_norm": 0.2396694214876033, | |
| "acc_norm_stderr": 0.03896878985070417 | |
| }, | |
| "crows_pairs_french_disability": { | |
| "likelihood_difference": 10.162878787878787, | |
| "likelihood_difference_stderr": 1.04556369991972, | |
| "pct_stereotype": 0.3333333333333333, | |
| "pct_stereotype_stderr": 0.0584705346204686 | |
| }, | |
| "hendrycksTest-miscellaneous": { | |
| "acc": 0.23627075351213284, | |
| "acc_stderr": 0.015190473717037498, | |
| "acc_norm": 0.25287356321839083, | |
| "acc_norm_stderr": 0.015543377313719681 | |
| }, | |
| "hendrycksTest-high_school_european_history": { | |
| "acc": 0.16363636363636364, | |
| "acc_stderr": 0.028887872395487953, | |
| "acc_norm": 0.24242424242424243, | |
| "acc_norm_stderr": 0.03346409881055953 | |
| }, | |
| "crows_pairs_french_religion": { | |
| "likelihood_difference": 7.765760869565217, | |
| "likelihood_difference_stderr": 0.49195584086877725, | |
| "pct_stereotype": 0.6869565217391305, | |
| "pct_stereotype_stderr": 0.043432470166108225 | |
| }, | |
| "hendrycksTest-professional_accounting": { | |
| "acc": 0.25886524822695034, | |
| "acc_stderr": 0.026129572527180848, | |
| "acc_norm": 0.2730496453900709, | |
| "acc_norm_stderr": 0.02657786094330786 | |
| }, | |
| "hendrycksTest-high_school_geography": { | |
| "acc": 0.18686868686868688, | |
| "acc_stderr": 0.027772533334218977, | |
| "acc_norm": 0.30303030303030304, | |
| "acc_norm_stderr": 0.032742879140268674 | |
| }, | |
| "hendrycksTest-anatomy": { | |
| "acc": 0.2074074074074074, | |
| "acc_stderr": 0.03502553170678319, | |
| "acc_norm": 0.25925925925925924, | |
| "acc_norm_stderr": 0.03785714465066653 | |
| }, | |
| "hendrycksTest-philosophy": { | |
| "acc": 0.2379421221864952, | |
| "acc_stderr": 0.02418515064781871, | |
| "acc_norm": 0.2990353697749196, | |
| "acc_norm_stderr": 0.02600330111788513 | |
| }, | |
| "crows_pairs_english_race_color": { | |
| "likelihood_difference": 4.281742125984252, | |
| "likelihood_difference_stderr": 0.21780058915583433, | |
| "pct_stereotype": 0.3838582677165354, | |
| "pct_stereotype_stderr": 0.021598410071068296 | |
| }, | |
| "hendrycksTest-high_school_government_and_politics": { | |
| "acc": 0.19689119170984457, | |
| "acc_stderr": 0.028697873971860674, | |
| "acc_norm": 0.2538860103626943, | |
| "acc_norm_stderr": 0.03141024780565318 | |
| }, | |
| "hendrycksTest-high_school_physics": { | |
| "acc": 0.2052980132450331, | |
| "acc_stderr": 0.03297986648473836, | |
| "acc_norm": 0.24503311258278146, | |
| "acc_norm_stderr": 0.035118075718047245 | |
| }, | |
| "crows_pairs_french_socioeconomic": { | |
| "likelihood_difference": 7.983976403061225, | |
| "likelihood_difference_stderr": 0.545579868210259, | |
| "pct_stereotype": 0.34183673469387754, | |
| "pct_stereotype_stderr": 0.033967132039868675 | |
| }, | |
| "hendrycksTest-high_school_macroeconomics": { | |
| "acc": 0.19743589743589743, | |
| "acc_stderr": 0.02018264696867484, | |
| "acc_norm": 0.22564102564102564, | |
| "acc_norm_stderr": 0.02119363252514852 | |
| }, | |
| "hendrycksTest-human_sexuality": { | |
| "acc": 0.29770992366412213, | |
| "acc_stderr": 0.04010358942462203, | |
| "acc_norm": 0.2824427480916031, | |
| "acc_norm_stderr": 0.03948406125768361 | |
| }, | |
| "hendrycksTest-electrical_engineering": { | |
| "acc": 0.25517241379310346, | |
| "acc_stderr": 0.03632984052707842, | |
| "acc_norm": 0.2689655172413793, | |
| "acc_norm_stderr": 0.036951833116502325 | |
| }, | |
| "hendrycksTest-us_foreign_policy": { | |
| "acc": 0.23, | |
| "acc_stderr": 0.04229525846816505, | |
| "acc_norm": 0.24, | |
| "acc_norm_stderr": 0.042923469599092816 | |
| }, | |
| "crows_pairs_french_race_color": { | |
| "likelihood_difference": 5.3552989130434785, | |
| "likelihood_difference_stderr": 0.2271004698936648, | |
| "pct_stereotype": 0.6869565217391305, | |
| "pct_stereotype_stderr": 0.021645150653106047 | |
| }, | |
| "piqa": { | |
| "acc": 0.5179542981501633, | |
| "acc_stderr": 0.011658300623287153, | |
| "acc_norm": 0.515778019586507, | |
| "acc_norm_stderr": 0.011660014400426182 | |
| }, | |
| "hendrycksTest-virology": { | |
| "acc": 0.22289156626506024, | |
| "acc_stderr": 0.03240004825594688, | |
| "acc_norm": 0.25301204819277107, | |
| "acc_norm_stderr": 0.03384429155233137 | |
| }, | |
| "hendrycksTest-college_mathematics": { | |
| "acc": 0.16, | |
| "acc_stderr": 0.03684529491774708, | |
| "acc_norm": 0.17, | |
| "acc_norm_stderr": 0.0377525168068637 | |
| }, | |
| "hendrycksTest-high_school_biology": { | |
| "acc": 0.2129032258064516, | |
| "acc_stderr": 0.02328766512726853, | |
| "acc_norm": 0.23870967741935484, | |
| "acc_norm_stderr": 0.024251071262208837 | |
| }, | |
| "hendrycksTest-professional_law": { | |
| "acc": 0.242503259452412, | |
| "acc_stderr": 0.010946570966348783, | |
| "acc_norm": 0.2711864406779661, | |
| "acc_norm_stderr": 0.011354581451622986 | |
| }, | |
| "hendrycksTest-college_physics": { | |
| "acc": 0.20588235294117646, | |
| "acc_stderr": 0.04023382273617747, | |
| "acc_norm": 0.19607843137254902, | |
| "acc_norm_stderr": 0.03950581861179962 | |
| }, | |
| "hendrycksTest-nutrition": { | |
| "acc": 0.19607843137254902, | |
| "acc_stderr": 0.022733789405447593, | |
| "acc_norm": 0.28431372549019607, | |
| "acc_norm_stderr": 0.025829163272757482 | |
| }, | |
| "hendrycksTest-business_ethics": { | |
| "acc": 0.32, | |
| "acc_stderr": 0.04688261722621505, | |
| "acc_norm": 0.29, | |
| "acc_norm_stderr": 0.04560480215720684 | |
| }, | |
| "crows_pairs_french_physical_appearance": { | |
| "likelihood_difference": 7.185329861111111, | |
| "likelihood_difference_stderr": 0.9560662240150144, | |
| "pct_stereotype": 0.5416666666666666, | |
| "pct_stereotype_stderr": 0.05913268547421809 | |
| } | |
| }, | |
| "versions": { | |
| "crows_pairs_french_gender": 0, | |
| "hendrycksTest-marketing": 0, | |
| "hendrycksTest-high_school_psychology": 0, | |
| "hendrycksTest-college_chemistry": 0, | |
| "hendrycksTest-abstract_algebra": 0, | |
| "hendrycksTest-high_school_chemistry": 0, | |
| "hendrycksTest-econometrics": 0, | |
| "crows_pairs_english_disability": 0, | |
| "hendrycksTest-high_school_computer_science": 0, | |
| "crows_pairs_french_nationality": 0, | |
| "hendrycksTest-public_relations": 0, | |
| "crows_pairs_english_age": 0, | |
| "logiqa": 0, | |
| "hendrycksTest-human_aging": 0, | |
| "hendrycksTest-formal_logic": 0, | |
| "hendrycksTest-clinical_knowledge": 0, | |
| "crows_pairs_french_autre": 0, | |
| "hendrycksTest-moral_disputes": 0, | |
| "crows_pairs_english_sexual_orientation": 0, | |
| "hendrycksTest-professional_psychology": 0, | |
| "hendrycksTest-high_school_microeconomics": 0, | |
| "hendrycksTest-high_school_statistics": 0, | |
| "crows_pairs_english_gender": 0, | |
| "wsc": 0, | |
| "hendrycksTest-high_school_us_history": 0, | |
| "crows_pairs_english_religion": 0, | |
| "sciq": 0, | |
| "crows_pairs_english_physical_appearance": 0, | |
| "hendrycksTest-machine_learning": 0, | |
| "hendrycksTest-prehistory": 0, | |
| "hendrycksTest-sociology": 0, | |
| "hendrycksTest-global_facts": 0, | |
| "crows_pairs_french": 0, | |
| "hendrycksTest-medical_genetics": 0, | |
| "hendrycksTest-high_school_mathematics": 0, | |
| "hendrycksTest-college_biology": 0, | |
| "hendrycksTest-conceptual_physics": 0, | |
| "hendrycksTest-moral_scenarios": 0, | |
| "hendrycksTest-jurisprudence": 0, | |
| "crows_pairs_english": 0, | |
| "crows_pairs_french_sexual_orientation": 0, | |
| "hendrycksTest-management": 0, | |
| "crows_pairs_english_socioeconomic": 0, | |
| "hendrycksTest-logical_fallacies": 0, | |
| "hendrycksTest-astronomy": 0, | |
| "crows_pairs_english_autre": 0, | |
| "hendrycksTest-high_school_world_history": 0, | |
| "hendrycksTest-professional_medicine": 0, | |
| "hendrycksTest-college_computer_science": 0, | |
| "lambada_openai": 0, | |
| "hendrycksTest-college_medicine": 0, | |
| "arc_easy": 0, | |
| "hendrycksTest-security_studies": 0, | |
| "winogrande": 0, | |
| "crows_pairs_english_nationality": 0, | |
| "arc_challenge": 0, | |
| "hendrycksTest-computer_security": 0, | |
| "hendrycksTest-world_religions": 0, | |
| "crows_pairs_french_age": 0, | |
| "hendrycksTest-elementary_mathematics": 0, | |
| "hendrycksTest-international_law": 0, | |
| "crows_pairs_french_disability": 0, | |
| "hendrycksTest-miscellaneous": 0, | |
| "hendrycksTest-high_school_european_history": 0, | |
| "crows_pairs_french_religion": 0, | |
| "hendrycksTest-professional_accounting": 0, | |
| "hendrycksTest-high_school_geography": 0, | |
| "hendrycksTest-anatomy": 0, | |
| "hendrycksTest-philosophy": 0, | |
| "crows_pairs_english_race_color": 0, | |
| "hendrycksTest-high_school_government_and_politics": 0, | |
| "hendrycksTest-high_school_physics": 0, | |
| "crows_pairs_french_socioeconomic": 0, | |
| "hendrycksTest-high_school_macroeconomics": 0, | |
| "hendrycksTest-human_sexuality": 0, | |
| "hendrycksTest-electrical_engineering": 0, | |
| "hendrycksTest-us_foreign_policy": 0, | |
| "crows_pairs_french_race_color": 0, | |
| "piqa": 0, | |
| "hendrycksTest-virology": 0, | |
| "hendrycksTest-college_mathematics": 0, | |
| "hendrycksTest-high_school_biology": 0, | |
| "hendrycksTest-professional_law": 0, | |
| "hendrycksTest-college_physics": 0, | |
| "hendrycksTest-nutrition": 0, | |
| "hendrycksTest-business_ethics": 0, | |
| "crows_pairs_french_physical_appearance": 0 | |
| }, | |
| "config": { | |
| "model": "hf-causal", | |
| "model_args": "use_accelerate=True,pretrained=EleutherAI/pythia-v1.1-70m,revision=step256", | |
| "num_fewshot": 0, | |
| "batch_size": 32, | |
| "device": null, | |
| "no_cache": true, | |
| "limit": null, | |
| "bootstrap_iters": 100000, | |
| "description_dict": {} | |
| } | |
| } |