Spaces:
Running
Running
| { | |
| "results": { | |
| "lambada_openai": { | |
| "ppl": 3288862.4386760374, | |
| "ppl_stderr": 311605.46093383565, | |
| "acc": 0.0, | |
| "acc_stderr": 0.0 | |
| }, | |
| "hendrycksTest-astronomy": { | |
| "acc": 0.16447368421052633, | |
| "acc_stderr": 0.030167533468632723, | |
| "acc_norm": 0.2236842105263158, | |
| "acc_norm_stderr": 0.033911609343436046 | |
| }, | |
| "winogrande": { | |
| "acc": 0.4940805051302289, | |
| "acc_stderr": 0.01405150083848581 | |
| }, | |
| "hendrycksTest-high_school_us_history": { | |
| "acc": 0.18627450980392157, | |
| "acc_stderr": 0.027325470966716323, | |
| "acc_norm": 0.2647058823529412, | |
| "acc_norm_stderr": 0.030964517926923393 | |
| }, | |
| "crows_pairs_french_age": { | |
| "likelihood_difference": 5.902777777777778, | |
| "likelihood_difference_stderr": 0.745349522367746, | |
| "pct_stereotype": 0.6777777777777778, | |
| "pct_stereotype_stderr": 0.049536623805744535 | |
| }, | |
| "hendrycksTest-high_school_computer_science": { | |
| "acc": 0.23, | |
| "acc_stderr": 0.04229525846816505, | |
| "acc_norm": 0.36, | |
| "acc_norm_stderr": 0.04824181513244218 | |
| }, | |
| "hendrycksTest-global_facts": { | |
| "acc": 0.33, | |
| "acc_stderr": 0.047258156262526045, | |
| "acc_norm": 0.32, | |
| "acc_norm_stderr": 0.046882617226215034 | |
| }, | |
| "hendrycksTest-high_school_physics": { | |
| "acc": 0.2052980132450331, | |
| "acc_stderr": 0.03297986648473836, | |
| "acc_norm": 0.2980132450331126, | |
| "acc_norm_stderr": 0.03734535676787198 | |
| }, | |
| "crows_pairs_french_socioeconomic": { | |
| "likelihood_difference": 12.683394451530612, | |
| "likelihood_difference_stderr": 0.8321591288729919, | |
| "pct_stereotype": 0.45918367346938777, | |
| "pct_stereotype_stderr": 0.03568624151230552 | |
| }, | |
| "hendrycksTest-international_law": { | |
| "acc": 0.09917355371900827, | |
| "acc_stderr": 0.027285246312758957, | |
| "acc_norm": 0.2396694214876033, | |
| "acc_norm_stderr": 0.03896878985070417 | |
| }, | |
| "hendrycksTest-medical_genetics": { | |
| "acc": 0.26, | |
| "acc_stderr": 0.0440844002276808, | |
| "acc_norm": 0.2, | |
| "acc_norm_stderr": 0.04020151261036845 | |
| }, | |
| "hendrycksTest-logical_fallacies": { | |
| "acc": 0.17177914110429449, | |
| "acc_stderr": 0.029634717272371013, | |
| "acc_norm": 0.25766871165644173, | |
| "acc_norm_stderr": 0.03436150827846917 | |
| }, | |
| "hendrycksTest-moral_disputes": { | |
| "acc": 0.22254335260115607, | |
| "acc_stderr": 0.02239421566194282, | |
| "acc_norm": 0.21965317919075145, | |
| "acc_norm_stderr": 0.022289638852617893 | |
| }, | |
| "crows_pairs_english_disability": { | |
| "likelihood_difference": 7.655769230769231, | |
| "likelihood_difference_stderr": 1.2456701776455885, | |
| "pct_stereotype": 0.6307692307692307, | |
| "pct_stereotype_stderr": 0.060324565928300454 | |
| }, | |
| "hendrycksTest-prehistory": { | |
| "acc": 0.25925925925925924, | |
| "acc_stderr": 0.02438366553103545, | |
| "acc_norm": 0.24382716049382716, | |
| "acc_norm_stderr": 0.023891879541959603 | |
| }, | |
| "hendrycksTest-college_mathematics": { | |
| "acc": 0.14, | |
| "acc_stderr": 0.0348735088019777, | |
| "acc_norm": 0.19, | |
| "acc_norm_stderr": 0.03942772444036623 | |
| }, | |
| "crows_pairs_french": { | |
| "likelihood_difference": 10.100835755813954, | |
| "likelihood_difference_stderr": 0.23128974328889199, | |
| "pct_stereotype": 0.5819916517590936, | |
| "pct_stereotype_stderr": 0.012047969184920519 | |
| }, | |
| "wsc": { | |
| "acc": 0.6346153846153846, | |
| "acc_stderr": 0.0474473339327792 | |
| }, | |
| "hendrycksTest-electrical_engineering": { | |
| "acc": 0.2413793103448276, | |
| "acc_stderr": 0.03565998174135303, | |
| "acc_norm": 0.20689655172413793, | |
| "acc_norm_stderr": 0.03375672449560554 | |
| }, | |
| "crows_pairs_english_physical_appearance": { | |
| "likelihood_difference": 6.219184027777778, | |
| "likelihood_difference_stderr": 0.8156476562247187, | |
| "pct_stereotype": 0.5138888888888888, | |
| "pct_stereotype_stderr": 0.05931618532716555 | |
| }, | |
| "hendrycksTest-management": { | |
| "acc": 0.1941747572815534, | |
| "acc_stderr": 0.03916667762822582, | |
| "acc_norm": 0.23300970873786409, | |
| "acc_norm_stderr": 0.04185832598928315 | |
| }, | |
| "hendrycksTest-machine_learning": { | |
| "acc": 0.25, | |
| "acc_stderr": 0.04109974682633932, | |
| "acc_norm": 0.3125, | |
| "acc_norm_stderr": 0.043994650575715215 | |
| }, | |
| "crows_pairs_english_race_color": { | |
| "likelihood_difference": 5.5294045275590555, | |
| "likelihood_difference_stderr": 0.34271615785671483, | |
| "pct_stereotype": 0.36811023622047245, | |
| "pct_stereotype_stderr": 0.021419317453594672 | |
| }, | |
| "hendrycksTest-marketing": { | |
| "acc": 0.2222222222222222, | |
| "acc_stderr": 0.027236013946196666, | |
| "acc_norm": 0.23931623931623933, | |
| "acc_norm_stderr": 0.02795182680892433 | |
| }, | |
| "hendrycksTest-high_school_chemistry": { | |
| "acc": 0.20689655172413793, | |
| "acc_stderr": 0.028501378167893946, | |
| "acc_norm": 0.22167487684729065, | |
| "acc_norm_stderr": 0.029225575892489617 | |
| }, | |
| "hendrycksTest-econometrics": { | |
| "acc": 0.24561403508771928, | |
| "acc_stderr": 0.0404933929774814, | |
| "acc_norm": 0.2807017543859649, | |
| "acc_norm_stderr": 0.04227054451232199 | |
| }, | |
| "hendrycksTest-virology": { | |
| "acc": 0.14457831325301204, | |
| "acc_stderr": 0.027377874786362316, | |
| "acc_norm": 0.18674698795180722, | |
| "acc_norm_stderr": 0.030338749144500615 | |
| }, | |
| "hendrycksTest-high_school_psychology": { | |
| "acc": 0.22752293577981653, | |
| "acc_stderr": 0.017974463578776502, | |
| "acc_norm": 0.24954128440366974, | |
| "acc_norm_stderr": 0.01855389762950162 | |
| }, | |
| "hendrycksTest-high_school_geography": { | |
| "acc": 0.25252525252525254, | |
| "acc_stderr": 0.030954055470365897, | |
| "acc_norm": 0.2474747474747475, | |
| "acc_norm_stderr": 0.03074630074212451 | |
| }, | |
| "sciq": { | |
| "acc": 0.223, | |
| "acc_stderr": 0.013169830843425661, | |
| "acc_norm": 0.21, | |
| "acc_norm_stderr": 0.012886662332274547 | |
| }, | |
| "crows_pairs_french_religion": { | |
| "likelihood_difference": 12.11983695652174, | |
| "likelihood_difference_stderr": 0.9761138647537818, | |
| "pct_stereotype": 0.6608695652173913, | |
| "pct_stereotype_stderr": 0.04433930011819816 | |
| }, | |
| "crows_pairs_english_gender": { | |
| "likelihood_difference": 4.749609375, | |
| "likelihood_difference_stderr": 0.4877724715110692, | |
| "pct_stereotype": 0.48125, | |
| "pct_stereotype_stderr": 0.027974934901776306 | |
| }, | |
| "hendrycksTest-professional_accounting": { | |
| "acc": 0.26595744680851063, | |
| "acc_stderr": 0.026358065698880582, | |
| "acc_norm": 0.25886524822695034, | |
| "acc_norm_stderr": 0.026129572527180848 | |
| }, | |
| "logiqa": { | |
| "acc": 0.2196620583717358, | |
| "acc_stderr": 0.01623910941493393, | |
| "acc_norm": 0.23809523809523808, | |
| "acc_norm_stderr": 0.016705867034419633 | |
| }, | |
| "hendrycksTest-professional_medicine": { | |
| "acc": 0.22794117647058823, | |
| "acc_stderr": 0.025483081468029804, | |
| "acc_norm": 0.2867647058823529, | |
| "acc_norm_stderr": 0.027472274473233818 | |
| }, | |
| "hendrycksTest-world_religions": { | |
| "acc": 0.1695906432748538, | |
| "acc_stderr": 0.028782108105401712, | |
| "acc_norm": 0.22807017543859648, | |
| "acc_norm_stderr": 0.03218093795602357 | |
| }, | |
| "hendrycksTest-sociology": { | |
| "acc": 0.2835820895522388, | |
| "acc_stderr": 0.03187187537919796, | |
| "acc_norm": 0.2935323383084577, | |
| "acc_norm_stderr": 0.032200241045342054 | |
| }, | |
| "hendrycksTest-professional_psychology": { | |
| "acc": 0.22058823529411764, | |
| "acc_stderr": 0.01677467236546854, | |
| "acc_norm": 0.24019607843137256, | |
| "acc_norm_stderr": 0.017282760695167435 | |
| }, | |
| "hendrycksTest-computer_security": { | |
| "acc": 0.26, | |
| "acc_stderr": 0.04408440022768078, | |
| "acc_norm": 0.28, | |
| "acc_norm_stderr": 0.045126085985421276 | |
| }, | |
| "hendrycksTest-philosophy": { | |
| "acc": 0.2379421221864952, | |
| "acc_stderr": 0.024185150647818707, | |
| "acc_norm": 0.2861736334405145, | |
| "acc_norm_stderr": 0.025670259242188943 | |
| }, | |
| "crows_pairs_french_race_color": { | |
| "likelihood_difference": 9.869972826086956, | |
| "likelihood_difference_stderr": 0.3709338879215957, | |
| "pct_stereotype": 0.7130434782608696, | |
| "pct_stereotype_stderr": 0.021113474740601688 | |
| }, | |
| "hendrycksTest-clinical_knowledge": { | |
| "acc": 0.18490566037735848, | |
| "acc_stderr": 0.023893351834464324, | |
| "acc_norm": 0.28679245283018867, | |
| "acc_norm_stderr": 0.027834912527544067 | |
| }, | |
| "crows_pairs_english": { | |
| "likelihood_difference": 5.480079755515802, | |
| "likelihood_difference_stderr": 0.19151850776212573, | |
| "pct_stereotype": 0.45855694692904, | |
| "pct_stereotype_stderr": 0.012171273580365826 | |
| }, | |
| "crows_pairs_french_nationality": { | |
| "likelihood_difference": 9.49802371541502, | |
| "likelihood_difference_stderr": 0.5281355544781192, | |
| "pct_stereotype": 0.4980237154150198, | |
| "pct_stereotype_stderr": 0.031496793380453074 | |
| }, | |
| "hendrycksTest-nutrition": { | |
| "acc": 0.20915032679738563, | |
| "acc_stderr": 0.023287685312334803, | |
| "acc_norm": 0.24836601307189543, | |
| "acc_norm_stderr": 0.02473998135511359 | |
| }, | |
| "hendrycksTest-college_medicine": { | |
| "acc": 0.19653179190751446, | |
| "acc_stderr": 0.030299574664788147, | |
| "acc_norm": 0.24855491329479767, | |
| "acc_norm_stderr": 0.03295304696818318 | |
| }, | |
| "crows_pairs_english_socioeconomic": { | |
| "likelihood_difference": 5.714967105263158, | |
| "likelihood_difference_stderr": 0.5307740830599903, | |
| "pct_stereotype": 0.5684210526315789, | |
| "pct_stereotype_stderr": 0.03602751443822843 | |
| }, | |
| "crows_pairs_english_autre": { | |
| "likelihood_difference": 5.2414772727272725, | |
| "likelihood_difference_stderr": 2.881736459713796, | |
| "pct_stereotype": 0.7272727272727273, | |
| "pct_stereotype_stderr": 0.14083575804390605 | |
| }, | |
| "hendrycksTest-anatomy": { | |
| "acc": 0.2074074074074074, | |
| "acc_stderr": 0.03502553170678318, | |
| "acc_norm": 0.28888888888888886, | |
| "acc_norm_stderr": 0.0391545063041425 | |
| }, | |
| "hendrycksTest-elementary_mathematics": { | |
| "acc": 0.2037037037037037, | |
| "acc_stderr": 0.02074274056012268, | |
| "acc_norm": 0.21957671957671956, | |
| "acc_norm_stderr": 0.021320018599770375 | |
| }, | |
| "hendrycksTest-high_school_biology": { | |
| "acc": 0.2, | |
| "acc_stderr": 0.022755204959542936, | |
| "acc_norm": 0.22580645161290322, | |
| "acc_norm_stderr": 0.02378557788418101 | |
| }, | |
| "crows_pairs_english_sexual_orientation": { | |
| "likelihood_difference": 5.359206989247312, | |
| "likelihood_difference_stderr": 0.7683231947337748, | |
| "pct_stereotype": 0.6021505376344086, | |
| "pct_stereotype_stderr": 0.0510291122856655 | |
| }, | |
| "hendrycksTest-high_school_statistics": { | |
| "acc": 0.23148148148148148, | |
| "acc_stderr": 0.02876511171804696, | |
| "acc_norm": 0.2962962962962963, | |
| "acc_norm_stderr": 0.031141447823536037 | |
| }, | |
| "crows_pairs_french_physical_appearance": { | |
| "likelihood_difference": 10.003472222222221, | |
| "likelihood_difference_stderr": 1.3633059287800664, | |
| "pct_stereotype": 0.4861111111111111, | |
| "pct_stereotype_stderr": 0.059316185327165566 | |
| }, | |
| "hendrycksTest-formal_logic": { | |
| "acc": 0.2857142857142857, | |
| "acc_stderr": 0.04040610178208841, | |
| "acc_norm": 0.25396825396825395, | |
| "acc_norm_stderr": 0.038932596106046706 | |
| }, | |
| "hendrycksTest-human_sexuality": { | |
| "acc": 0.25190839694656486, | |
| "acc_stderr": 0.038073871163060866, | |
| "acc_norm": 0.25190839694656486, | |
| "acc_norm_stderr": 0.038073871163060866 | |
| }, | |
| "hendrycksTest-moral_scenarios": { | |
| "acc": 0.23798882681564246, | |
| "acc_stderr": 0.014242630070574915, | |
| "acc_norm": 0.23798882681564246, | |
| "acc_norm_stderr": 0.014242630070574915 | |
| }, | |
| "hendrycksTest-abstract_algebra": { | |
| "acc": 0.2, | |
| "acc_stderr": 0.04020151261036845, | |
| "acc_norm": 0.17, | |
| "acc_norm_stderr": 0.03775251680686371 | |
| }, | |
| "arc_easy": { | |
| "acc": 0.2668350168350168, | |
| "acc_stderr": 0.00907591585926725, | |
| "acc_norm": 0.2638888888888889, | |
| "acc_norm_stderr": 0.009043789220055139 | |
| }, | |
| "hendrycksTest-college_chemistry": { | |
| "acc": 0.21, | |
| "acc_stderr": 0.04093601807403325, | |
| "acc_norm": 0.26, | |
| "acc_norm_stderr": 0.0440844002276808 | |
| }, | |
| "hendrycksTest-high_school_microeconomics": { | |
| "acc": 0.18067226890756302, | |
| "acc_stderr": 0.02499196496660074, | |
| "acc_norm": 0.2773109243697479, | |
| "acc_norm_stderr": 0.029079374539480007 | |
| }, | |
| "hendrycksTest-jurisprudence": { | |
| "acc": 0.18518518518518517, | |
| "acc_stderr": 0.03755265865037181, | |
| "acc_norm": 0.24074074074074073, | |
| "acc_norm_stderr": 0.041331194402438376 | |
| }, | |
| "hendrycksTest-college_physics": { | |
| "acc": 0.13725490196078433, | |
| "acc_stderr": 0.03424084669891521, | |
| "acc_norm": 0.20588235294117646, | |
| "acc_norm_stderr": 0.04023382273617747 | |
| }, | |
| "hendrycksTest-public_relations": { | |
| "acc": 0.2636363636363636, | |
| "acc_stderr": 0.04220224692971987, | |
| "acc_norm": 0.20909090909090908, | |
| "acc_norm_stderr": 0.03895091015724138 | |
| }, | |
| "crows_pairs_english_religion": { | |
| "likelihood_difference": 5.728322072072072, | |
| "likelihood_difference_stderr": 0.6965067589462834, | |
| "pct_stereotype": 0.45045045045045046, | |
| "pct_stereotype_stderr": 0.04743846177747609 | |
| }, | |
| "hendrycksTest-high_school_macroeconomics": { | |
| "acc": 0.2230769230769231, | |
| "acc_stderr": 0.02110773012724399, | |
| "acc_norm": 0.25384615384615383, | |
| "acc_norm_stderr": 0.022066054378726257 | |
| }, | |
| "crows_pairs_french_sexual_orientation": { | |
| "likelihood_difference": 15.282967032967033, | |
| "likelihood_difference_stderr": 1.0847203102990313, | |
| "pct_stereotype": 0.8021978021978022, | |
| "pct_stereotype_stderr": 0.04198895203196222 | |
| }, | |
| "hendrycksTest-professional_law": { | |
| "acc": 0.23533246414602346, | |
| "acc_stderr": 0.010834432543912219, | |
| "acc_norm": 0.25684485006518903, | |
| "acc_norm_stderr": 0.011158455853098851 | |
| }, | |
| "hendrycksTest-high_school_world_history": { | |
| "acc": 0.189873417721519, | |
| "acc_stderr": 0.025530100460233494, | |
| "acc_norm": 0.22362869198312235, | |
| "acc_norm_stderr": 0.02712329820522997 | |
| }, | |
| "hendrycksTest-business_ethics": { | |
| "acc": 0.26, | |
| "acc_stderr": 0.044084400227680794, | |
| "acc_norm": 0.29, | |
| "acc_norm_stderr": 0.04560480215720684 | |
| }, | |
| "crows_pairs_french_autre": { | |
| "likelihood_difference": 7.216346153846154, | |
| "likelihood_difference_stderr": 1.9704931663267538, | |
| "pct_stereotype": 0.46153846153846156, | |
| "pct_stereotype_stderr": 0.14390989949130545 | |
| }, | |
| "hendrycksTest-conceptual_physics": { | |
| "acc": 0.2723404255319149, | |
| "acc_stderr": 0.029101290698386708, | |
| "acc_norm": 0.25957446808510637, | |
| "acc_norm_stderr": 0.02865917937429232 | |
| }, | |
| "crows_pairs_english_age": { | |
| "likelihood_difference": 3.418269230769231, | |
| "likelihood_difference_stderr": 0.6082631522720632, | |
| "pct_stereotype": 0.5274725274725275, | |
| "pct_stereotype_stderr": 0.05262501097748859 | |
| }, | |
| "hendrycksTest-us_foreign_policy": { | |
| "acc": 0.23, | |
| "acc_stderr": 0.04229525846816505, | |
| "acc_norm": 0.25, | |
| "acc_norm_stderr": 0.04351941398892446 | |
| }, | |
| "arc_challenge": { | |
| "acc": 0.20477815699658702, | |
| "acc_stderr": 0.01179254433851342, | |
| "acc_norm": 0.2440273037542662, | |
| "acc_norm_stderr": 0.01255144762785626 | |
| }, | |
| "hendrycksTest-high_school_european_history": { | |
| "acc": 0.17575757575757575, | |
| "acc_stderr": 0.02972094300622445, | |
| "acc_norm": 0.22424242424242424, | |
| "acc_norm_stderr": 0.03256866661681102 | |
| }, | |
| "hendrycksTest-miscellaneous": { | |
| "acc": 0.2503192848020434, | |
| "acc_stderr": 0.015491088951494588, | |
| "acc_norm": 0.25287356321839083, | |
| "acc_norm_stderr": 0.015543377313719681 | |
| }, | |
| "hendrycksTest-college_biology": { | |
| "acc": 0.2569444444444444, | |
| "acc_stderr": 0.03653946969442099, | |
| "acc_norm": 0.25, | |
| "acc_norm_stderr": 0.03621034121889507 | |
| }, | |
| "crows_pairs_english_nationality": { | |
| "likelihood_difference": 6.14380787037037, | |
| "likelihood_difference_stderr": 0.5217915071777064, | |
| "pct_stereotype": 0.37037037037037035, | |
| "pct_stereotype_stderr": 0.03293377139415191 | |
| }, | |
| "crows_pairs_french_gender": { | |
| "likelihood_difference": 7.492017133956386, | |
| "likelihood_difference_stderr": 0.4566662635366117, | |
| "pct_stereotype": 0.48286604361370716, | |
| "pct_stereotype_stderr": 0.027934433698537306 | |
| }, | |
| "piqa": { | |
| "acc": 0.5272034820457019, | |
| "acc_stderr": 0.011648545262429021, | |
| "acc_norm": 0.5261153427638737, | |
| "acc_norm_stderr": 0.011649900854263415 | |
| }, | |
| "hendrycksTest-security_studies": { | |
| "acc": 0.31020408163265306, | |
| "acc_stderr": 0.02961345987248438, | |
| "acc_norm": 0.19183673469387755, | |
| "acc_norm_stderr": 0.025206963154225378 | |
| }, | |
| "hendrycksTest-high_school_government_and_politics": { | |
| "acc": 0.19170984455958548, | |
| "acc_stderr": 0.02840895362624527, | |
| "acc_norm": 0.24870466321243523, | |
| "acc_norm_stderr": 0.03119584087770028 | |
| }, | |
| "crows_pairs_french_disability": { | |
| "likelihood_difference": 14.775568181818182, | |
| "likelihood_difference_stderr": 1.4715579883772572, | |
| "pct_stereotype": 0.3939393939393939, | |
| "pct_stereotype_stderr": 0.06060606060606062 | |
| }, | |
| "hendrycksTest-college_computer_science": { | |
| "acc": 0.21, | |
| "acc_stderr": 0.040936018074033256, | |
| "acc_norm": 0.17, | |
| "acc_norm_stderr": 0.03775251680686371 | |
| }, | |
| "hendrycksTest-high_school_mathematics": { | |
| "acc": 0.2074074074074074, | |
| "acc_stderr": 0.02472071319395215, | |
| "acc_norm": 0.26666666666666666, | |
| "acc_norm_stderr": 0.026962424325073824 | |
| }, | |
| "hendrycksTest-human_aging": { | |
| "acc": 0.273542600896861, | |
| "acc_stderr": 0.029918586707798824, | |
| "acc_norm": 0.27802690582959644, | |
| "acc_norm_stderr": 0.030069584874494033 | |
| } | |
| }, | |
| "versions": { | |
| "lambada_openai": 0, | |
| "hendrycksTest-astronomy": 0, | |
| "winogrande": 0, | |
| "hendrycksTest-high_school_us_history": 0, | |
| "crows_pairs_french_age": 0, | |
| "hendrycksTest-high_school_computer_science": 0, | |
| "hendrycksTest-global_facts": 0, | |
| "hendrycksTest-high_school_physics": 0, | |
| "crows_pairs_french_socioeconomic": 0, | |
| "hendrycksTest-international_law": 0, | |
| "hendrycksTest-medical_genetics": 0, | |
| "hendrycksTest-logical_fallacies": 0, | |
| "hendrycksTest-moral_disputes": 0, | |
| "crows_pairs_english_disability": 0, | |
| "hendrycksTest-prehistory": 0, | |
| "hendrycksTest-college_mathematics": 0, | |
| "crows_pairs_french": 0, | |
| "wsc": 0, | |
| "hendrycksTest-electrical_engineering": 0, | |
| "crows_pairs_english_physical_appearance": 0, | |
| "hendrycksTest-management": 0, | |
| "hendrycksTest-machine_learning": 0, | |
| "crows_pairs_english_race_color": 0, | |
| "hendrycksTest-marketing": 0, | |
| "hendrycksTest-high_school_chemistry": 0, | |
| "hendrycksTest-econometrics": 0, | |
| "hendrycksTest-virology": 0, | |
| "hendrycksTest-high_school_psychology": 0, | |
| "hendrycksTest-high_school_geography": 0, | |
| "sciq": 0, | |
| "crows_pairs_french_religion": 0, | |
| "crows_pairs_english_gender": 0, | |
| "hendrycksTest-professional_accounting": 0, | |
| "logiqa": 0, | |
| "hendrycksTest-professional_medicine": 0, | |
| "hendrycksTest-world_religions": 0, | |
| "hendrycksTest-sociology": 0, | |
| "hendrycksTest-professional_psychology": 0, | |
| "hendrycksTest-computer_security": 0, | |
| "hendrycksTest-philosophy": 0, | |
| "crows_pairs_french_race_color": 0, | |
| "hendrycksTest-clinical_knowledge": 0, | |
| "crows_pairs_english": 0, | |
| "crows_pairs_french_nationality": 0, | |
| "hendrycksTest-nutrition": 0, | |
| "hendrycksTest-college_medicine": 0, | |
| "crows_pairs_english_socioeconomic": 0, | |
| "crows_pairs_english_autre": 0, | |
| "hendrycksTest-anatomy": 0, | |
| "hendrycksTest-elementary_mathematics": 0, | |
| "hendrycksTest-high_school_biology": 0, | |
| "crows_pairs_english_sexual_orientation": 0, | |
| "hendrycksTest-high_school_statistics": 0, | |
| "crows_pairs_french_physical_appearance": 0, | |
| "hendrycksTest-formal_logic": 0, | |
| "hendrycksTest-human_sexuality": 0, | |
| "hendrycksTest-moral_scenarios": 0, | |
| "hendrycksTest-abstract_algebra": 0, | |
| "arc_easy": 0, | |
| "hendrycksTest-college_chemistry": 0, | |
| "hendrycksTest-high_school_microeconomics": 0, | |
| "hendrycksTest-jurisprudence": 0, | |
| "hendrycksTest-college_physics": 0, | |
| "hendrycksTest-public_relations": 0, | |
| "crows_pairs_english_religion": 0, | |
| "hendrycksTest-high_school_macroeconomics": 0, | |
| "crows_pairs_french_sexual_orientation": 0, | |
| "hendrycksTest-professional_law": 0, | |
| "hendrycksTest-high_school_world_history": 0, | |
| "hendrycksTest-business_ethics": 0, | |
| "crows_pairs_french_autre": 0, | |
| "hendrycksTest-conceptual_physics": 0, | |
| "crows_pairs_english_age": 0, | |
| "hendrycksTest-us_foreign_policy": 0, | |
| "arc_challenge": 0, | |
| "hendrycksTest-high_school_european_history": 0, | |
| "hendrycksTest-miscellaneous": 0, | |
| "hendrycksTest-college_biology": 0, | |
| "crows_pairs_english_nationality": 0, | |
| "crows_pairs_french_gender": 0, | |
| "piqa": 0, | |
| "hendrycksTest-security_studies": 0, | |
| "hendrycksTest-high_school_government_and_politics": 0, | |
| "crows_pairs_french_disability": 0, | |
| "hendrycksTest-college_computer_science": 0, | |
| "hendrycksTest-high_school_mathematics": 0, | |
| "hendrycksTest-human_aging": 0 | |
| }, | |
| "config": { | |
| "model": "hf-causal", | |
| "model_args": "use_accelerate=True,pretrained=EleutherAI/pythia-v1.1-70m,revision=step32", | |
| "num_fewshot": 0, | |
| "batch_size": 32, | |
| "device": null, | |
| "no_cache": true, | |
| "limit": null, | |
| "bootstrap_iters": 100000, | |
| "description_dict": {} | |
| } | |
| } |