Spaces:
Running
Running
| { | |
| "results": { | |
| "crows_pairs_french_physical_appearance": { | |
| "likelihood_difference": 8.304253472222221, | |
| "likelihood_difference_stderr": 1.1645432805257396, | |
| "pct_stereotype": 0.5694444444444444, | |
| "pct_stereotype_stderr": 0.05876396677084613 | |
| }, | |
| "logiqa": { | |
| "acc": 0.20276497695852536, | |
| "acc_stderr": 0.015770046635584567, | |
| "acc_norm": 0.22734254992319508, | |
| "acc_norm_stderr": 0.016439067675117765 | |
| }, | |
| "hendrycksTest-miscellaneous": { | |
| "acc": 0.25287356321839083, | |
| "acc_stderr": 0.015543377313719681, | |
| "acc_norm": 0.26309067688378035, | |
| "acc_norm_stderr": 0.015745497169049043 | |
| }, | |
| "hendrycksTest-machine_learning": { | |
| "acc": 0.1875, | |
| "acc_stderr": 0.0370468111477387, | |
| "acc_norm": 0.25, | |
| "acc_norm_stderr": 0.04109974682633932 | |
| }, | |
| "crows_pairs_french_nationality": { | |
| "likelihood_difference": 8.420701581027668, | |
| "likelihood_difference_stderr": 0.40347811300443653, | |
| "pct_stereotype": 0.6047430830039525, | |
| "pct_stereotype_stderr": 0.030798170848773863 | |
| }, | |
| "hendrycksTest-high_school_statistics": { | |
| "acc": 0.2222222222222222, | |
| "acc_stderr": 0.02835321286686343, | |
| "acc_norm": 0.2638888888888889, | |
| "acc_norm_stderr": 0.03005820270430985 | |
| }, | |
| "hendrycksTest-college_mathematics": { | |
| "acc": 0.19, | |
| "acc_stderr": 0.03942772444036624, | |
| "acc_norm": 0.24, | |
| "acc_norm_stderr": 0.04292346959909283 | |
| }, | |
| "hendrycksTest-high_school_geography": { | |
| "acc": 0.22727272727272727, | |
| "acc_stderr": 0.029857515673386417, | |
| "acc_norm": 0.2474747474747475, | |
| "acc_norm_stderr": 0.03074630074212451 | |
| }, | |
| "hendrycksTest-college_biology": { | |
| "acc": 0.20833333333333334, | |
| "acc_stderr": 0.03396116205845335, | |
| "acc_norm": 0.24305555555555555, | |
| "acc_norm_stderr": 0.03586879280080341 | |
| }, | |
| "hendrycksTest-philosophy": { | |
| "acc": 0.27009646302250806, | |
| "acc_stderr": 0.025218040373410616, | |
| "acc_norm": 0.27009646302250806, | |
| "acc_norm_stderr": 0.02521804037341062 | |
| }, | |
| "hendrycksTest-elementary_mathematics": { | |
| "acc": 0.20634920634920634, | |
| "acc_stderr": 0.02084229093011467, | |
| "acc_norm": 0.2222222222222222, | |
| "acc_norm_stderr": 0.021411684393694185 | |
| }, | |
| "crows_pairs_english_disability": { | |
| "likelihood_difference": 7.021634615384615, | |
| "likelihood_difference_stderr": 0.97111821919133, | |
| "pct_stereotype": 0.47692307692307695, | |
| "pct_stereotype_stderr": 0.062433396464415106 | |
| }, | |
| "crows_pairs_english_sexual_orientation": { | |
| "likelihood_difference": 5.677083333333333, | |
| "likelihood_difference_stderr": 0.6378864031151475, | |
| "pct_stereotype": 0.5053763440860215, | |
| "pct_stereotype_stderr": 0.05212558986469174 | |
| }, | |
| "crows_pairs_french_socioeconomic": { | |
| "likelihood_difference": 10.822225765306122, | |
| "likelihood_difference_stderr": 0.6868426421668223, | |
| "pct_stereotype": 0.3010204081632653, | |
| "pct_stereotype_stderr": 0.0328483010552734 | |
| }, | |
| "hendrycksTest-moral_scenarios": { | |
| "acc": 0.23798882681564246, | |
| "acc_stderr": 0.014242630070574915, | |
| "acc_norm": 0.27262569832402234, | |
| "acc_norm_stderr": 0.014893391735249588 | |
| }, | |
| "hendrycksTest-security_studies": { | |
| "acc": 0.3183673469387755, | |
| "acc_stderr": 0.02982253379398205, | |
| "acc_norm": 0.22857142857142856, | |
| "acc_norm_stderr": 0.02688214492230775 | |
| }, | |
| "crows_pairs_english_religion": { | |
| "likelihood_difference": 5.092623873873874, | |
| "likelihood_difference_stderr": 0.5870374592750236, | |
| "pct_stereotype": 0.5045045045045045, | |
| "pct_stereotype_stderr": 0.047671194793956616 | |
| }, | |
| "hendrycksTest-human_sexuality": { | |
| "acc": 0.32061068702290074, | |
| "acc_stderr": 0.04093329229834278, | |
| "acc_norm": 0.31297709923664124, | |
| "acc_norm_stderr": 0.04066962905677697 | |
| }, | |
| "crows_pairs_english_autre": { | |
| "likelihood_difference": 5.673295454545454, | |
| "likelihood_difference_stderr": 2.5462810016897417, | |
| "pct_stereotype": 0.45454545454545453, | |
| "pct_stereotype_stderr": 0.15745916432444335 | |
| }, | |
| "hendrycksTest-nutrition": { | |
| "acc": 0.1895424836601307, | |
| "acc_stderr": 0.022442358263336216, | |
| "acc_norm": 0.2647058823529412, | |
| "acc_norm_stderr": 0.02526169121972948 | |
| }, | |
| "wsc": { | |
| "acc": 0.6346153846153846, | |
| "acc_stderr": 0.0474473339327792 | |
| }, | |
| "crows_pairs_french": { | |
| "likelihood_difference": 8.28959824090638, | |
| "likelihood_difference_stderr": 0.1813110538245477, | |
| "pct_stereotype": 0.5724508050089445, | |
| "pct_stereotype_stderr": 0.012084400901134948 | |
| }, | |
| "hendrycksTest-marketing": { | |
| "acc": 0.20512820512820512, | |
| "acc_stderr": 0.02645350805404033, | |
| "acc_norm": 0.23076923076923078, | |
| "acc_norm_stderr": 0.027601921381417607 | |
| }, | |
| "hendrycksTest-high_school_government_and_politics": { | |
| "acc": 0.20725388601036268, | |
| "acc_stderr": 0.02925282329180363, | |
| "acc_norm": 0.27461139896373055, | |
| "acc_norm_stderr": 0.03221024508041156 | |
| }, | |
| "hendrycksTest-professional_law": { | |
| "acc": 0.23533246414602346, | |
| "acc_stderr": 0.010834432543912219, | |
| "acc_norm": 0.2653194263363755, | |
| "acc_norm_stderr": 0.011276198843958866 | |
| }, | |
| "hendrycksTest-college_chemistry": { | |
| "acc": 0.2, | |
| "acc_stderr": 0.04020151261036846, | |
| "acc_norm": 0.23, | |
| "acc_norm_stderr": 0.042295258468165044 | |
| }, | |
| "hendrycksTest-us_foreign_policy": { | |
| "acc": 0.23, | |
| "acc_stderr": 0.04229525846816505, | |
| "acc_norm": 0.28, | |
| "acc_norm_stderr": 0.04512608598542129 | |
| }, | |
| "hendrycksTest-college_medicine": { | |
| "acc": 0.2254335260115607, | |
| "acc_stderr": 0.03186209851641144, | |
| "acc_norm": 0.27167630057803466, | |
| "acc_norm_stderr": 0.03391750322321659 | |
| }, | |
| "hendrycksTest-high_school_physics": { | |
| "acc": 0.2119205298013245, | |
| "acc_stderr": 0.03336767086567977, | |
| "acc_norm": 0.26490066225165565, | |
| "acc_norm_stderr": 0.03603038545360384 | |
| }, | |
| "hendrycksTest-high_school_biology": { | |
| "acc": 0.19032258064516128, | |
| "acc_stderr": 0.022331707611823078, | |
| "acc_norm": 0.2161290322580645, | |
| "acc_norm_stderr": 0.023415293433568515 | |
| }, | |
| "lambada_openai": { | |
| "ppl": 1665636.981895382, | |
| "ppl_stderr": 131507.1881823213, | |
| "acc": 0.0, | |
| "acc_stderr": 0.0 | |
| }, | |
| "crows_pairs_french_sexual_orientation": { | |
| "likelihood_difference": 13.809065934065934, | |
| "likelihood_difference_stderr": 0.8840201832791948, | |
| "pct_stereotype": 0.8021978021978022, | |
| "pct_stereotype_stderr": 0.04198895203196222 | |
| }, | |
| "hendrycksTest-abstract_algebra": { | |
| "acc": 0.2, | |
| "acc_stderr": 0.04020151261036845, | |
| "acc_norm": 0.15, | |
| "acc_norm_stderr": 0.03588702812826372 | |
| }, | |
| "hendrycksTest-business_ethics": { | |
| "acc": 0.31, | |
| "acc_stderr": 0.04648231987117316, | |
| "acc_norm": 0.35, | |
| "acc_norm_stderr": 0.04793724854411019 | |
| }, | |
| "hendrycksTest-human_aging": { | |
| "acc": 0.2825112107623318, | |
| "acc_stderr": 0.03021683101150877, | |
| "acc_norm": 0.27802690582959644, | |
| "acc_norm_stderr": 0.030069584874494033 | |
| }, | |
| "hendrycksTest-moral_disputes": { | |
| "acc": 0.21098265895953758, | |
| "acc_stderr": 0.021966309947043117, | |
| "acc_norm": 0.2023121387283237, | |
| "acc_norm_stderr": 0.021628077380196137 | |
| }, | |
| "crows_pairs_french_religion": { | |
| "likelihood_difference": 9.863315217391305, | |
| "likelihood_difference_stderr": 0.6835325659384403, | |
| "pct_stereotype": 0.6782608695652174, | |
| "pct_stereotype_stderr": 0.04375199868936841 | |
| }, | |
| "hendrycksTest-sociology": { | |
| "acc": 0.2736318407960199, | |
| "acc_stderr": 0.031524391865554044, | |
| "acc_norm": 0.26865671641791045, | |
| "acc_norm_stderr": 0.03134328358208954 | |
| }, | |
| "piqa": { | |
| "acc": 0.5261153427638737, | |
| "acc_stderr": 0.011649900854263423, | |
| "acc_norm": 0.5184983677910773, | |
| "acc_norm_stderr": 0.01165783758381816 | |
| }, | |
| "arc_challenge": { | |
| "acc": 0.20136518771331058, | |
| "acc_stderr": 0.011718927477444262, | |
| "acc_norm": 0.23890784982935154, | |
| "acc_norm_stderr": 0.012461071376316617 | |
| }, | |
| "hendrycksTest-high_school_microeconomics": { | |
| "acc": 0.18067226890756302, | |
| "acc_stderr": 0.02499196496660076, | |
| "acc_norm": 0.27310924369747897, | |
| "acc_norm_stderr": 0.028942004040998167 | |
| }, | |
| "arc_easy": { | |
| "acc": 0.2676767676767677, | |
| "acc_stderr": 0.009085000147099353, | |
| "acc_norm": 0.2756734006734007, | |
| "acc_norm_stderr": 0.009169229476542562 | |
| }, | |
| "winogrande": { | |
| "acc": 0.49171270718232046, | |
| "acc_stderr": 0.014050555322824192 | |
| }, | |
| "hendrycksTest-astronomy": { | |
| "acc": 0.18421052631578946, | |
| "acc_stderr": 0.031546980450822305, | |
| "acc_norm": 0.29605263157894735, | |
| "acc_norm_stderr": 0.03715062154998905 | |
| }, | |
| "hendrycksTest-professional_accounting": { | |
| "acc": 0.24468085106382978, | |
| "acc_stderr": 0.02564555362226673, | |
| "acc_norm": 0.25886524822695034, | |
| "acc_norm_stderr": 0.026129572527180848 | |
| }, | |
| "sciq": { | |
| "acc": 0.219, | |
| "acc_stderr": 0.013084731950262024, | |
| "acc_norm": 0.225, | |
| "acc_norm_stderr": 0.013211720158614751 | |
| }, | |
| "hendrycksTest-high_school_computer_science": { | |
| "acc": 0.23, | |
| "acc_stderr": 0.042295258468165044, | |
| "acc_norm": 0.29, | |
| "acc_norm_stderr": 0.045604802157206845 | |
| }, | |
| "hendrycksTest-conceptual_physics": { | |
| "acc": 0.3021276595744681, | |
| "acc_stderr": 0.030017554471880557, | |
| "acc_norm": 0.2723404255319149, | |
| "acc_norm_stderr": 0.029101290698386698 | |
| }, | |
| "hendrycksTest-medical_genetics": { | |
| "acc": 0.25, | |
| "acc_stderr": 0.04351941398892446, | |
| "acc_norm": 0.25, | |
| "acc_norm_stderr": 0.04351941398892446 | |
| }, | |
| "hendrycksTest-high_school_european_history": { | |
| "acc": 0.15757575757575756, | |
| "acc_stderr": 0.02845038880528435, | |
| "acc_norm": 0.22424242424242424, | |
| "acc_norm_stderr": 0.03256866661681102 | |
| }, | |
| "hendrycksTest-management": { | |
| "acc": 0.23300970873786409, | |
| "acc_stderr": 0.04185832598928315, | |
| "acc_norm": 0.2815533980582524, | |
| "acc_norm_stderr": 0.04453254836326466 | |
| }, | |
| "crows_pairs_english": { | |
| "likelihood_difference": 5.005841905187835, | |
| "likelihood_difference_stderr": 0.1594255961602287, | |
| "pct_stereotype": 0.4442456768038163, | |
| "pct_stereotype_stderr": 0.012137130534698495 | |
| }, | |
| "hendrycksTest-public_relations": { | |
| "acc": 0.2727272727272727, | |
| "acc_stderr": 0.04265792110940588, | |
| "acc_norm": 0.16363636363636364, | |
| "acc_norm_stderr": 0.03543433054298678 | |
| }, | |
| "hendrycksTest-computer_security": { | |
| "acc": 0.24, | |
| "acc_stderr": 0.042923469599092816, | |
| "acc_norm": 0.29, | |
| "acc_norm_stderr": 0.045604802157206845 | |
| }, | |
| "hendrycksTest-virology": { | |
| "acc": 0.18674698795180722, | |
| "acc_stderr": 0.030338749144500618, | |
| "acc_norm": 0.23493975903614459, | |
| "acc_norm_stderr": 0.03300533186128922 | |
| }, | |
| "hendrycksTest-high_school_mathematics": { | |
| "acc": 0.14814814814814814, | |
| "acc_stderr": 0.02165977842211803, | |
| "acc_norm": 0.22592592592592592, | |
| "acc_norm_stderr": 0.02549753263960955 | |
| }, | |
| "hendrycksTest-anatomy": { | |
| "acc": 0.18518518518518517, | |
| "acc_stderr": 0.03355677216313142, | |
| "acc_norm": 0.26666666666666666, | |
| "acc_norm_stderr": 0.038201699145179055 | |
| }, | |
| "hendrycksTest-formal_logic": { | |
| "acc": 0.2857142857142857, | |
| "acc_stderr": 0.04040610178208841, | |
| "acc_norm": 0.2777777777777778, | |
| "acc_norm_stderr": 0.040061680838488774 | |
| }, | |
| "hendrycksTest-jurisprudence": { | |
| "acc": 0.1574074074074074, | |
| "acc_stderr": 0.035207039905179656, | |
| "acc_norm": 0.23148148148148148, | |
| "acc_norm_stderr": 0.04077494709252627 | |
| }, | |
| "hendrycksTest-international_law": { | |
| "acc": 0.09090909090909091, | |
| "acc_stderr": 0.026243194054073878, | |
| "acc_norm": 0.2727272727272727, | |
| "acc_norm_stderr": 0.04065578140908705 | |
| }, | |
| "crows_pairs_french_autre": { | |
| "likelihood_difference": 6.225961538461538, | |
| "likelihood_difference_stderr": 1.0845778061093316, | |
| "pct_stereotype": 0.6153846153846154, | |
| "pct_stereotype_stderr": 0.14044168141158106 | |
| }, | |
| "hendrycksTest-global_facts": { | |
| "acc": 0.23, | |
| "acc_stderr": 0.04229525846816506, | |
| "acc_norm": 0.23, | |
| "acc_norm_stderr": 0.04229525846816506 | |
| }, | |
| "crows_pairs_english_gender": { | |
| "likelihood_difference": 4.482080078125, | |
| "likelihood_difference_stderr": 0.4507987456254625, | |
| "pct_stereotype": 0.54375, | |
| "pct_stereotype_stderr": 0.027887252708654657 | |
| }, | |
| "crows_pairs_english_nationality": { | |
| "likelihood_difference": 6.005063657407407, | |
| "likelihood_difference_stderr": 0.4386043275416813, | |
| "pct_stereotype": 0.3194444444444444, | |
| "pct_stereotype_stderr": 0.0317987634217685 | |
| }, | |
| "crows_pairs_english_race_color": { | |
| "likelihood_difference": 4.779035433070866, | |
| "likelihood_difference_stderr": 0.26206678827654106, | |
| "pct_stereotype": 0.3346456692913386, | |
| "pct_stereotype_stderr": 0.02095632470166831 | |
| }, | |
| "crows_pairs_french_race_color": { | |
| "likelihood_difference": 7.161277173913043, | |
| "likelihood_difference_stderr": 0.26238666011376527, | |
| "pct_stereotype": 0.7108695652173913, | |
| "pct_stereotype_stderr": 0.02116096760624947 | |
| }, | |
| "crows_pairs_french_disability": { | |
| "likelihood_difference": 12.47064393939394, | |
| "likelihood_difference_stderr": 1.234102889806993, | |
| "pct_stereotype": 0.3333333333333333, | |
| "pct_stereotype_stderr": 0.0584705346204686 | |
| }, | |
| "hendrycksTest-high_school_world_history": { | |
| "acc": 0.19831223628691982, | |
| "acc_stderr": 0.025955020841621115, | |
| "acc_norm": 0.2616033755274262, | |
| "acc_norm_stderr": 0.028609516716994934 | |
| }, | |
| "hendrycksTest-logical_fallacies": { | |
| "acc": 0.20245398773006135, | |
| "acc_stderr": 0.031570650789119, | |
| "acc_norm": 0.3006134969325153, | |
| "acc_norm_stderr": 0.03602511318806771 | |
| }, | |
| "hendrycksTest-high_school_chemistry": { | |
| "acc": 0.18719211822660098, | |
| "acc_stderr": 0.027444924966882618, | |
| "acc_norm": 0.21674876847290642, | |
| "acc_norm_stderr": 0.02899033125251624 | |
| }, | |
| "hendrycksTest-college_computer_science": { | |
| "acc": 0.23, | |
| "acc_stderr": 0.04229525846816506, | |
| "acc_norm": 0.21, | |
| "acc_norm_stderr": 0.040936018074033256 | |
| }, | |
| "crows_pairs_french_age": { | |
| "likelihood_difference": 4.382638888888889, | |
| "likelihood_difference_stderr": 0.563720955887399, | |
| "pct_stereotype": 0.4666666666666667, | |
| "pct_stereotype_stderr": 0.05288198530254015 | |
| }, | |
| "hendrycksTest-high_school_macroeconomics": { | |
| "acc": 0.21025641025641026, | |
| "acc_stderr": 0.020660597485026938, | |
| "acc_norm": 0.26153846153846155, | |
| "acc_norm_stderr": 0.022282141204204426 | |
| }, | |
| "hendrycksTest-professional_psychology": { | |
| "acc": 0.24183006535947713, | |
| "acc_stderr": 0.017322789207784326, | |
| "acc_norm": 0.26633986928104575, | |
| "acc_norm_stderr": 0.017883188134667164 | |
| }, | |
| "hendrycksTest-econometrics": { | |
| "acc": 0.21929824561403508, | |
| "acc_stderr": 0.03892431106518752, | |
| "acc_norm": 0.2719298245614035, | |
| "acc_norm_stderr": 0.04185774424022057 | |
| }, | |
| "crows_pairs_english_age": { | |
| "likelihood_difference": 3.299793956043956, | |
| "likelihood_difference_stderr": 0.48606624275634924, | |
| "pct_stereotype": 0.4835164835164835, | |
| "pct_stereotype_stderr": 0.05267597952306975 | |
| }, | |
| "hendrycksTest-clinical_knowledge": { | |
| "acc": 0.18490566037735848, | |
| "acc_stderr": 0.02389335183446432, | |
| "acc_norm": 0.30943396226415093, | |
| "acc_norm_stderr": 0.028450154794118627 | |
| }, | |
| "hendrycksTest-college_physics": { | |
| "acc": 0.17647058823529413, | |
| "acc_stderr": 0.0379328118530781, | |
| "acc_norm": 0.17647058823529413, | |
| "acc_norm_stderr": 0.037932811853078105 | |
| }, | |
| "crows_pairs_english_physical_appearance": { | |
| "likelihood_difference": 5.136284722222222, | |
| "likelihood_difference_stderr": 0.6655017702667131, | |
| "pct_stereotype": 0.5277777777777778, | |
| "pct_stereotype_stderr": 0.05924743948371487 | |
| }, | |
| "crows_pairs_french_gender": { | |
| "likelihood_difference": 6.4443146417445485, | |
| "likelihood_difference_stderr": 0.3439159663666914, | |
| "pct_stereotype": 0.48909657320872274, | |
| "pct_stereotype_stderr": 0.02794420307081864 | |
| }, | |
| "hendrycksTest-high_school_us_history": { | |
| "acc": 0.19117647058823528, | |
| "acc_stderr": 0.02759917430064077, | |
| "acc_norm": 0.23039215686274508, | |
| "acc_norm_stderr": 0.029554292605695063 | |
| }, | |
| "hendrycksTest-electrical_engineering": { | |
| "acc": 0.2413793103448276, | |
| "acc_stderr": 0.03565998174135302, | |
| "acc_norm": 0.2620689655172414, | |
| "acc_norm_stderr": 0.036646663372252565 | |
| }, | |
| "crows_pairs_english_socioeconomic": { | |
| "likelihood_difference": 5.01858552631579, | |
| "likelihood_difference_stderr": 0.4299366436044498, | |
| "pct_stereotype": 0.5842105263157895, | |
| "pct_stereotype_stderr": 0.0358501132552001 | |
| }, | |
| "hendrycksTest-prehistory": { | |
| "acc": 0.2623456790123457, | |
| "acc_stderr": 0.024477222856135118, | |
| "acc_norm": 0.24691358024691357, | |
| "acc_norm_stderr": 0.023993501709042124 | |
| }, | |
| "hendrycksTest-high_school_psychology": { | |
| "acc": 0.23669724770642203, | |
| "acc_stderr": 0.01822407811729907, | |
| "acc_norm": 0.24770642201834864, | |
| "acc_norm_stderr": 0.018508143602547815 | |
| }, | |
| "hendrycksTest-professional_medicine": { | |
| "acc": 0.26838235294117646, | |
| "acc_stderr": 0.026917481224377232, | |
| "acc_norm": 0.28308823529411764, | |
| "acc_norm_stderr": 0.027365861131513805 | |
| }, | |
| "hendrycksTest-world_religions": { | |
| "acc": 0.1871345029239766, | |
| "acc_stderr": 0.029913127232368043, | |
| "acc_norm": 0.23976608187134502, | |
| "acc_norm_stderr": 0.032744852119469564 | |
| } | |
| }, | |
| "versions": { | |
| "crows_pairs_french_physical_appearance": 0, | |
| "logiqa": 0, | |
| "hendrycksTest-miscellaneous": 0, | |
| "hendrycksTest-machine_learning": 0, | |
| "crows_pairs_french_nationality": 0, | |
| "hendrycksTest-high_school_statistics": 0, | |
| "hendrycksTest-college_mathematics": 0, | |
| "hendrycksTest-high_school_geography": 0, | |
| "hendrycksTest-college_biology": 0, | |
| "hendrycksTest-philosophy": 0, | |
| "hendrycksTest-elementary_mathematics": 0, | |
| "crows_pairs_english_disability": 0, | |
| "crows_pairs_english_sexual_orientation": 0, | |
| "crows_pairs_french_socioeconomic": 0, | |
| "hendrycksTest-moral_scenarios": 0, | |
| "hendrycksTest-security_studies": 0, | |
| "crows_pairs_english_religion": 0, | |
| "hendrycksTest-human_sexuality": 0, | |
| "crows_pairs_english_autre": 0, | |
| "hendrycksTest-nutrition": 0, | |
| "wsc": 0, | |
| "crows_pairs_french": 0, | |
| "hendrycksTest-marketing": 0, | |
| "hendrycksTest-high_school_government_and_politics": 0, | |
| "hendrycksTest-professional_law": 0, | |
| "hendrycksTest-college_chemistry": 0, | |
| "hendrycksTest-us_foreign_policy": 0, | |
| "hendrycksTest-college_medicine": 0, | |
| "hendrycksTest-high_school_physics": 0, | |
| "hendrycksTest-high_school_biology": 0, | |
| "lambada_openai": 0, | |
| "crows_pairs_french_sexual_orientation": 0, | |
| "hendrycksTest-abstract_algebra": 0, | |
| "hendrycksTest-business_ethics": 0, | |
| "hendrycksTest-human_aging": 0, | |
| "hendrycksTest-moral_disputes": 0, | |
| "crows_pairs_french_religion": 0, | |
| "hendrycksTest-sociology": 0, | |
| "piqa": 0, | |
| "arc_challenge": 0, | |
| "hendrycksTest-high_school_microeconomics": 0, | |
| "arc_easy": 0, | |
| "winogrande": 0, | |
| "hendrycksTest-astronomy": 0, | |
| "hendrycksTest-professional_accounting": 0, | |
| "sciq": 0, | |
| "hendrycksTest-high_school_computer_science": 0, | |
| "hendrycksTest-conceptual_physics": 0, | |
| "hendrycksTest-medical_genetics": 0, | |
| "hendrycksTest-high_school_european_history": 0, | |
| "hendrycksTest-management": 0, | |
| "crows_pairs_english": 0, | |
| "hendrycksTest-public_relations": 0, | |
| "hendrycksTest-computer_security": 0, | |
| "hendrycksTest-virology": 0, | |
| "hendrycksTest-high_school_mathematics": 0, | |
| "hendrycksTest-anatomy": 0, | |
| "hendrycksTest-formal_logic": 0, | |
| "hendrycksTest-jurisprudence": 0, | |
| "hendrycksTest-international_law": 0, | |
| "crows_pairs_french_autre": 0, | |
| "hendrycksTest-global_facts": 0, | |
| "crows_pairs_english_gender": 0, | |
| "crows_pairs_english_nationality": 0, | |
| "crows_pairs_english_race_color": 0, | |
| "crows_pairs_french_race_color": 0, | |
| "crows_pairs_french_disability": 0, | |
| "hendrycksTest-high_school_world_history": 0, | |
| "hendrycksTest-logical_fallacies": 0, | |
| "hendrycksTest-high_school_chemistry": 0, | |
| "hendrycksTest-college_computer_science": 0, | |
| "crows_pairs_french_age": 0, | |
| "hendrycksTest-high_school_macroeconomics": 0, | |
| "hendrycksTest-professional_psychology": 0, | |
| "hendrycksTest-econometrics": 0, | |
| "crows_pairs_english_age": 0, | |
| "hendrycksTest-clinical_knowledge": 0, | |
| "hendrycksTest-college_physics": 0, | |
| "crows_pairs_english_physical_appearance": 0, | |
| "crows_pairs_french_gender": 0, | |
| "hendrycksTest-high_school_us_history": 0, | |
| "hendrycksTest-electrical_engineering": 0, | |
| "crows_pairs_english_socioeconomic": 0, | |
| "hendrycksTest-prehistory": 0, | |
| "hendrycksTest-high_school_psychology": 0, | |
| "hendrycksTest-professional_medicine": 0, | |
| "hendrycksTest-world_religions": 0 | |
| }, | |
| "config": { | |
| "model": "hf-causal", | |
| "model_args": "use_accelerate=True,pretrained=EleutherAI/pythia-v1.1-70m,revision=step128", | |
| "num_fewshot": 0, | |
| "batch_size": 32, | |
| "device": null, | |
| "no_cache": true, | |
| "limit": null, | |
| "bootstrap_iters": 100000, | |
| "description_dict": {} | |
| } | |
| } |