Spaces:
Running
Running
| { | |
| "results": { | |
| "crows_pairs_french": { | |
| "likelihood_difference": 6.341835308586762, | |
| "likelihood_difference_stderr": 0.15048967426229426, | |
| "pct_stereotype": 0.49314251639833034, | |
| "pct_stereotype_stderr": 0.012212150501851282 | |
| }, | |
| "hendrycksTest-virology": { | |
| "acc": 0.2289156626506024, | |
| "acc_stderr": 0.03270745277352477, | |
| "acc_norm": 0.24096385542168675, | |
| "acc_norm_stderr": 0.03329394119073528 | |
| }, | |
| "hendrycksTest-econometrics": { | |
| "acc": 0.2631578947368421, | |
| "acc_stderr": 0.0414243971948936, | |
| "acc_norm": 0.2982456140350877, | |
| "acc_norm_stderr": 0.04303684033537315 | |
| }, | |
| "hendrycksTest-high_school_psychology": { | |
| "acc": 0.23669724770642203, | |
| "acc_stderr": 0.01822407811729908, | |
| "acc_norm": 0.25137614678899084, | |
| "acc_norm_stderr": 0.018599206360287415 | |
| }, | |
| "crows_pairs_english_socioeconomic": { | |
| "likelihood_difference": 4.267434210526316, | |
| "likelihood_difference_stderr": 0.33123322148424716, | |
| "pct_stereotype": 0.5210526315789473, | |
| "pct_stereotype_stderr": 0.03633739504773335 | |
| }, | |
| "hendrycksTest-security_studies": { | |
| "acc": 0.3224489795918367, | |
| "acc_stderr": 0.029923100563683913, | |
| "acc_norm": 0.2, | |
| "acc_norm_stderr": 0.025607375986579157 | |
| }, | |
| "hendrycksTest-high_school_macroeconomics": { | |
| "acc": 0.2128205128205128, | |
| "acc_stderr": 0.020752423722128006, | |
| "acc_norm": 0.24615384615384617, | |
| "acc_norm_stderr": 0.021840866990423088 | |
| }, | |
| "crows_pairs_english_religion": { | |
| "likelihood_difference": 3.94481981981982, | |
| "likelihood_difference_stderr": 0.43861537266600115, | |
| "pct_stereotype": 0.5135135135135135, | |
| "pct_stereotype_stderr": 0.04765571461988585 | |
| }, | |
| "hendrycksTest-high_school_geography": { | |
| "acc": 0.23737373737373738, | |
| "acc_stderr": 0.030313710538198892, | |
| "acc_norm": 0.26262626262626265, | |
| "acc_norm_stderr": 0.03135305009533087 | |
| }, | |
| "crows_pairs_english": { | |
| "likelihood_difference": 4.10218209600477, | |
| "likelihood_difference_stderr": 0.1236077959409224, | |
| "pct_stereotype": 0.48598688133571855, | |
| "pct_stereotype_stderr": 0.012208501686447064 | |
| }, | |
| "logiqa": { | |
| "acc": 0.19201228878648233, | |
| "acc_stderr": 0.01544934998590095, | |
| "acc_norm": 0.22427035330261136, | |
| "acc_norm_stderr": 0.016360043348265515 | |
| }, | |
| "hendrycksTest-professional_medicine": { | |
| "acc": 0.22794117647058823, | |
| "acc_stderr": 0.025483081468029804, | |
| "acc_norm": 0.22426470588235295, | |
| "acc_norm_stderr": 0.025336848563332348 | |
| }, | |
| "hendrycksTest-moral_disputes": { | |
| "acc": 0.22254335260115607, | |
| "acc_stderr": 0.02239421566194282, | |
| "acc_norm": 0.17630057803468208, | |
| "acc_norm_stderr": 0.02051642567249071 | |
| }, | |
| "crows_pairs_french_age": { | |
| "likelihood_difference": 3.9694444444444446, | |
| "likelihood_difference_stderr": 0.42986839067167343, | |
| "pct_stereotype": 0.4444444444444444, | |
| "pct_stereotype_stderr": 0.052671718126664185 | |
| }, | |
| "crows_pairs_french_gender": { | |
| "likelihood_difference": 4.424454828660436, | |
| "likelihood_difference_stderr": 0.22752081870537358, | |
| "pct_stereotype": 0.5389408099688473, | |
| "pct_stereotype_stderr": 0.027865952192986033 | |
| }, | |
| "hendrycksTest-college_biology": { | |
| "acc": 0.2222222222222222, | |
| "acc_stderr": 0.034765901043041336, | |
| "acc_norm": 0.24305555555555555, | |
| "acc_norm_stderr": 0.03586879280080342 | |
| }, | |
| "hendrycksTest-high_school_physics": { | |
| "acc": 0.2185430463576159, | |
| "acc_stderr": 0.03374235550425694, | |
| "acc_norm": 0.25165562913907286, | |
| "acc_norm_stderr": 0.035433042343899844 | |
| }, | |
| "crows_pairs_french_physical_appearance": { | |
| "likelihood_difference": 6.619357638888889, | |
| "likelihood_difference_stderr": 0.8210291220143636, | |
| "pct_stereotype": 0.5555555555555556, | |
| "pct_stereotype_stderr": 0.05897165471491952 | |
| }, | |
| "crows_pairs_french_disability": { | |
| "likelihood_difference": 8.846590909090908, | |
| "likelihood_difference_stderr": 0.9260560084979663, | |
| "pct_stereotype": 0.4090909090909091, | |
| "pct_stereotype_stderr": 0.060983672113630656 | |
| }, | |
| "hendrycksTest-global_facts": { | |
| "acc": 0.26, | |
| "acc_stderr": 0.04408440022768079, | |
| "acc_norm": 0.27, | |
| "acc_norm_stderr": 0.044619604333847394 | |
| }, | |
| "crows_pairs_french_socioeconomic": { | |
| "likelihood_difference": 6.335817920918367, | |
| "likelihood_difference_stderr": 0.49605368420562174, | |
| "pct_stereotype": 0.3877551020408163, | |
| "pct_stereotype_stderr": 0.03489185364347385 | |
| }, | |
| "hendrycksTest-jurisprudence": { | |
| "acc": 0.18518518518518517, | |
| "acc_stderr": 0.03755265865037181, | |
| "acc_norm": 0.18518518518518517, | |
| "acc_norm_stderr": 0.03755265865037182 | |
| }, | |
| "hendrycksTest-machine_learning": { | |
| "acc": 0.24107142857142858, | |
| "acc_stderr": 0.04059867246952686, | |
| "acc_norm": 0.2767857142857143, | |
| "acc_norm_stderr": 0.042466243366976256 | |
| }, | |
| "hendrycksTest-high_school_mathematics": { | |
| "acc": 0.14074074074074075, | |
| "acc_stderr": 0.0212029303435688, | |
| "acc_norm": 0.2, | |
| "acc_norm_stderr": 0.024388430433987664 | |
| }, | |
| "hendrycksTest-human_sexuality": { | |
| "acc": 0.2900763358778626, | |
| "acc_stderr": 0.03980066246467765, | |
| "acc_norm": 0.31297709923664124, | |
| "acc_norm_stderr": 0.04066962905677698 | |
| }, | |
| "hendrycksTest-high_school_computer_science": { | |
| "acc": 0.23, | |
| "acc_stderr": 0.04229525846816505, | |
| "acc_norm": 0.23, | |
| "acc_norm_stderr": 0.042295258468165044 | |
| }, | |
| "hendrycksTest-high_school_government_and_politics": { | |
| "acc": 0.18652849740932642, | |
| "acc_stderr": 0.02811209121011746, | |
| "acc_norm": 0.2538860103626943, | |
| "acc_norm_stderr": 0.03141024780565319 | |
| }, | |
| "hendrycksTest-professional_law": { | |
| "acc": 0.2379400260756193, | |
| "acc_stderr": 0.010875700787694231, | |
| "acc_norm": 0.26988265971316816, | |
| "acc_norm_stderr": 0.011337381084250423 | |
| }, | |
| "hendrycksTest-clinical_knowledge": { | |
| "acc": 0.2, | |
| "acc_stderr": 0.024618298195866518, | |
| "acc_norm": 0.2943396226415094, | |
| "acc_norm_stderr": 0.028049186315695245 | |
| }, | |
| "hendrycksTest-high_school_chemistry": { | |
| "acc": 0.20689655172413793, | |
| "acc_stderr": 0.02850137816789395, | |
| "acc_norm": 0.2512315270935961, | |
| "acc_norm_stderr": 0.030516530732694436 | |
| }, | |
| "hendrycksTest-management": { | |
| "acc": 0.22330097087378642, | |
| "acc_stderr": 0.04123553189891431, | |
| "acc_norm": 0.24271844660194175, | |
| "acc_norm_stderr": 0.04245022486384495 | |
| }, | |
| "hendrycksTest-astronomy": { | |
| "acc": 0.21710526315789475, | |
| "acc_stderr": 0.03355045304882924, | |
| "acc_norm": 0.2894736842105263, | |
| "acc_norm_stderr": 0.03690677986137282 | |
| }, | |
| "hendrycksTest-high_school_european_history": { | |
| "acc": 0.1393939393939394, | |
| "acc_stderr": 0.0270459488258654, | |
| "acc_norm": 0.23636363636363636, | |
| "acc_norm_stderr": 0.03317505930009182 | |
| }, | |
| "hendrycksTest-college_medicine": { | |
| "acc": 0.24277456647398843, | |
| "acc_stderr": 0.0326926380614177, | |
| "acc_norm": 0.2254335260115607, | |
| "acc_norm_stderr": 0.03186209851641143 | |
| }, | |
| "hendrycksTest-college_physics": { | |
| "acc": 0.16666666666666666, | |
| "acc_stderr": 0.03708284662416542, | |
| "acc_norm": 0.24509803921568626, | |
| "acc_norm_stderr": 0.042801058373643966 | |
| }, | |
| "hendrycksTest-high_school_microeconomics": { | |
| "acc": 0.21008403361344538, | |
| "acc_stderr": 0.026461398717471874, | |
| "acc_norm": 0.27310924369747897, | |
| "acc_norm_stderr": 0.02894200404099817 | |
| }, | |
| "crows_pairs_english_physical_appearance": { | |
| "likelihood_difference": 3.7877604166666665, | |
| "likelihood_difference_stderr": 0.4734800383795231, | |
| "pct_stereotype": 0.5138888888888888, | |
| "pct_stereotype_stderr": 0.05931618532716555 | |
| }, | |
| "hendrycksTest-abstract_algebra": { | |
| "acc": 0.19, | |
| "acc_stderr": 0.03942772444036625, | |
| "acc_norm": 0.2, | |
| "acc_norm_stderr": 0.04020151261036843 | |
| }, | |
| "hendrycksTest-elementary_mathematics": { | |
| "acc": 0.2275132275132275, | |
| "acc_stderr": 0.021591269407823774, | |
| "acc_norm": 0.2222222222222222, | |
| "acc_norm_stderr": 0.021411684393694185 | |
| }, | |
| "hendrycksTest-human_aging": { | |
| "acc": 0.3452914798206278, | |
| "acc_stderr": 0.03191100192835794, | |
| "acc_norm": 0.2825112107623318, | |
| "acc_norm_stderr": 0.030216831011508773 | |
| }, | |
| "winogrande": { | |
| "acc": 0.5098658247829518, | |
| "acc_stderr": 0.014049749833367592 | |
| }, | |
| "crows_pairs_english_disability": { | |
| "likelihood_difference": 6.442788461538462, | |
| "likelihood_difference_stderr": 0.7741982131043712, | |
| "pct_stereotype": 0.5230769230769231, | |
| "pct_stereotype_stderr": 0.06243339646441512 | |
| }, | |
| "crows_pairs_french_sexual_orientation": { | |
| "likelihood_difference": 14.371565934065934, | |
| "likelihood_difference_stderr": 1.0132795779676502, | |
| "pct_stereotype": 0.8021978021978022, | |
| "pct_stereotype_stderr": 0.04198895203196222 | |
| }, | |
| "hendrycksTest-high_school_biology": { | |
| "acc": 0.22258064516129034, | |
| "acc_stderr": 0.023664216671642514, | |
| "acc_norm": 0.24516129032258063, | |
| "acc_norm_stderr": 0.024472243840895525 | |
| }, | |
| "arc_challenge": { | |
| "acc": 0.18003412969283278, | |
| "acc_stderr": 0.011227856729050028, | |
| "acc_norm": 0.2175767918088737, | |
| "acc_norm_stderr": 0.012057262020972499 | |
| }, | |
| "hendrycksTest-us_foreign_policy": { | |
| "acc": 0.2, | |
| "acc_stderr": 0.04020151261036845, | |
| "acc_norm": 0.24, | |
| "acc_norm_stderr": 0.04292346959909283 | |
| }, | |
| "hendrycksTest-college_computer_science": { | |
| "acc": 0.24, | |
| "acc_stderr": 0.04292346959909282, | |
| "acc_norm": 0.19, | |
| "acc_norm_stderr": 0.03942772444036624 | |
| }, | |
| "lambada_openai": { | |
| "ppl": 116756.33428953367, | |
| "ppl_stderr": 6456.789280142739, | |
| "acc": 0.0, | |
| "acc_stderr": 0.0 | |
| }, | |
| "crows_pairs_english_sexual_orientation": { | |
| "likelihood_difference": 4.444220430107527, | |
| "likelihood_difference_stderr": 0.6472437756111237, | |
| "pct_stereotype": 0.7096774193548387, | |
| "pct_stereotype_stderr": 0.04732351421824121 | |
| }, | |
| "hendrycksTest-high_school_statistics": { | |
| "acc": 0.25462962962962965, | |
| "acc_stderr": 0.02971127586000536, | |
| "acc_norm": 0.24074074074074073, | |
| "acc_norm_stderr": 0.029157522184605596 | |
| }, | |
| "hendrycksTest-computer_security": { | |
| "acc": 0.27, | |
| "acc_stderr": 0.04461960433384739, | |
| "acc_norm": 0.3, | |
| "acc_norm_stderr": 0.046056618647183814 | |
| }, | |
| "crows_pairs_french_race_color": { | |
| "likelihood_difference": 4.9554347826086955, | |
| "likelihood_difference_stderr": 0.22275405195298537, | |
| "pct_stereotype": 0.5260869565217391, | |
| "pct_stereotype_stderr": 0.0233062153668594 | |
| }, | |
| "crows_pairs_english_age": { | |
| "likelihood_difference": 3.65625, | |
| "likelihood_difference_stderr": 0.4765308636339587, | |
| "pct_stereotype": 0.38461538461538464, | |
| "pct_stereotype_stderr": 0.051282051282051246 | |
| }, | |
| "hendrycksTest-philosophy": { | |
| "acc": 0.22186495176848875, | |
| "acc_stderr": 0.02359885829286305, | |
| "acc_norm": 0.2797427652733119, | |
| "acc_norm_stderr": 0.0254942593506949 | |
| }, | |
| "hendrycksTest-electrical_engineering": { | |
| "acc": 0.2827586206896552, | |
| "acc_stderr": 0.037528339580033376, | |
| "acc_norm": 0.2689655172413793, | |
| "acc_norm_stderr": 0.036951833116502325 | |
| }, | |
| "hendrycksTest-sociology": { | |
| "acc": 0.21393034825870647, | |
| "acc_stderr": 0.028996909693328927, | |
| "acc_norm": 0.21393034825870647, | |
| "acc_norm_stderr": 0.02899690969332891 | |
| }, | |
| "hendrycksTest-professional_accounting": { | |
| "acc": 0.2695035460992908, | |
| "acc_stderr": 0.026469036818590624, | |
| "acc_norm": 0.2730496453900709, | |
| "acc_norm_stderr": 0.02657786094330786 | |
| }, | |
| "hendrycksTest-miscellaneous": { | |
| "acc": 0.24393358876117496, | |
| "acc_stderr": 0.01535721266582948, | |
| "acc_norm": 0.25287356321839083, | |
| "acc_norm_stderr": 0.015543377313719681 | |
| }, | |
| "sciq": { | |
| "acc": 0.264, | |
| "acc_stderr": 0.01394627184944047, | |
| "acc_norm": 0.275, | |
| "acc_norm_stderr": 0.014127086556490528 | |
| }, | |
| "crows_pairs_french_autre": { | |
| "likelihood_difference": 5.8173076923076925, | |
| "likelihood_difference_stderr": 0.8524880376227814, | |
| "pct_stereotype": 0.3076923076923077, | |
| "pct_stereotype_stderr": 0.13323467750529824 | |
| }, | |
| "hendrycksTest-public_relations": { | |
| "acc": 0.3181818181818182, | |
| "acc_stderr": 0.044612721759105085, | |
| "acc_norm": 0.15454545454545454, | |
| "acc_norm_stderr": 0.03462262571262667 | |
| }, | |
| "crows_pairs_french_religion": { | |
| "likelihood_difference": 7.920380434782609, | |
| "likelihood_difference_stderr": 0.5131357048721925, | |
| "pct_stereotype": 0.5652173913043478, | |
| "pct_stereotype_stderr": 0.046429222863564275 | |
| }, | |
| "hendrycksTest-international_law": { | |
| "acc": 0.12396694214876033, | |
| "acc_stderr": 0.030083098716035227, | |
| "acc_norm": 0.2727272727272727, | |
| "acc_norm_stderr": 0.04065578140908705 | |
| }, | |
| "hendrycksTest-anatomy": { | |
| "acc": 0.22962962962962963, | |
| "acc_stderr": 0.036333844140734664, | |
| "acc_norm": 0.2518518518518518, | |
| "acc_norm_stderr": 0.03749850709174023 | |
| }, | |
| "hendrycksTest-conceptual_physics": { | |
| "acc": 0.2765957446808511, | |
| "acc_stderr": 0.029241883869628827, | |
| "acc_norm": 0.20425531914893616, | |
| "acc_norm_stderr": 0.026355158413349424 | |
| }, | |
| "wsc": { | |
| "acc": 0.5192307692307693, | |
| "acc_stderr": 0.049230010729780505 | |
| }, | |
| "hendrycksTest-moral_scenarios": { | |
| "acc": 0.23798882681564246, | |
| "acc_stderr": 0.014242630070574915, | |
| "acc_norm": 0.27262569832402234, | |
| "acc_norm_stderr": 0.014893391735249588 | |
| }, | |
| "hendrycksTest-medical_genetics": { | |
| "acc": 0.23, | |
| "acc_stderr": 0.042295258468165044, | |
| "acc_norm": 0.22, | |
| "acc_norm_stderr": 0.04163331998932269 | |
| }, | |
| "piqa": { | |
| "acc": 0.5386289445048966, | |
| "acc_stderr": 0.011630956681145914, | |
| "acc_norm": 0.5244831338411317, | |
| "acc_norm_stderr": 0.011651830225709979 | |
| }, | |
| "hendrycksTest-college_mathematics": { | |
| "acc": 0.16, | |
| "acc_stderr": 0.03684529491774709, | |
| "acc_norm": 0.19, | |
| "acc_norm_stderr": 0.03942772444036625 | |
| }, | |
| "hendrycksTest-prehistory": { | |
| "acc": 0.27469135802469136, | |
| "acc_stderr": 0.024836057868294688, | |
| "acc_norm": 0.2191358024691358, | |
| "acc_norm_stderr": 0.023016705640262196 | |
| }, | |
| "hendrycksTest-professional_psychology": { | |
| "acc": 0.2369281045751634, | |
| "acc_stderr": 0.017201662169789782, | |
| "acc_norm": 0.2973856209150327, | |
| "acc_norm_stderr": 0.01849259653639695 | |
| }, | |
| "hendrycksTest-logical_fallacies": { | |
| "acc": 0.2147239263803681, | |
| "acc_stderr": 0.03226219377286774, | |
| "acc_norm": 0.3128834355828221, | |
| "acc_norm_stderr": 0.036429145782924055 | |
| }, | |
| "crows_pairs_english_nationality": { | |
| "likelihood_difference": 4.462456597222222, | |
| "likelihood_difference_stderr": 0.33400887699163057, | |
| "pct_stereotype": 0.33796296296296297, | |
| "pct_stereotype_stderr": 0.03225941352631295 | |
| }, | |
| "hendrycksTest-formal_logic": { | |
| "acc": 0.23015873015873015, | |
| "acc_stderr": 0.03764950879790607, | |
| "acc_norm": 0.25396825396825395, | |
| "acc_norm_stderr": 0.038932596106046755 | |
| }, | |
| "crows_pairs_english_gender": { | |
| "likelihood_difference": 3.560498046875, | |
| "likelihood_difference_stderr": 0.32984136074752396, | |
| "pct_stereotype": 0.5375, | |
| "pct_stereotype_stderr": 0.02791577963000664 | |
| }, | |
| "hendrycksTest-high_school_world_history": { | |
| "acc": 0.189873417721519, | |
| "acc_stderr": 0.025530100460233497, | |
| "acc_norm": 0.23628691983122363, | |
| "acc_norm_stderr": 0.02765215314415925 | |
| }, | |
| "hendrycksTest-college_chemistry": { | |
| "acc": 0.24, | |
| "acc_stderr": 0.042923469599092816, | |
| "acc_norm": 0.21, | |
| "acc_norm_stderr": 0.040936018074033256 | |
| }, | |
| "hendrycksTest-business_ethics": { | |
| "acc": 0.23, | |
| "acc_stderr": 0.04229525846816506, | |
| "acc_norm": 0.29, | |
| "acc_norm_stderr": 0.04560480215720684 | |
| }, | |
| "hendrycksTest-world_religions": { | |
| "acc": 0.14035087719298245, | |
| "acc_stderr": 0.0266405825391332, | |
| "acc_norm": 0.21052631578947367, | |
| "acc_norm_stderr": 0.03126781714663179 | |
| }, | |
| "crows_pairs_english_autre": { | |
| "likelihood_difference": 6.849431818181818, | |
| "likelihood_difference_stderr": 2.586994276246196, | |
| "pct_stereotype": 0.36363636363636365, | |
| "pct_stereotype_stderr": 0.15212000482437738 | |
| }, | |
| "hendrycksTest-high_school_us_history": { | |
| "acc": 0.20588235294117646, | |
| "acc_stderr": 0.028379449451588667, | |
| "acc_norm": 0.23529411764705882, | |
| "acc_norm_stderr": 0.02977177522814565 | |
| }, | |
| "hendrycksTest-nutrition": { | |
| "acc": 0.17973856209150327, | |
| "acc_stderr": 0.021986032182064148, | |
| "acc_norm": 0.27450980392156865, | |
| "acc_norm_stderr": 0.025553169991826517 | |
| }, | |
| "crows_pairs_french_nationality": { | |
| "likelihood_difference": 7.832756916996048, | |
| "likelihood_difference_stderr": 0.35785767445511346, | |
| "pct_stereotype": 0.3438735177865613, | |
| "pct_stereotype_stderr": 0.029922155720849428 | |
| }, | |
| "arc_easy": { | |
| "acc": 0.2984006734006734, | |
| "acc_stderr": 0.009388855914040428, | |
| "acc_norm": 0.30134680134680136, | |
| "acc_norm_stderr": 0.0094152598793516 | |
| }, | |
| "hendrycksTest-marketing": { | |
| "acc": 0.24358974358974358, | |
| "acc_stderr": 0.0281209665039144, | |
| "acc_norm": 0.25213675213675213, | |
| "acc_norm_stderr": 0.02844796547623101 | |
| }, | |
| "crows_pairs_english_race_color": { | |
| "likelihood_difference": 3.9656434547244093, | |
| "likelihood_difference_stderr": 0.19638996276808376, | |
| "pct_stereotype": 0.468503937007874, | |
| "pct_stereotype_stderr": 0.022161679438492773 | |
| } | |
| }, | |
| "versions": { | |
| "crows_pairs_french": 0, | |
| "hendrycksTest-virology": 0, | |
| "hendrycksTest-econometrics": 0, | |
| "hendrycksTest-high_school_psychology": 0, | |
| "crows_pairs_english_socioeconomic": 0, | |
| "hendrycksTest-security_studies": 0, | |
| "hendrycksTest-high_school_macroeconomics": 0, | |
| "crows_pairs_english_religion": 0, | |
| "hendrycksTest-high_school_geography": 0, | |
| "crows_pairs_english": 0, | |
| "logiqa": 0, | |
| "hendrycksTest-professional_medicine": 0, | |
| "hendrycksTest-moral_disputes": 0, | |
| "crows_pairs_french_age": 0, | |
| "crows_pairs_french_gender": 0, | |
| "hendrycksTest-college_biology": 0, | |
| "hendrycksTest-high_school_physics": 0, | |
| "crows_pairs_french_physical_appearance": 0, | |
| "crows_pairs_french_disability": 0, | |
| "hendrycksTest-global_facts": 0, | |
| "crows_pairs_french_socioeconomic": 0, | |
| "hendrycksTest-jurisprudence": 0, | |
| "hendrycksTest-machine_learning": 0, | |
| "hendrycksTest-high_school_mathematics": 0, | |
| "hendrycksTest-human_sexuality": 0, | |
| "hendrycksTest-high_school_computer_science": 0, | |
| "hendrycksTest-high_school_government_and_politics": 0, | |
| "hendrycksTest-professional_law": 0, | |
| "hendrycksTest-clinical_knowledge": 0, | |
| "hendrycksTest-high_school_chemistry": 0, | |
| "hendrycksTest-management": 0, | |
| "hendrycksTest-astronomy": 0, | |
| "hendrycksTest-high_school_european_history": 0, | |
| "hendrycksTest-college_medicine": 0, | |
| "hendrycksTest-college_physics": 0, | |
| "hendrycksTest-high_school_microeconomics": 0, | |
| "crows_pairs_english_physical_appearance": 0, | |
| "hendrycksTest-abstract_algebra": 0, | |
| "hendrycksTest-elementary_mathematics": 0, | |
| "hendrycksTest-human_aging": 0, | |
| "winogrande": 0, | |
| "crows_pairs_english_disability": 0, | |
| "crows_pairs_french_sexual_orientation": 0, | |
| "hendrycksTest-high_school_biology": 0, | |
| "arc_challenge": 0, | |
| "hendrycksTest-us_foreign_policy": 0, | |
| "hendrycksTest-college_computer_science": 0, | |
| "lambada_openai": 0, | |
| "crows_pairs_english_sexual_orientation": 0, | |
| "hendrycksTest-high_school_statistics": 0, | |
| "hendrycksTest-computer_security": 0, | |
| "crows_pairs_french_race_color": 0, | |
| "crows_pairs_english_age": 0, | |
| "hendrycksTest-philosophy": 0, | |
| "hendrycksTest-electrical_engineering": 0, | |
| "hendrycksTest-sociology": 0, | |
| "hendrycksTest-professional_accounting": 0, | |
| "hendrycksTest-miscellaneous": 0, | |
| "sciq": 0, | |
| "crows_pairs_french_autre": 0, | |
| "hendrycksTest-public_relations": 0, | |
| "crows_pairs_french_religion": 0, | |
| "hendrycksTest-international_law": 0, | |
| "hendrycksTest-anatomy": 0, | |
| "hendrycksTest-conceptual_physics": 0, | |
| "wsc": 0, | |
| "hendrycksTest-moral_scenarios": 0, | |
| "hendrycksTest-medical_genetics": 0, | |
| "piqa": 0, | |
| "hendrycksTest-college_mathematics": 0, | |
| "hendrycksTest-prehistory": 0, | |
| "hendrycksTest-professional_psychology": 0, | |
| "hendrycksTest-logical_fallacies": 0, | |
| "crows_pairs_english_nationality": 0, | |
| "hendrycksTest-formal_logic": 0, | |
| "crows_pairs_english_gender": 0, | |
| "hendrycksTest-high_school_world_history": 0, | |
| "hendrycksTest-college_chemistry": 0, | |
| "hendrycksTest-business_ethics": 0, | |
| "hendrycksTest-world_religions": 0, | |
| "crows_pairs_english_autre": 0, | |
| "hendrycksTest-high_school_us_history": 0, | |
| "hendrycksTest-nutrition": 0, | |
| "crows_pairs_french_nationality": 0, | |
| "arc_easy": 0, | |
| "hendrycksTest-marketing": 0, | |
| "crows_pairs_english_race_color": 0 | |
| }, | |
| "config": { | |
| "model": "hf-causal", | |
| "model_args": "use_accelerate=True,pretrained=EleutherAI/pythia-v1.1-70m,revision=step512", | |
| "num_fewshot": 0, | |
| "batch_size": 32, | |
| "device": null, | |
| "no_cache": true, | |
| "limit": null, | |
| "bootstrap_iters": 100000, | |
| "description_dict": {} | |
| } | |
| } |