Spaces:
Running
Running
| { | |
| "results": { | |
| "hendrycksTest-machine_learning": { | |
| "acc": 0.25, | |
| "acc_stderr": 0.04109974682633932, | |
| "acc_norm": 0.21428571428571427, | |
| "acc_norm_stderr": 0.03894641120044793 | |
| }, | |
| "crows_pairs_french_religion": { | |
| "likelihood_difference": 5.236141304347826, | |
| "likelihood_difference_stderr": 0.5106076448625602, | |
| "pct_stereotype": 0.5391304347826087, | |
| "pct_stereotype_stderr": 0.04668566114758416 | |
| }, | |
| "hendrycksTest-professional_medicine": { | |
| "acc": 0.30514705882352944, | |
| "acc_stderr": 0.027971541370170595, | |
| "acc_norm": 0.27205882352941174, | |
| "acc_norm_stderr": 0.027033041151681456 | |
| }, | |
| "crows_pairs_french_sexual_orientation": { | |
| "likelihood_difference": 5.860233516483516, | |
| "likelihood_difference_stderr": 0.5089789548023154, | |
| "pct_stereotype": 0.8131868131868132, | |
| "pct_stereotype_stderr": 0.04108446855035881 | |
| }, | |
| "hendrycksTest-moral_scenarios": { | |
| "acc": 0.24692737430167597, | |
| "acc_stderr": 0.014422292204808835, | |
| "acc_norm": 0.24692737430167597, | |
| "acc_norm_stderr": 0.014422292204808835 | |
| }, | |
| "crows_pairs_english_autre": { | |
| "likelihood_difference": 6.355113636363637, | |
| "likelihood_difference_stderr": 1.7489509473745437, | |
| "pct_stereotype": 0.5454545454545454, | |
| "pct_stereotype_stderr": 0.1574591643244434 | |
| }, | |
| "crows_pairs_english_age": { | |
| "likelihood_difference": 2.771291208791209, | |
| "likelihood_difference_stderr": 0.26169461121705356, | |
| "pct_stereotype": 0.5164835164835165, | |
| "pct_stereotype_stderr": 0.05267597952306975 | |
| }, | |
| "hendrycksTest-human_sexuality": { | |
| "acc": 0.3435114503816794, | |
| "acc_stderr": 0.041649760719448786, | |
| "acc_norm": 0.2900763358778626, | |
| "acc_norm_stderr": 0.03980066246467766 | |
| }, | |
| "crows_pairs_english_gender": { | |
| "likelihood_difference": 3.02265625, | |
| "likelihood_difference_stderr": 0.2641863477227852, | |
| "pct_stereotype": 0.540625, | |
| "pct_stereotype_stderr": 0.027902068404300068 | |
| }, | |
| "hendrycksTest-high_school_mathematics": { | |
| "acc": 0.1814814814814815, | |
| "acc_stderr": 0.02349926466940731, | |
| "acc_norm": 0.23703703703703705, | |
| "acc_norm_stderr": 0.025928876132766104 | |
| }, | |
| "hendrycksTest-us_foreign_policy": { | |
| "acc": 0.28, | |
| "acc_stderr": 0.04512608598542127, | |
| "acc_norm": 0.27, | |
| "acc_norm_stderr": 0.04461960433384741 | |
| }, | |
| "hendrycksTest-formal_logic": { | |
| "acc": 0.2777777777777778, | |
| "acc_stderr": 0.04006168083848876, | |
| "acc_norm": 0.23809523809523808, | |
| "acc_norm_stderr": 0.038095238095238126 | |
| }, | |
| "hendrycksTest-computer_security": { | |
| "acc": 0.22, | |
| "acc_stderr": 0.041633319989322716, | |
| "acc_norm": 0.28, | |
| "acc_norm_stderr": 0.045126085985421255 | |
| }, | |
| "hendrycksTest-electrical_engineering": { | |
| "acc": 0.2896551724137931, | |
| "acc_stderr": 0.03780019230438014, | |
| "acc_norm": 0.31724137931034485, | |
| "acc_norm_stderr": 0.03878352372138622 | |
| }, | |
| "hendrycksTest-clinical_knowledge": { | |
| "acc": 0.25660377358490566, | |
| "acc_stderr": 0.026880647889051982, | |
| "acc_norm": 0.3320754716981132, | |
| "acc_norm_stderr": 0.028985455652334388 | |
| }, | |
| "hendrycksTest-human_aging": { | |
| "acc": 0.23766816143497757, | |
| "acc_stderr": 0.028568079464714263, | |
| "acc_norm": 0.21973094170403587, | |
| "acc_norm_stderr": 0.02779017706438361 | |
| }, | |
| "crows_pairs_english_sexual_orientation": { | |
| "likelihood_difference": 4.497311827956989, | |
| "likelihood_difference_stderr": 0.547355686843944, | |
| "pct_stereotype": 0.8172043010752689, | |
| "pct_stereotype_stderr": 0.040295300106155174 | |
| }, | |
| "hendrycksTest-high_school_government_and_politics": { | |
| "acc": 0.21243523316062177, | |
| "acc_stderr": 0.02951928261681725, | |
| "acc_norm": 0.2694300518134715, | |
| "acc_norm_stderr": 0.03201867122877794 | |
| }, | |
| "hendrycksTest-professional_law": { | |
| "acc": 0.24185136897001303, | |
| "acc_stderr": 0.010936550813827065, | |
| "acc_norm": 0.288135593220339, | |
| "acc_norm_stderr": 0.011567140661324568 | |
| }, | |
| "hendrycksTest-professional_psychology": { | |
| "acc": 0.24019607843137256, | |
| "acc_stderr": 0.01728276069516741, | |
| "acc_norm": 0.2647058823529412, | |
| "acc_norm_stderr": 0.01784808957491322 | |
| }, | |
| "hendrycksTest-marketing": { | |
| "acc": 0.24786324786324787, | |
| "acc_stderr": 0.028286324075564404, | |
| "acc_norm": 0.2777777777777778, | |
| "acc_norm_stderr": 0.02934311479809447 | |
| }, | |
| "hendrycksTest-management": { | |
| "acc": 0.1941747572815534, | |
| "acc_stderr": 0.03916667762822583, | |
| "acc_norm": 0.2815533980582524, | |
| "acc_norm_stderr": 0.04453254836326469 | |
| }, | |
| "hendrycksTest-public_relations": { | |
| "acc": 0.3, | |
| "acc_stderr": 0.04389311454644286, | |
| "acc_norm": 0.22727272727272727, | |
| "acc_norm_stderr": 0.040139645540727735 | |
| }, | |
| "hendrycksTest-nutrition": { | |
| "acc": 0.2875816993464052, | |
| "acc_stderr": 0.02591780611714716, | |
| "acc_norm": 0.35947712418300654, | |
| "acc_norm_stderr": 0.027475969910660952 | |
| }, | |
| "crows_pairs_french_physical_appearance": { | |
| "likelihood_difference": 5.818142361111111, | |
| "likelihood_difference_stderr": 0.641566712133372, | |
| "pct_stereotype": 0.5, | |
| "pct_stereotype_stderr": 0.05933908290969268 | |
| }, | |
| "hendrycksTest-college_medicine": { | |
| "acc": 0.21965317919075145, | |
| "acc_stderr": 0.031568093627031744, | |
| "acc_norm": 0.32947976878612717, | |
| "acc_norm_stderr": 0.03583901754736412 | |
| }, | |
| "crows_pairs_english_race_color": { | |
| "likelihood_difference": 3.48705093503937, | |
| "likelihood_difference_stderr": 0.17608529748474508, | |
| "pct_stereotype": 0.4625984251968504, | |
| "pct_stereotype_stderr": 0.02214356608896984 | |
| }, | |
| "hendrycksTest-logical_fallacies": { | |
| "acc": 0.20245398773006135, | |
| "acc_stderr": 0.03157065078911902, | |
| "acc_norm": 0.3128834355828221, | |
| "acc_norm_stderr": 0.03642914578292404 | |
| }, | |
| "hendrycksTest-elementary_mathematics": { | |
| "acc": 0.23544973544973544, | |
| "acc_stderr": 0.02185150982203172, | |
| "acc_norm": 0.24603174603174602, | |
| "acc_norm_stderr": 0.022182037202948365 | |
| }, | |
| "hendrycksTest-professional_accounting": { | |
| "acc": 0.24113475177304963, | |
| "acc_stderr": 0.02551873104953776, | |
| "acc_norm": 0.2553191489361702, | |
| "acc_norm_stderr": 0.026011992930902013 | |
| }, | |
| "hendrycksTest-high_school_european_history": { | |
| "acc": 0.22424242424242424, | |
| "acc_stderr": 0.03256866661681102, | |
| "acc_norm": 0.2909090909090909, | |
| "acc_norm_stderr": 0.03546563019624337 | |
| }, | |
| "hendrycksTest-college_mathematics": { | |
| "acc": 0.17, | |
| "acc_stderr": 0.03775251680686371, | |
| "acc_norm": 0.23, | |
| "acc_norm_stderr": 0.04229525846816505 | |
| }, | |
| "hendrycksTest-prehistory": { | |
| "acc": 0.2716049382716049, | |
| "acc_stderr": 0.024748624490537382, | |
| "acc_norm": 0.21604938271604937, | |
| "acc_norm_stderr": 0.022899162918445785 | |
| }, | |
| "hendrycksTest-global_facts": { | |
| "acc": 0.23, | |
| "acc_stderr": 0.042295258468165065, | |
| "acc_norm": 0.23, | |
| "acc_norm_stderr": 0.042295258468165065 | |
| }, | |
| "hendrycksTest-anatomy": { | |
| "acc": 0.16296296296296298, | |
| "acc_stderr": 0.0319054147448284, | |
| "acc_norm": 0.17037037037037037, | |
| "acc_norm_stderr": 0.03247781185995593 | |
| }, | |
| "hendrycksTest-college_computer_science": { | |
| "acc": 0.23, | |
| "acc_stderr": 0.04229525846816506, | |
| "acc_norm": 0.25, | |
| "acc_norm_stderr": 0.04351941398892446 | |
| }, | |
| "crows_pairs_english_nationality": { | |
| "likelihood_difference": 3.807146990740741, | |
| "likelihood_difference_stderr": 0.2792489767677307, | |
| "pct_stereotype": 0.4351851851851852, | |
| "pct_stereotype_stderr": 0.03381200005643525 | |
| }, | |
| "hendrycksTest-high_school_biology": { | |
| "acc": 0.25806451612903225, | |
| "acc_stderr": 0.024892469172462826, | |
| "acc_norm": 0.29354838709677417, | |
| "acc_norm_stderr": 0.025906087021319295 | |
| }, | |
| "crows_pairs_english_physical_appearance": { | |
| "likelihood_difference": 3.8326822916666665, | |
| "likelihood_difference_stderr": 0.42534095862131277, | |
| "pct_stereotype": 0.5555555555555556, | |
| "pct_stereotype_stderr": 0.05897165471491952 | |
| }, | |
| "hendrycksTest-high_school_computer_science": { | |
| "acc": 0.24, | |
| "acc_stderr": 0.04292346959909284, | |
| "acc_norm": 0.32, | |
| "acc_norm_stderr": 0.04688261722621503 | |
| }, | |
| "hendrycksTest-college_physics": { | |
| "acc": 0.21568627450980393, | |
| "acc_stderr": 0.04092563958237655, | |
| "acc_norm": 0.28431372549019607, | |
| "acc_norm_stderr": 0.04488482852329017 | |
| }, | |
| "winogrande": { | |
| "acc": 0.4972375690607735, | |
| "acc_stderr": 0.014052271211616441 | |
| }, | |
| "logiqa": { | |
| "acc": 0.20890937019969277, | |
| "acc_stderr": 0.01594539939642392, | |
| "acc_norm": 0.28417818740399386, | |
| "acc_norm_stderr": 0.01769054268019078 | |
| }, | |
| "lambada_openai": { | |
| "ppl": 118.09596009074914, | |
| "ppl_stderr": 4.94543500156858, | |
| "acc": 0.2233650300795653, | |
| "acc_stderr": 0.005802673494605816 | |
| }, | |
| "hendrycksTest-high_school_geography": { | |
| "acc": 0.23232323232323232, | |
| "acc_stderr": 0.030088629490217487, | |
| "acc_norm": 0.2828282828282828, | |
| "acc_norm_stderr": 0.032087795587867514 | |
| }, | |
| "hendrycksTest-econometrics": { | |
| "acc": 0.23684210526315788, | |
| "acc_stderr": 0.039994238792813365, | |
| "acc_norm": 0.22807017543859648, | |
| "acc_norm_stderr": 0.03947152782669415 | |
| }, | |
| "crows_pairs_french_race_color": { | |
| "likelihood_difference": 4.586209239130435, | |
| "likelihood_difference_stderr": 0.22439998730100816, | |
| "pct_stereotype": 0.2847826086956522, | |
| "pct_stereotype_stderr": 0.02106538604116979 | |
| }, | |
| "crows_pairs_english_socioeconomic": { | |
| "likelihood_difference": 3.9653782894736844, | |
| "likelihood_difference_stderr": 0.28742290506987894, | |
| "pct_stereotype": 0.6263157894736842, | |
| "pct_stereotype_stderr": 0.035189909668609055 | |
| }, | |
| "crows_pairs_french_nationality": { | |
| "likelihood_difference": 7.6789772727272725, | |
| "likelihood_difference_stderr": 0.41601257841823347, | |
| "pct_stereotype": 0.2648221343873518, | |
| "pct_stereotype_stderr": 0.02779540983044468 | |
| }, | |
| "hendrycksTest-high_school_psychology": { | |
| "acc": 0.26788990825688075, | |
| "acc_stderr": 0.018987462257978652, | |
| "acc_norm": 0.25871559633027524, | |
| "acc_norm_stderr": 0.018776052319619617 | |
| }, | |
| "crows_pairs_french": { | |
| "likelihood_difference": 5.2784641472868215, | |
| "likelihood_difference_stderr": 0.12512951793875754, | |
| "pct_stereotype": 0.407871198568873, | |
| "pct_stereotype_stderr": 0.012004182941077525 | |
| }, | |
| "crows_pairs_french_disability": { | |
| "likelihood_difference": 6.4512310606060606, | |
| "likelihood_difference_stderr": 0.6395317220387889, | |
| "pct_stereotype": 0.45454545454545453, | |
| "pct_stereotype_stderr": 0.06176056549879611 | |
| }, | |
| "hendrycksTest-high_school_physics": { | |
| "acc": 0.23178807947019867, | |
| "acc_stderr": 0.03445406271987053, | |
| "acc_norm": 0.2119205298013245, | |
| "acc_norm_stderr": 0.03336767086567978 | |
| }, | |
| "arc_easy": { | |
| "acc": 0.4010942760942761, | |
| "acc_stderr": 0.010057051106534374, | |
| "acc_norm": 0.36447811447811446, | |
| "acc_norm_stderr": 0.009875729282482438 | |
| }, | |
| "hendrycksTest-conceptual_physics": { | |
| "acc": 0.25957446808510637, | |
| "acc_stderr": 0.02865917937429232, | |
| "acc_norm": 0.19574468085106383, | |
| "acc_norm_stderr": 0.025937853139977145 | |
| }, | |
| "crows_pairs_french_age": { | |
| "likelihood_difference": 4.49375, | |
| "likelihood_difference_stderr": 0.48104738994215757, | |
| "pct_stereotype": 0.4, | |
| "pct_stereotype_stderr": 0.05192907868894985 | |
| }, | |
| "crows_pairs_english": { | |
| "likelihood_difference": 3.625680158020274, | |
| "likelihood_difference_stderr": 0.10323728907768165, | |
| "pct_stereotype": 0.5372689326177699, | |
| "pct_stereotype_stderr": 0.012179324068364769 | |
| }, | |
| "hendrycksTest-sociology": { | |
| "acc": 0.25870646766169153, | |
| "acc_stderr": 0.03096590312357304, | |
| "acc_norm": 0.26865671641791045, | |
| "acc_norm_stderr": 0.03134328358208954 | |
| }, | |
| "hendrycksTest-miscellaneous": { | |
| "acc": 0.27330779054916987, | |
| "acc_stderr": 0.01593668106262856, | |
| "acc_norm": 0.2503192848020434, | |
| "acc_norm_stderr": 0.0154910889514946 | |
| }, | |
| "arc_challenge": { | |
| "acc": 0.181740614334471, | |
| "acc_stderr": 0.011269198948880236, | |
| "acc_norm": 0.21416382252559726, | |
| "acc_norm_stderr": 0.011988383205966497 | |
| }, | |
| "hendrycksTest-world_religions": { | |
| "acc": 0.29239766081871343, | |
| "acc_stderr": 0.03488647713457922, | |
| "acc_norm": 0.3333333333333333, | |
| "acc_norm_stderr": 0.03615507630310935 | |
| }, | |
| "hendrycksTest-astronomy": { | |
| "acc": 0.21052631578947367, | |
| "acc_stderr": 0.033176727875331574, | |
| "acc_norm": 0.3618421052631579, | |
| "acc_norm_stderr": 0.03910525752849724 | |
| }, | |
| "hendrycksTest-business_ethics": { | |
| "acc": 0.36, | |
| "acc_stderr": 0.048241815132442176, | |
| "acc_norm": 0.33, | |
| "acc_norm_stderr": 0.04725815626252604 | |
| }, | |
| "hendrycksTest-college_biology": { | |
| "acc": 0.2847222222222222, | |
| "acc_stderr": 0.037738099906869355, | |
| "acc_norm": 0.2847222222222222, | |
| "acc_norm_stderr": 0.03773809990686934 | |
| }, | |
| "piqa": { | |
| "acc": 0.5984766050054406, | |
| "acc_stderr": 0.011437324373397848, | |
| "acc_norm": 0.5930359085963003, | |
| "acc_norm_stderr": 0.011462093919190166 | |
| }, | |
| "crows_pairs_french_gender": { | |
| "likelihood_difference": 4.313473520249222, | |
| "likelihood_difference_stderr": 0.19628650459456284, | |
| "pct_stereotype": 0.5202492211838006, | |
| "pct_stereotype_stderr": 0.027927918885132307 | |
| }, | |
| "hendrycksTest-high_school_microeconomics": { | |
| "acc": 0.23109243697478993, | |
| "acc_stderr": 0.02738140692786898, | |
| "acc_norm": 0.29831932773109243, | |
| "acc_norm_stderr": 0.02971914287634287 | |
| }, | |
| "hendrycksTest-virology": { | |
| "acc": 0.21686746987951808, | |
| "acc_stderr": 0.03208284450356365, | |
| "acc_norm": 0.2469879518072289, | |
| "acc_norm_stderr": 0.03357351982064536 | |
| }, | |
| "hendrycksTest-jurisprudence": { | |
| "acc": 0.26851851851851855, | |
| "acc_stderr": 0.04284467968052191, | |
| "acc_norm": 0.4074074074074074, | |
| "acc_norm_stderr": 0.04750077341199985 | |
| }, | |
| "hendrycksTest-abstract_algebra": { | |
| "acc": 0.24, | |
| "acc_stderr": 0.04292346959909284, | |
| "acc_norm": 0.26, | |
| "acc_norm_stderr": 0.04408440022768078 | |
| }, | |
| "crows_pairs_english_disability": { | |
| "likelihood_difference": 5.187980769230769, | |
| "likelihood_difference_stderr": 0.5880197346199485, | |
| "pct_stereotype": 0.6153846153846154, | |
| "pct_stereotype_stderr": 0.06081303192631497 | |
| }, | |
| "hendrycksTest-high_school_us_history": { | |
| "acc": 0.24019607843137256, | |
| "acc_stderr": 0.02998373305591361, | |
| "acc_norm": 0.2696078431372549, | |
| "acc_norm_stderr": 0.031145570659486782 | |
| }, | |
| "hendrycksTest-high_school_chemistry": { | |
| "acc": 0.21674876847290642, | |
| "acc_stderr": 0.02899033125251624, | |
| "acc_norm": 0.26108374384236455, | |
| "acc_norm_stderr": 0.030903796952114468 | |
| }, | |
| "sciq": { | |
| "acc": 0.664, | |
| "acc_stderr": 0.014944140233795027, | |
| "acc_norm": 0.576, | |
| "acc_norm_stderr": 0.01563548747140519 | |
| }, | |
| "hendrycksTest-philosophy": { | |
| "acc": 0.2057877813504823, | |
| "acc_stderr": 0.022961339906764244, | |
| "acc_norm": 0.28938906752411575, | |
| "acc_norm_stderr": 0.025755865922632945 | |
| }, | |
| "hendrycksTest-security_studies": { | |
| "acc": 0.30612244897959184, | |
| "acc_stderr": 0.02950489645459597, | |
| "acc_norm": 0.2530612244897959, | |
| "acc_norm_stderr": 0.027833023871399683 | |
| }, | |
| "hendrycksTest-high_school_macroeconomics": { | |
| "acc": 0.258974358974359, | |
| "acc_stderr": 0.022211106810061675, | |
| "acc_norm": 0.28205128205128205, | |
| "acc_norm_stderr": 0.022815813098896597 | |
| }, | |
| "hendrycksTest-moral_disputes": { | |
| "acc": 0.27167630057803466, | |
| "acc_stderr": 0.023948512905468365, | |
| "acc_norm": 0.32947976878612717, | |
| "acc_norm_stderr": 0.025305258131879716 | |
| }, | |
| "hendrycksTest-international_law": { | |
| "acc": 0.18181818181818182, | |
| "acc_stderr": 0.03520893951097652, | |
| "acc_norm": 0.4214876033057851, | |
| "acc_norm_stderr": 0.04507732278775094 | |
| }, | |
| "wsc": { | |
| "acc": 0.36538461538461536, | |
| "acc_stderr": 0.0474473339327792 | |
| }, | |
| "hendrycksTest-medical_genetics": { | |
| "acc": 0.22, | |
| "acc_stderr": 0.04163331998932269, | |
| "acc_norm": 0.32, | |
| "acc_norm_stderr": 0.046882617226215034 | |
| }, | |
| "crows_pairs_english_religion": { | |
| "likelihood_difference": 3.714527027027027, | |
| "likelihood_difference_stderr": 0.39412229193840076, | |
| "pct_stereotype": 0.6396396396396397, | |
| "pct_stereotype_stderr": 0.04577621167070314 | |
| }, | |
| "crows_pairs_french_autre": { | |
| "likelihood_difference": 2.5288461538461537, | |
| "likelihood_difference_stderr": 0.9157702142826863, | |
| "pct_stereotype": 0.38461538461538464, | |
| "pct_stereotype_stderr": 0.1404416814115811 | |
| }, | |
| "hendrycksTest-high_school_world_history": { | |
| "acc": 0.270042194092827, | |
| "acc_stderr": 0.028900721906293426, | |
| "acc_norm": 0.270042194092827, | |
| "acc_norm_stderr": 0.028900721906293426 | |
| }, | |
| "hendrycksTest-high_school_statistics": { | |
| "acc": 0.18518518518518517, | |
| "acc_stderr": 0.02649191472735516, | |
| "acc_norm": 0.24537037037037038, | |
| "acc_norm_stderr": 0.029346665094372924 | |
| }, | |
| "hendrycksTest-college_chemistry": { | |
| "acc": 0.29, | |
| "acc_stderr": 0.04560480215720684, | |
| "acc_norm": 0.3, | |
| "acc_norm_stderr": 0.046056618647183814 | |
| }, | |
| "crows_pairs_french_socioeconomic": { | |
| "likelihood_difference": 5.089205994897959, | |
| "likelihood_difference_stderr": 0.3514259595841283, | |
| "pct_stereotype": 0.3877551020408163, | |
| "pct_stereotype_stderr": 0.03489185364347385 | |
| } | |
| }, | |
| "versions": { | |
| "hendrycksTest-machine_learning": 0, | |
| "crows_pairs_french_religion": 0, | |
| "hendrycksTest-professional_medicine": 0, | |
| "crows_pairs_french_sexual_orientation": 0, | |
| "hendrycksTest-moral_scenarios": 0, | |
| "crows_pairs_english_autre": 0, | |
| "crows_pairs_english_age": 0, | |
| "hendrycksTest-human_sexuality": 0, | |
| "crows_pairs_english_gender": 0, | |
| "hendrycksTest-high_school_mathematics": 0, | |
| "hendrycksTest-us_foreign_policy": 0, | |
| "hendrycksTest-formal_logic": 0, | |
| "hendrycksTest-computer_security": 0, | |
| "hendrycksTest-electrical_engineering": 0, | |
| "hendrycksTest-clinical_knowledge": 0, | |
| "hendrycksTest-human_aging": 0, | |
| "crows_pairs_english_sexual_orientation": 0, | |
| "hendrycksTest-high_school_government_and_politics": 0, | |
| "hendrycksTest-professional_law": 0, | |
| "hendrycksTest-professional_psychology": 0, | |
| "hendrycksTest-marketing": 0, | |
| "hendrycksTest-management": 0, | |
| "hendrycksTest-public_relations": 0, | |
| "hendrycksTest-nutrition": 0, | |
| "crows_pairs_french_physical_appearance": 0, | |
| "hendrycksTest-college_medicine": 0, | |
| "crows_pairs_english_race_color": 0, | |
| "hendrycksTest-logical_fallacies": 0, | |
| "hendrycksTest-elementary_mathematics": 0, | |
| "hendrycksTest-professional_accounting": 0, | |
| "hendrycksTest-high_school_european_history": 0, | |
| "hendrycksTest-college_mathematics": 0, | |
| "hendrycksTest-prehistory": 0, | |
| "hendrycksTest-global_facts": 0, | |
| "hendrycksTest-anatomy": 0, | |
| "hendrycksTest-college_computer_science": 0, | |
| "crows_pairs_english_nationality": 0, | |
| "hendrycksTest-high_school_biology": 0, | |
| "crows_pairs_english_physical_appearance": 0, | |
| "hendrycksTest-high_school_computer_science": 0, | |
| "hendrycksTest-college_physics": 0, | |
| "winogrande": 0, | |
| "logiqa": 0, | |
| "lambada_openai": 0, | |
| "hendrycksTest-high_school_geography": 0, | |
| "hendrycksTest-econometrics": 0, | |
| "crows_pairs_french_race_color": 0, | |
| "crows_pairs_english_socioeconomic": 0, | |
| "crows_pairs_french_nationality": 0, | |
| "hendrycksTest-high_school_psychology": 0, | |
| "crows_pairs_french": 0, | |
| "crows_pairs_french_disability": 0, | |
| "hendrycksTest-high_school_physics": 0, | |
| "arc_easy": 0, | |
| "hendrycksTest-conceptual_physics": 0, | |
| "crows_pairs_french_age": 0, | |
| "crows_pairs_english": 0, | |
| "hendrycksTest-sociology": 0, | |
| "hendrycksTest-miscellaneous": 0, | |
| "arc_challenge": 0, | |
| "hendrycksTest-world_religions": 0, | |
| "hendrycksTest-astronomy": 0, | |
| "hendrycksTest-business_ethics": 0, | |
| "hendrycksTest-college_biology": 0, | |
| "piqa": 0, | |
| "crows_pairs_french_gender": 0, | |
| "hendrycksTest-high_school_microeconomics": 0, | |
| "hendrycksTest-virology": 0, | |
| "hendrycksTest-jurisprudence": 0, | |
| "hendrycksTest-abstract_algebra": 0, | |
| "crows_pairs_english_disability": 0, | |
| "hendrycksTest-high_school_us_history": 0, | |
| "hendrycksTest-high_school_chemistry": 0, | |
| "sciq": 0, | |
| "hendrycksTest-philosophy": 0, | |
| "hendrycksTest-security_studies": 0, | |
| "hendrycksTest-high_school_macroeconomics": 0, | |
| "hendrycksTest-moral_disputes": 0, | |
| "hendrycksTest-international_law": 0, | |
| "wsc": 0, | |
| "hendrycksTest-medical_genetics": 0, | |
| "crows_pairs_english_religion": 0, | |
| "crows_pairs_french_autre": 0, | |
| "hendrycksTest-high_school_world_history": 0, | |
| "hendrycksTest-high_school_statistics": 0, | |
| "hendrycksTest-college_chemistry": 0, | |
| "crows_pairs_french_socioeconomic": 0 | |
| }, | |
| "config": { | |
| "model": "hf-causal", | |
| "model_args": "pretrained=EleutherAI/pythia-v1.1-70m,revision=step33000", | |
| "num_fewshot": 0, | |
| "batch_size": 16, | |
| "device": "cuda:3", | |
| "no_cache": true, | |
| "limit": null, | |
| "bootstrap_iters": 100000, | |
| "description_dict": {} | |
| } | |
| } |