Spaces:
Running
Running
| { | |
| "results": { | |
| "hendrycksTest-sociology": { | |
| "acc": 0.2736318407960199, | |
| "acc_stderr": 0.03152439186555404, | |
| "acc_norm": 0.3034825870646766, | |
| "acc_norm_stderr": 0.03251006816458617 | |
| }, | |
| "hendrycksTest-high_school_microeconomics": { | |
| "acc": 0.19747899159663865, | |
| "acc_stderr": 0.02585916412205146, | |
| "acc_norm": 0.3025210084033613, | |
| "acc_norm_stderr": 0.02983796238829193 | |
| }, | |
| "hendrycksTest-college_computer_science": { | |
| "acc": 0.22, | |
| "acc_stderr": 0.04163331998932269, | |
| "acc_norm": 0.19, | |
| "acc_norm_stderr": 0.03942772444036623 | |
| }, | |
| "hendrycksTest-conceptual_physics": { | |
| "acc": 0.2680851063829787, | |
| "acc_stderr": 0.02895734278834235, | |
| "acc_norm": 0.18723404255319148, | |
| "acc_norm_stderr": 0.02550158834188358 | |
| }, | |
| "hendrycksTest-high_school_statistics": { | |
| "acc": 0.2037037037037037, | |
| "acc_stderr": 0.027467401804057986, | |
| "acc_norm": 0.22685185185185186, | |
| "acc_norm_stderr": 0.02856165010242227 | |
| }, | |
| "hendrycksTest-clinical_knowledge": { | |
| "acc": 0.23018867924528302, | |
| "acc_stderr": 0.025907897122408173, | |
| "acc_norm": 0.32452830188679244, | |
| "acc_norm_stderr": 0.028815615713432118 | |
| }, | |
| "piqa": { | |
| "acc": 0.5919477693144722, | |
| "acc_stderr": 0.011466872778651261, | |
| "acc_norm": 0.5979325353645266, | |
| "acc_norm_stderr": 0.01143986712726753 | |
| }, | |
| "crows_pairs_french_disability": { | |
| "likelihood_difference": 5.745738636363637, | |
| "likelihood_difference_stderr": 0.603740965474876, | |
| "pct_stereotype": 0.48484848484848486, | |
| "pct_stereotype_stderr": 0.06198888629778894 | |
| }, | |
| "hendrycksTest-college_medicine": { | |
| "acc": 0.2138728323699422, | |
| "acc_stderr": 0.03126511206173042, | |
| "acc_norm": 0.3063583815028902, | |
| "acc_norm_stderr": 0.03514942551267437 | |
| }, | |
| "crows_pairs_english_disability": { | |
| "likelihood_difference": 4.940384615384615, | |
| "likelihood_difference_stderr": 0.5258513529267634, | |
| "pct_stereotype": 0.6153846153846154, | |
| "pct_stereotype_stderr": 0.06081303192631497 | |
| }, | |
| "hendrycksTest-econometrics": { | |
| "acc": 0.20175438596491227, | |
| "acc_stderr": 0.037752050135836386, | |
| "acc_norm": 0.19298245614035087, | |
| "acc_norm_stderr": 0.037124548537213684 | |
| }, | |
| "hendrycksTest-business_ethics": { | |
| "acc": 0.35, | |
| "acc_stderr": 0.04793724854411018, | |
| "acc_norm": 0.28, | |
| "acc_norm_stderr": 0.04512608598542128 | |
| }, | |
| "hendrycksTest-high_school_government_and_politics": { | |
| "acc": 0.29533678756476683, | |
| "acc_stderr": 0.032922966391551414, | |
| "acc_norm": 0.27461139896373055, | |
| "acc_norm_stderr": 0.03221024508041154 | |
| }, | |
| "crows_pairs_french_sexual_orientation": { | |
| "likelihood_difference": 4.6717032967032965, | |
| "likelihood_difference_stderr": 0.35079580322071463, | |
| "pct_stereotype": 0.8021978021978022, | |
| "pct_stereotype_stderr": 0.04198895203196222 | |
| }, | |
| "hendrycksTest-abstract_algebra": { | |
| "acc": 0.2, | |
| "acc_stderr": 0.040201512610368466, | |
| "acc_norm": 0.31, | |
| "acc_norm_stderr": 0.04648231987117316 | |
| }, | |
| "crows_pairs_french_socioeconomic": { | |
| "likelihood_difference": 4.72429049744898, | |
| "likelihood_difference_stderr": 0.38514448828446046, | |
| "pct_stereotype": 0.45408163265306123, | |
| "pct_stereotype_stderr": 0.035654431417332814 | |
| }, | |
| "crows_pairs_english": { | |
| "likelihood_difference": 3.67170728980322, | |
| "likelihood_difference_stderr": 0.1032630912208814, | |
| "pct_stereotype": 0.545020870602266, | |
| "pct_stereotype_stderr": 0.012163688705232118 | |
| }, | |
| "crows_pairs_french": { | |
| "likelihood_difference": 5.014772473166368, | |
| "likelihood_difference_stderr": 0.12242859643295022, | |
| "pct_stereotype": 0.43410852713178294, | |
| "pct_stereotype_stderr": 0.012106782103996008 | |
| }, | |
| "hendrycksTest-college_mathematics": { | |
| "acc": 0.2, | |
| "acc_stderr": 0.04020151261036845, | |
| "acc_norm": 0.26, | |
| "acc_norm_stderr": 0.04408440022768078 | |
| }, | |
| "hendrycksTest-miscellaneous": { | |
| "acc": 0.2681992337164751, | |
| "acc_stderr": 0.015842430835269435, | |
| "acc_norm": 0.2515964240102171, | |
| "acc_norm_stderr": 0.015517322365529619 | |
| }, | |
| "hendrycksTest-moral_disputes": { | |
| "acc": 0.2630057803468208, | |
| "acc_stderr": 0.023703099525258155, | |
| "acc_norm": 0.2947976878612717, | |
| "acc_norm_stderr": 0.02454761779480383 | |
| }, | |
| "hendrycksTest-college_chemistry": { | |
| "acc": 0.3, | |
| "acc_stderr": 0.046056618647183814, | |
| "acc_norm": 0.27, | |
| "acc_norm_stderr": 0.04461960433384741 | |
| }, | |
| "hendrycksTest-moral_scenarios": { | |
| "acc": 0.25027932960893856, | |
| "acc_stderr": 0.014487500852850412, | |
| "acc_norm": 0.24692737430167597, | |
| "acc_norm_stderr": 0.014422292204808835 | |
| }, | |
| "hendrycksTest-high_school_mathematics": { | |
| "acc": 0.2074074074074074, | |
| "acc_stderr": 0.024720713193952148, | |
| "acc_norm": 0.2518518518518518, | |
| "acc_norm_stderr": 0.026466117538959902 | |
| }, | |
| "hendrycksTest-high_school_us_history": { | |
| "acc": 0.2647058823529412, | |
| "acc_stderr": 0.03096451792692341, | |
| "acc_norm": 0.2647058823529412, | |
| "acc_norm_stderr": 0.03096451792692341 | |
| }, | |
| "crows_pairs_french_gender": { | |
| "likelihood_difference": 4.173773364485982, | |
| "likelihood_difference_stderr": 0.20666001663696318, | |
| "pct_stereotype": 0.5327102803738317, | |
| "pct_stereotype_stderr": 0.027890972865217984 | |
| }, | |
| "crows_pairs_english_physical_appearance": { | |
| "likelihood_difference": 3.8569878472222223, | |
| "likelihood_difference_stderr": 0.44844825841380226, | |
| "pct_stereotype": 0.5277777777777778, | |
| "pct_stereotype_stderr": 0.05924743948371487 | |
| }, | |
| "hendrycksTest-high_school_physics": { | |
| "acc": 0.18543046357615894, | |
| "acc_stderr": 0.03173284384294287, | |
| "acc_norm": 0.2185430463576159, | |
| "acc_norm_stderr": 0.03374235550425694 | |
| }, | |
| "wsc": { | |
| "acc": 0.36538461538461536, | |
| "acc_stderr": 0.0474473339327792 | |
| }, | |
| "hendrycksTest-jurisprudence": { | |
| "acc": 0.18518518518518517, | |
| "acc_stderr": 0.03755265865037181, | |
| "acc_norm": 0.37037037037037035, | |
| "acc_norm_stderr": 0.04668408033024931 | |
| }, | |
| "arc_easy": { | |
| "acc": 0.39225589225589225, | |
| "acc_stderr": 0.010018744689650043, | |
| "acc_norm": 0.35858585858585856, | |
| "acc_norm_stderr": 0.009840882301225297 | |
| }, | |
| "hendrycksTest-formal_logic": { | |
| "acc": 0.30952380952380953, | |
| "acc_stderr": 0.04134913018303316, | |
| "acc_norm": 0.29365079365079366, | |
| "acc_norm_stderr": 0.040735243221471255 | |
| }, | |
| "hendrycksTest-high_school_psychology": { | |
| "acc": 0.27155963302752295, | |
| "acc_stderr": 0.019069098363191442, | |
| "acc_norm": 0.26605504587155965, | |
| "acc_norm_stderr": 0.018946022322225614 | |
| }, | |
| "crows_pairs_english_autre": { | |
| "likelihood_difference": 5.355113636363637, | |
| "likelihood_difference_stderr": 1.5602556194869146, | |
| "pct_stereotype": 0.5454545454545454, | |
| "pct_stereotype_stderr": 0.1574591643244434 | |
| }, | |
| "hendrycksTest-high_school_european_history": { | |
| "acc": 0.2606060606060606, | |
| "acc_stderr": 0.034277431758165236, | |
| "acc_norm": 0.2787878787878788, | |
| "acc_norm_stderr": 0.035014387062967806 | |
| }, | |
| "crows_pairs_english_socioeconomic": { | |
| "likelihood_difference": 3.9657894736842105, | |
| "likelihood_difference_stderr": 0.2608872260073087, | |
| "pct_stereotype": 0.6473684210526316, | |
| "pct_stereotype_stderr": 0.034754052595820976 | |
| }, | |
| "hendrycksTest-electrical_engineering": { | |
| "acc": 0.296551724137931, | |
| "acc_stderr": 0.03806142687309994, | |
| "acc_norm": 0.32413793103448274, | |
| "acc_norm_stderr": 0.03900432069185554 | |
| }, | |
| "hendrycksTest-anatomy": { | |
| "acc": 0.25925925925925924, | |
| "acc_stderr": 0.03785714465066654, | |
| "acc_norm": 0.23703703703703705, | |
| "acc_norm_stderr": 0.03673731683969506 | |
| }, | |
| "crows_pairs_french_physical_appearance": { | |
| "likelihood_difference": 5.307291666666667, | |
| "likelihood_difference_stderr": 0.5547099715245821, | |
| "pct_stereotype": 0.5, | |
| "pct_stereotype_stderr": 0.05933908290969268 | |
| }, | |
| "hendrycksTest-philosophy": { | |
| "acc": 0.2282958199356913, | |
| "acc_stderr": 0.023839303311398215, | |
| "acc_norm": 0.3022508038585209, | |
| "acc_norm_stderr": 0.02608270069539966 | |
| }, | |
| "lambada_openai": { | |
| "ppl": 94.31955728859376, | |
| "ppl_stderr": 3.991574316908998, | |
| "acc": 0.25344459538133124, | |
| "acc_stderr": 0.0060601672763364745 | |
| }, | |
| "hendrycksTest-high_school_computer_science": { | |
| "acc": 0.21, | |
| "acc_stderr": 0.040936018074033256, | |
| "acc_norm": 0.28, | |
| "acc_norm_stderr": 0.045126085985421296 | |
| }, | |
| "hendrycksTest-nutrition": { | |
| "acc": 0.25163398692810457, | |
| "acc_stderr": 0.024848018263875192, | |
| "acc_norm": 0.34967320261437906, | |
| "acc_norm_stderr": 0.027305308076274702 | |
| }, | |
| "hendrycksTest-virology": { | |
| "acc": 0.27710843373493976, | |
| "acc_stderr": 0.034843315926805875, | |
| "acc_norm": 0.2891566265060241, | |
| "acc_norm_stderr": 0.03529486801511115 | |
| }, | |
| "crows_pairs_english_gender": { | |
| "likelihood_difference": 3.0603515625, | |
| "likelihood_difference_stderr": 0.2570312907090984, | |
| "pct_stereotype": 0.5125, | |
| "pct_stereotype_stderr": 0.02798587585995665 | |
| }, | |
| "hendrycksTest-computer_security": { | |
| "acc": 0.21, | |
| "acc_stderr": 0.040936018074033256, | |
| "acc_norm": 0.29, | |
| "acc_norm_stderr": 0.04560480215720683 | |
| }, | |
| "hendrycksTest-professional_accounting": { | |
| "acc": 0.2730496453900709, | |
| "acc_stderr": 0.02657786094330786, | |
| "acc_norm": 0.25886524822695034, | |
| "acc_norm_stderr": 0.02612957252718085 | |
| }, | |
| "hendrycksTest-machine_learning": { | |
| "acc": 0.3482142857142857, | |
| "acc_stderr": 0.045218299028335865, | |
| "acc_norm": 0.2767857142857143, | |
| "acc_norm_stderr": 0.042466243366976256 | |
| }, | |
| "crows_pairs_english_race_color": { | |
| "likelihood_difference": 3.5856606791338583, | |
| "likelihood_difference_stderr": 0.18118219123514714, | |
| "pct_stereotype": 0.5118110236220472, | |
| "pct_stereotype_stderr": 0.022199583294816923 | |
| }, | |
| "crows_pairs_english_religion": { | |
| "likelihood_difference": 3.8061655405405403, | |
| "likelihood_difference_stderr": 0.43453880510820464, | |
| "pct_stereotype": 0.6036036036036037, | |
| "pct_stereotype_stderr": 0.04663848326322447 | |
| }, | |
| "hendrycksTest-management": { | |
| "acc": 0.22330097087378642, | |
| "acc_stderr": 0.04123553189891431, | |
| "acc_norm": 0.3106796116504854, | |
| "acc_norm_stderr": 0.04582124160161551 | |
| }, | |
| "sciq": { | |
| "acc": 0.664, | |
| "acc_stderr": 0.014944140233795028, | |
| "acc_norm": 0.572, | |
| "acc_norm_stderr": 0.01565442624502929 | |
| }, | |
| "hendrycksTest-astronomy": { | |
| "acc": 0.17763157894736842, | |
| "acc_stderr": 0.031103182383123387, | |
| "acc_norm": 0.34868421052631576, | |
| "acc_norm_stderr": 0.03878139888797609 | |
| }, | |
| "hendrycksTest-high_school_world_history": { | |
| "acc": 0.23628691983122363, | |
| "acc_stderr": 0.027652153144159294, | |
| "acc_norm": 0.3080168776371308, | |
| "acc_norm_stderr": 0.030052389335605695 | |
| }, | |
| "crows_pairs_french_race_color": { | |
| "likelihood_difference": 4.440149456521739, | |
| "likelihood_difference_stderr": 0.2261395575520835, | |
| "pct_stereotype": 0.3239130434782609, | |
| "pct_stereotype_stderr": 0.021842842500532617 | |
| }, | |
| "hendrycksTest-global_facts": { | |
| "acc": 0.22, | |
| "acc_stderr": 0.04163331998932268, | |
| "acc_norm": 0.21, | |
| "acc_norm_stderr": 0.040936018074033256 | |
| }, | |
| "hendrycksTest-human_sexuality": { | |
| "acc": 0.3053435114503817, | |
| "acc_stderr": 0.040393149787245605, | |
| "acc_norm": 0.2824427480916031, | |
| "acc_norm_stderr": 0.03948406125768361 | |
| }, | |
| "hendrycksTest-prehistory": { | |
| "acc": 0.2993827160493827, | |
| "acc_stderr": 0.02548311560119546, | |
| "acc_norm": 0.23148148148148148, | |
| "acc_norm_stderr": 0.023468429832451145 | |
| }, | |
| "hendrycksTest-college_biology": { | |
| "acc": 0.25, | |
| "acc_stderr": 0.03621034121889507, | |
| "acc_norm": 0.25, | |
| "acc_norm_stderr": 0.03621034121889507 | |
| }, | |
| "crows_pairs_french_age": { | |
| "likelihood_difference": 4.878472222222222, | |
| "likelihood_difference_stderr": 0.4858540541132919, | |
| "pct_stereotype": 0.4666666666666667, | |
| "pct_stereotype_stderr": 0.05288198530254015 | |
| }, | |
| "hendrycksTest-marketing": { | |
| "acc": 0.2948717948717949, | |
| "acc_stderr": 0.029872577708891162, | |
| "acc_norm": 0.3162393162393162, | |
| "acc_norm_stderr": 0.030463656747340247 | |
| }, | |
| "hendrycksTest-security_studies": { | |
| "acc": 0.3183673469387755, | |
| "acc_stderr": 0.029822533793982052, | |
| "acc_norm": 0.23265306122448978, | |
| "acc_norm_stderr": 0.02704925791589618 | |
| }, | |
| "hendrycksTest-international_law": { | |
| "acc": 0.2066115702479339, | |
| "acc_stderr": 0.03695980128098823, | |
| "acc_norm": 0.4132231404958678, | |
| "acc_norm_stderr": 0.04495087843548408 | |
| }, | |
| "hendrycksTest-elementary_mathematics": { | |
| "acc": 0.22486772486772486, | |
| "acc_stderr": 0.021502096078229147, | |
| "acc_norm": 0.20634920634920634, | |
| "acc_norm_stderr": 0.020842290930114676 | |
| }, | |
| "hendrycksTest-high_school_geography": { | |
| "acc": 0.2474747474747475, | |
| "acc_stderr": 0.030746300742124522, | |
| "acc_norm": 0.32323232323232326, | |
| "acc_norm_stderr": 0.033322999210706444 | |
| }, | |
| "crows_pairs_french_religion": { | |
| "likelihood_difference": 4.854619565217392, | |
| "likelihood_difference_stderr": 0.505869033934835, | |
| "pct_stereotype": 0.4956521739130435, | |
| "pct_stereotype_stderr": 0.04682752006203916 | |
| }, | |
| "hendrycksTest-world_religions": { | |
| "acc": 0.2631578947368421, | |
| "acc_stderr": 0.033773102522091945, | |
| "acc_norm": 0.30994152046783624, | |
| "acc_norm_stderr": 0.035469769593931624 | |
| }, | |
| "hendrycksTest-logical_fallacies": { | |
| "acc": 0.22699386503067484, | |
| "acc_stderr": 0.032910995786157686, | |
| "acc_norm": 0.2883435582822086, | |
| "acc_norm_stderr": 0.035590395316173425 | |
| }, | |
| "crows_pairs_french_nationality": { | |
| "likelihood_difference": 7.36919466403162, | |
| "likelihood_difference_stderr": 0.3929905019461457, | |
| "pct_stereotype": 0.2964426877470356, | |
| "pct_stereotype_stderr": 0.028768673758013903 | |
| }, | |
| "crows_pairs_french_autre": { | |
| "likelihood_difference": 4.103365384615385, | |
| "likelihood_difference_stderr": 1.0499970465523882, | |
| "pct_stereotype": 0.3076923076923077, | |
| "pct_stereotype_stderr": 0.13323467750529824 | |
| }, | |
| "hendrycksTest-high_school_biology": { | |
| "acc": 0.24193548387096775, | |
| "acc_stderr": 0.024362599693031086, | |
| "acc_norm": 0.3, | |
| "acc_norm_stderr": 0.02606936229533513 | |
| }, | |
| "hendrycksTest-medical_genetics": { | |
| "acc": 0.23, | |
| "acc_stderr": 0.04229525846816507, | |
| "acc_norm": 0.35, | |
| "acc_norm_stderr": 0.047937248544110196 | |
| }, | |
| "hendrycksTest-us_foreign_policy": { | |
| "acc": 0.27, | |
| "acc_stderr": 0.04461960433384739, | |
| "acc_norm": 0.3, | |
| "acc_norm_stderr": 0.046056618647183814 | |
| }, | |
| "hendrycksTest-professional_law": { | |
| "acc": 0.24837027379400262, | |
| "acc_stderr": 0.01103521259803449, | |
| "acc_norm": 0.27444589308996087, | |
| "acc_norm_stderr": 0.011397043163078154 | |
| }, | |
| "crows_pairs_english_sexual_orientation": { | |
| "likelihood_difference": 4.869623655913978, | |
| "likelihood_difference_stderr": 0.5959735406192751, | |
| "pct_stereotype": 0.7849462365591398, | |
| "pct_stereotype_stderr": 0.04283507835554754 | |
| }, | |
| "hendrycksTest-professional_psychology": { | |
| "acc": 0.2565359477124183, | |
| "acc_stderr": 0.017667841612378984, | |
| "acc_norm": 0.25163398692810457, | |
| "acc_norm_stderr": 0.017555818091322256 | |
| }, | |
| "crows_pairs_english_nationality": { | |
| "likelihood_difference": 3.8365162037037037, | |
| "likelihood_difference_stderr": 0.2671010238288838, | |
| "pct_stereotype": 0.4444444444444444, | |
| "pct_stereotype_stderr": 0.03388857118502326 | |
| }, | |
| "hendrycksTest-professional_medicine": { | |
| "acc": 0.3014705882352941, | |
| "acc_stderr": 0.027875982114273168, | |
| "acc_norm": 0.26838235294117646, | |
| "acc_norm_stderr": 0.02691748122437721 | |
| }, | |
| "winogrande": { | |
| "acc": 0.494869771112865, | |
| "acc_stderr": 0.014051745961790516 | |
| }, | |
| "hendrycksTest-high_school_macroeconomics": { | |
| "acc": 0.23333333333333334, | |
| "acc_stderr": 0.02144454730156047, | |
| "acc_norm": 0.2717948717948718, | |
| "acc_norm_stderr": 0.02255655101013236 | |
| }, | |
| "hendrycksTest-human_aging": { | |
| "acc": 0.3004484304932735, | |
| "acc_stderr": 0.030769352008229136, | |
| "acc_norm": 0.242152466367713, | |
| "acc_norm_stderr": 0.028751392398694755 | |
| }, | |
| "hendrycksTest-college_physics": { | |
| "acc": 0.19607843137254902, | |
| "acc_stderr": 0.03950581861179962, | |
| "acc_norm": 0.21568627450980393, | |
| "acc_norm_stderr": 0.04092563958237654 | |
| }, | |
| "logiqa": { | |
| "acc": 0.2227342549923195, | |
| "acc_stderr": 0.01632005404616512, | |
| "acc_norm": 0.27956989247311825, | |
| "acc_norm_stderr": 0.017602909186822453 | |
| }, | |
| "hendrycksTest-high_school_chemistry": { | |
| "acc": 0.19704433497536947, | |
| "acc_stderr": 0.02798672466673622, | |
| "acc_norm": 0.23645320197044334, | |
| "acc_norm_stderr": 0.02989611429173355 | |
| }, | |
| "hendrycksTest-public_relations": { | |
| "acc": 0.2909090909090909, | |
| "acc_stderr": 0.04350271442923243, | |
| "acc_norm": 0.2, | |
| "acc_norm_stderr": 0.038313051408846034 | |
| }, | |
| "arc_challenge": { | |
| "acc": 0.1757679180887372, | |
| "acc_stderr": 0.011122850863120485, | |
| "acc_norm": 0.21331058020477817, | |
| "acc_norm_stderr": 0.011970971742326334 | |
| }, | |
| "crows_pairs_english_age": { | |
| "likelihood_difference": 2.652129120879121, | |
| "likelihood_difference_stderr": 0.2944534289937784, | |
| "pct_stereotype": 0.5164835164835165, | |
| "pct_stereotype_stderr": 0.05267597952306975 | |
| } | |
| }, | |
| "versions": { | |
| "hendrycksTest-sociology": 0, | |
| "hendrycksTest-high_school_microeconomics": 0, | |
| "hendrycksTest-college_computer_science": 0, | |
| "hendrycksTest-conceptual_physics": 0, | |
| "hendrycksTest-high_school_statistics": 0, | |
| "hendrycksTest-clinical_knowledge": 0, | |
| "piqa": 0, | |
| "crows_pairs_french_disability": 0, | |
| "hendrycksTest-college_medicine": 0, | |
| "crows_pairs_english_disability": 0, | |
| "hendrycksTest-econometrics": 0, | |
| "hendrycksTest-business_ethics": 0, | |
| "hendrycksTest-high_school_government_and_politics": 0, | |
| "crows_pairs_french_sexual_orientation": 0, | |
| "hendrycksTest-abstract_algebra": 0, | |
| "crows_pairs_french_socioeconomic": 0, | |
| "crows_pairs_english": 0, | |
| "crows_pairs_french": 0, | |
| "hendrycksTest-college_mathematics": 0, | |
| "hendrycksTest-miscellaneous": 0, | |
| "hendrycksTest-moral_disputes": 0, | |
| "hendrycksTest-college_chemistry": 0, | |
| "hendrycksTest-moral_scenarios": 0, | |
| "hendrycksTest-high_school_mathematics": 0, | |
| "hendrycksTest-high_school_us_history": 0, | |
| "crows_pairs_french_gender": 0, | |
| "crows_pairs_english_physical_appearance": 0, | |
| "hendrycksTest-high_school_physics": 0, | |
| "wsc": 0, | |
| "hendrycksTest-jurisprudence": 0, | |
| "arc_easy": 0, | |
| "hendrycksTest-formal_logic": 0, | |
| "hendrycksTest-high_school_psychology": 0, | |
| "crows_pairs_english_autre": 0, | |
| "hendrycksTest-high_school_european_history": 0, | |
| "crows_pairs_english_socioeconomic": 0, | |
| "hendrycksTest-electrical_engineering": 0, | |
| "hendrycksTest-anatomy": 0, | |
| "crows_pairs_french_physical_appearance": 0, | |
| "hendrycksTest-philosophy": 0, | |
| "lambada_openai": 0, | |
| "hendrycksTest-high_school_computer_science": 0, | |
| "hendrycksTest-nutrition": 0, | |
| "hendrycksTest-virology": 0, | |
| "crows_pairs_english_gender": 0, | |
| "hendrycksTest-computer_security": 0, | |
| "hendrycksTest-professional_accounting": 0, | |
| "hendrycksTest-machine_learning": 0, | |
| "crows_pairs_english_race_color": 0, | |
| "crows_pairs_english_religion": 0, | |
| "hendrycksTest-management": 0, | |
| "sciq": 0, | |
| "hendrycksTest-astronomy": 0, | |
| "hendrycksTest-high_school_world_history": 0, | |
| "crows_pairs_french_race_color": 0, | |
| "hendrycksTest-global_facts": 0, | |
| "hendrycksTest-human_sexuality": 0, | |
| "hendrycksTest-prehistory": 0, | |
| "hendrycksTest-college_biology": 0, | |
| "crows_pairs_french_age": 0, | |
| "hendrycksTest-marketing": 0, | |
| "hendrycksTest-security_studies": 0, | |
| "hendrycksTest-international_law": 0, | |
| "hendrycksTest-elementary_mathematics": 0, | |
| "hendrycksTest-high_school_geography": 0, | |
| "crows_pairs_french_religion": 0, | |
| "hendrycksTest-world_religions": 0, | |
| "hendrycksTest-logical_fallacies": 0, | |
| "crows_pairs_french_nationality": 0, | |
| "crows_pairs_french_autre": 0, | |
| "hendrycksTest-high_school_biology": 0, | |
| "hendrycksTest-medical_genetics": 0, | |
| "hendrycksTest-us_foreign_policy": 0, | |
| "hendrycksTest-professional_law": 0, | |
| "crows_pairs_english_sexual_orientation": 0, | |
| "hendrycksTest-professional_psychology": 0, | |
| "crows_pairs_english_nationality": 0, | |
| "hendrycksTest-professional_medicine": 0, | |
| "winogrande": 0, | |
| "hendrycksTest-high_school_macroeconomics": 0, | |
| "hendrycksTest-human_aging": 0, | |
| "hendrycksTest-college_physics": 0, | |
| "logiqa": 0, | |
| "hendrycksTest-high_school_chemistry": 0, | |
| "hendrycksTest-public_relations": 0, | |
| "arc_challenge": 0, | |
| "crows_pairs_english_age": 0 | |
| }, | |
| "config": { | |
| "model": "hf-causal", | |
| "model_args": "pretrained=EleutherAI/pythia-v1.1-70m,revision=step53000", | |
| "num_fewshot": 0, | |
| "batch_size": 16, | |
| "device": "cuda:5", | |
| "no_cache": true, | |
| "limit": null, | |
| "bootstrap_iters": 100000, | |
| "description_dict": {} | |
| } | |
| } |