Spaces:
Running
Running
| { | |
| "results": { | |
| "hendrycksTest-elementary_mathematics": { | |
| "acc": 0.21693121693121692, | |
| "acc_stderr": 0.021227082449445045, | |
| "acc_norm": 0.21164021164021163, | |
| "acc_norm_stderr": 0.02103733150526289 | |
| }, | |
| "hendrycksTest-high_school_world_history": { | |
| "acc": 0.1729957805907173, | |
| "acc_stderr": 0.024621562866768427, | |
| "acc_norm": 0.25316455696202533, | |
| "acc_norm_stderr": 0.028304657943035296 | |
| }, | |
| "winogrande": { | |
| "acc": 0.4972375690607735, | |
| "acc_stderr": 0.014052271211616445 | |
| }, | |
| "hendrycksTest-high_school_macroeconomics": { | |
| "acc": 0.21794871794871795, | |
| "acc_stderr": 0.020932445774463175, | |
| "acc_norm": 0.26153846153846155, | |
| "acc_norm_stderr": 0.02228214120420442 | |
| }, | |
| "hendrycksTest-machine_learning": { | |
| "acc": 0.24107142857142858, | |
| "acc_stderr": 0.04059867246952685, | |
| "acc_norm": 0.2857142857142857, | |
| "acc_norm_stderr": 0.04287858751340456 | |
| }, | |
| "hendrycksTest-college_physics": { | |
| "acc": 0.22549019607843138, | |
| "acc_stderr": 0.041583075330832865, | |
| "acc_norm": 0.23529411764705882, | |
| "acc_norm_stderr": 0.04220773659171452 | |
| }, | |
| "hendrycksTest-anatomy": { | |
| "acc": 0.23703703703703705, | |
| "acc_stderr": 0.03673731683969506, | |
| "acc_norm": 0.28888888888888886, | |
| "acc_norm_stderr": 0.0391545063041425 | |
| }, | |
| "hendrycksTest-management": { | |
| "acc": 0.23300970873786409, | |
| "acc_stderr": 0.04185832598928315, | |
| "acc_norm": 0.23300970873786409, | |
| "acc_norm_stderr": 0.04185832598928315 | |
| }, | |
| "hendrycksTest-college_medicine": { | |
| "acc": 0.21965317919075145, | |
| "acc_stderr": 0.031568093627031744, | |
| "acc_norm": 0.2658959537572254, | |
| "acc_norm_stderr": 0.03368762932259431 | |
| }, | |
| "hendrycksTest-conceptual_physics": { | |
| "acc": 0.28936170212765955, | |
| "acc_stderr": 0.029644006577009618, | |
| "acc_norm": 0.24680851063829787, | |
| "acc_norm_stderr": 0.02818544130123408 | |
| }, | |
| "hendrycksTest-professional_psychology": { | |
| "acc": 0.2222222222222222, | |
| "acc_stderr": 0.01681902837573638, | |
| "acc_norm": 0.24836601307189543, | |
| "acc_norm_stderr": 0.017479487001364764 | |
| }, | |
| "hendrycksTest-high_school_geography": { | |
| "acc": 0.21717171717171718, | |
| "acc_stderr": 0.029376616484945637, | |
| "acc_norm": 0.26262626262626265, | |
| "acc_norm_stderr": 0.031353050095330855 | |
| }, | |
| "crows_pairs_english_physical_appearance": { | |
| "likelihood_difference": 5.821180555555555, | |
| "likelihood_difference_stderr": 0.7528994326490429, | |
| "pct_stereotype": 0.5416666666666666, | |
| "pct_stereotype_stderr": 0.05913268547421811 | |
| }, | |
| "crows_pairs_english_disability": { | |
| "likelihood_difference": 7.460576923076923, | |
| "likelihood_difference_stderr": 1.1593364221878786, | |
| "pct_stereotype": 0.5076923076923077, | |
| "pct_stereotype_stderr": 0.062492603112584276 | |
| }, | |
| "hendrycksTest-logical_fallacies": { | |
| "acc": 0.17177914110429449, | |
| "acc_stderr": 0.02963471727237102, | |
| "acc_norm": 0.27607361963190186, | |
| "acc_norm_stderr": 0.0351238528370505 | |
| }, | |
| "hendrycksTest-security_studies": { | |
| "acc": 0.3183673469387755, | |
| "acc_stderr": 0.02982253379398205, | |
| "acc_norm": 0.21224489795918366, | |
| "acc_norm_stderr": 0.026176967197866767 | |
| }, | |
| "hendrycksTest-virology": { | |
| "acc": 0.16265060240963855, | |
| "acc_stderr": 0.028730237892613798, | |
| "acc_norm": 0.21084337349397592, | |
| "acc_norm_stderr": 0.031755547866299215 | |
| }, | |
| "hendrycksTest-us_foreign_policy": { | |
| "acc": 0.22, | |
| "acc_stderr": 0.04163331998932269, | |
| "acc_norm": 0.22, | |
| "acc_norm_stderr": 0.0416333199893227 | |
| }, | |
| "hendrycksTest-econometrics": { | |
| "acc": 0.22807017543859648, | |
| "acc_stderr": 0.03947152782669415, | |
| "acc_norm": 0.32456140350877194, | |
| "acc_norm_stderr": 0.04404556157374768 | |
| }, | |
| "hendrycksTest-high_school_government_and_politics": { | |
| "acc": 0.21243523316062177, | |
| "acc_stderr": 0.029519282616817254, | |
| "acc_norm": 0.24870466321243523, | |
| "acc_norm_stderr": 0.03119584087770029 | |
| }, | |
| "logiqa": { | |
| "acc": 0.20890937019969277, | |
| "acc_stderr": 0.015945399396423896, | |
| "acc_norm": 0.24270353302611367, | |
| "acc_norm_stderr": 0.016815676206479523 | |
| }, | |
| "hendrycksTest-human_sexuality": { | |
| "acc": 0.2366412213740458, | |
| "acc_stderr": 0.03727673575596919, | |
| "acc_norm": 0.25190839694656486, | |
| "acc_norm_stderr": 0.038073871163060866 | |
| }, | |
| "crows_pairs_french_autre": { | |
| "likelihood_difference": 6.8173076923076925, | |
| "likelihood_difference_stderr": 1.7019864539843879, | |
| "pct_stereotype": 0.5384615384615384, | |
| "pct_stereotype_stderr": 0.14390989949130545 | |
| }, | |
| "crows_pairs_french_disability": { | |
| "likelihood_difference": 14.00189393939394, | |
| "likelihood_difference_stderr": 1.4054078154752692, | |
| "pct_stereotype": 0.36363636363636365, | |
| "pct_stereotype_stderr": 0.05966637484671757 | |
| }, | |
| "hendrycksTest-high_school_psychology": { | |
| "acc": 0.23486238532110093, | |
| "acc_stderr": 0.01817511051034359, | |
| "acc_norm": 0.24954128440366974, | |
| "acc_norm_stderr": 0.018553897629501624 | |
| }, | |
| "sciq": { | |
| "acc": 0.199, | |
| "acc_stderr": 0.012631649083099186, | |
| "acc_norm": 0.218, | |
| "acc_norm_stderr": 0.013063179040595282 | |
| }, | |
| "crows_pairs_french_socioeconomic": { | |
| "likelihood_difference": 11.76251594387755, | |
| "likelihood_difference_stderr": 0.7977323948668976, | |
| "pct_stereotype": 0.3673469387755102, | |
| "pct_stereotype_stderr": 0.03452261728704165 | |
| }, | |
| "crows_pairs_french_religion": { | |
| "likelihood_difference": 11.039673913043478, | |
| "likelihood_difference_stderr": 0.8746581584196995, | |
| "pct_stereotype": 0.6608695652173913, | |
| "pct_stereotype_stderr": 0.04433930011819816 | |
| }, | |
| "hendrycksTest-jurisprudence": { | |
| "acc": 0.18518518518518517, | |
| "acc_stderr": 0.03755265865037181, | |
| "acc_norm": 0.24074074074074073, | |
| "acc_norm_stderr": 0.041331194402438376 | |
| }, | |
| "crows_pairs_french_gender": { | |
| "likelihood_difference": 7.126947040498442, | |
| "likelihood_difference_stderr": 0.41934707639834146, | |
| "pct_stereotype": 0.48598130841121495, | |
| "pct_stereotype_stderr": 0.027939861549302374 | |
| }, | |
| "hendrycksTest-college_mathematics": { | |
| "acc": 0.13, | |
| "acc_stderr": 0.03379976689896309, | |
| "acc_norm": 0.21, | |
| "acc_norm_stderr": 0.040936018074033256 | |
| }, | |
| "arc_challenge": { | |
| "acc": 0.20819112627986347, | |
| "acc_stderr": 0.01186486611844807, | |
| "acc_norm": 0.24488054607508533, | |
| "acc_norm_stderr": 0.012566273985131356 | |
| }, | |
| "hendrycksTest-clinical_knowledge": { | |
| "acc": 0.1660377358490566, | |
| "acc_stderr": 0.022902064724569966, | |
| "acc_norm": 0.3018867924528302, | |
| "acc_norm_stderr": 0.028254200344438655 | |
| }, | |
| "crows_pairs_english_autre": { | |
| "likelihood_difference": 5.184659090909091, | |
| "likelihood_difference_stderr": 2.7270102264769593, | |
| "pct_stereotype": 0.6363636363636364, | |
| "pct_stereotype_stderr": 0.15212000482437738 | |
| }, | |
| "crows_pairs_english_socioeconomic": { | |
| "likelihood_difference": 5.49078947368421, | |
| "likelihood_difference_stderr": 0.48194076993788276, | |
| "pct_stereotype": 0.6157894736842106, | |
| "pct_stereotype_stderr": 0.03538097998767891 | |
| }, | |
| "hendrycksTest-medical_genetics": { | |
| "acc": 0.28, | |
| "acc_stderr": 0.04512608598542128, | |
| "acc_norm": 0.24, | |
| "acc_norm_stderr": 0.04292346959909283 | |
| }, | |
| "crows_pairs_english_race_color": { | |
| "likelihood_difference": 5.203678641732283, | |
| "likelihood_difference_stderr": 0.3157259260594444, | |
| "pct_stereotype": 0.33267716535433073, | |
| "pct_stereotype_stderr": 0.02092548388333584 | |
| }, | |
| "hendrycksTest-human_aging": { | |
| "acc": 0.2645739910313901, | |
| "acc_stderr": 0.029605103217038315, | |
| "acc_norm": 0.2825112107623318, | |
| "acc_norm_stderr": 0.03021683101150877 | |
| }, | |
| "crows_pairs_french_nationality": { | |
| "likelihood_difference": 8.90736166007905, | |
| "likelihood_difference_stderr": 0.48994737960034646, | |
| "pct_stereotype": 0.5177865612648221, | |
| "pct_stereotype_stderr": 0.03147710419094347 | |
| }, | |
| "hendrycksTest-high_school_statistics": { | |
| "acc": 0.2222222222222222, | |
| "acc_stderr": 0.028353212866863445, | |
| "acc_norm": 0.2824074074074074, | |
| "acc_norm_stderr": 0.030701372111510937 | |
| }, | |
| "hendrycksTest-philosophy": { | |
| "acc": 0.2508038585209003, | |
| "acc_stderr": 0.024619771956697168, | |
| "acc_norm": 0.2765273311897106, | |
| "acc_norm_stderr": 0.02540383297817961 | |
| }, | |
| "hendrycksTest-miscellaneous": { | |
| "acc": 0.24265644955300128, | |
| "acc_stderr": 0.01532988894089986, | |
| "acc_norm": 0.26053639846743293, | |
| "acc_norm_stderr": 0.01569600856380707 | |
| }, | |
| "hendrycksTest-professional_law": { | |
| "acc": 0.2242503259452412, | |
| "acc_stderr": 0.010652615824906163, | |
| "acc_norm": 0.2529335071707953, | |
| "acc_norm_stderr": 0.011102268713839989 | |
| }, | |
| "hendrycksTest-astronomy": { | |
| "acc": 0.19736842105263158, | |
| "acc_stderr": 0.03238981601699397, | |
| "acc_norm": 0.24342105263157895, | |
| "acc_norm_stderr": 0.034923496688842384 | |
| }, | |
| "crows_pairs_english_nationality": { | |
| "likelihood_difference": 5.985243055555555, | |
| "likelihood_difference_stderr": 0.48650219522921606, | |
| "pct_stereotype": 0.3333333333333333, | |
| "pct_stereotype_stderr": 0.03214952147802749 | |
| }, | |
| "hendrycksTest-college_chemistry": { | |
| "acc": 0.23, | |
| "acc_stderr": 0.04229525846816506, | |
| "acc_norm": 0.27, | |
| "acc_norm_stderr": 0.0446196043338474 | |
| }, | |
| "hendrycksTest-high_school_chemistry": { | |
| "acc": 0.22660098522167488, | |
| "acc_stderr": 0.029454863835292968, | |
| "acc_norm": 0.21674876847290642, | |
| "acc_norm_stderr": 0.028990331252516235 | |
| }, | |
| "hendrycksTest-high_school_european_history": { | |
| "acc": 0.17575757575757575, | |
| "acc_stderr": 0.02972094300622445, | |
| "acc_norm": 0.23030303030303031, | |
| "acc_norm_stderr": 0.03287666758603488 | |
| }, | |
| "hendrycksTest-high_school_us_history": { | |
| "acc": 0.19117647058823528, | |
| "acc_stderr": 0.027599174300640766, | |
| "acc_norm": 0.2696078431372549, | |
| "acc_norm_stderr": 0.031145570659486782 | |
| }, | |
| "hendrycksTest-college_computer_science": { | |
| "acc": 0.23, | |
| "acc_stderr": 0.042295258468165044, | |
| "acc_norm": 0.22, | |
| "acc_norm_stderr": 0.04163331998932269 | |
| }, | |
| "crows_pairs_french_race_color": { | |
| "likelihood_difference": 8.887228260869565, | |
| "likelihood_difference_stderr": 0.3298429589043161, | |
| "pct_stereotype": 0.717391304347826, | |
| "pct_stereotype_stderr": 0.02101669741793868 | |
| }, | |
| "crows_pairs_french": { | |
| "likelihood_difference": 9.346535852713178, | |
| "likelihood_difference_stderr": 0.21437404785240546, | |
| "pct_stereotype": 0.5742397137745975, | |
| "pct_stereotype_stderr": 0.012077920863042001 | |
| }, | |
| "hendrycksTest-world_religions": { | |
| "acc": 0.17543859649122806, | |
| "acc_stderr": 0.02917088550072768, | |
| "acc_norm": 0.2222222222222222, | |
| "acc_norm_stderr": 0.03188578017686399 | |
| }, | |
| "hendrycksTest-global_facts": { | |
| "acc": 0.21, | |
| "acc_stderr": 0.040936018074033256, | |
| "acc_norm": 0.22, | |
| "acc_norm_stderr": 0.04163331998932269 | |
| }, | |
| "hendrycksTest-abstract_algebra": { | |
| "acc": 0.2, | |
| "acc_stderr": 0.04020151261036843, | |
| "acc_norm": 0.15, | |
| "acc_norm_stderr": 0.035887028128263734 | |
| }, | |
| "hendrycksTest-business_ethics": { | |
| "acc": 0.32, | |
| "acc_stderr": 0.04688261722621504, | |
| "acc_norm": 0.27, | |
| "acc_norm_stderr": 0.0446196043338474 | |
| }, | |
| "hendrycksTest-international_law": { | |
| "acc": 0.09917355371900827, | |
| "acc_stderr": 0.027285246312758957, | |
| "acc_norm": 0.2809917355371901, | |
| "acc_norm_stderr": 0.04103203830514512 | |
| }, | |
| "hendrycksTest-college_biology": { | |
| "acc": 0.2013888888888889, | |
| "acc_stderr": 0.03353647469713839, | |
| "acc_norm": 0.2361111111111111, | |
| "acc_norm_stderr": 0.03551446610810826 | |
| }, | |
| "wsc": { | |
| "acc": 0.375, | |
| "acc_stderr": 0.04770204856076104 | |
| }, | |
| "hendrycksTest-high_school_microeconomics": { | |
| "acc": 0.18067226890756302, | |
| "acc_stderr": 0.024991964966600753, | |
| "acc_norm": 0.2857142857142857, | |
| "acc_norm_stderr": 0.029344572500634342 | |
| }, | |
| "hendrycksTest-prehistory": { | |
| "acc": 0.2623456790123457, | |
| "acc_stderr": 0.02447722285613512, | |
| "acc_norm": 0.23765432098765432, | |
| "acc_norm_stderr": 0.023683591837008557 | |
| }, | |
| "hendrycksTest-professional_accounting": { | |
| "acc": 0.2695035460992908, | |
| "acc_stderr": 0.02646903681859063, | |
| "acc_norm": 0.2801418439716312, | |
| "acc_norm_stderr": 0.02678917235114024 | |
| }, | |
| "piqa": { | |
| "acc": 0.5402611534276387, | |
| "acc_stderr": 0.011627942981817168, | |
| "acc_norm": 0.5195865070729053, | |
| "acc_norm_stderr": 0.011656869979288454 | |
| }, | |
| "hendrycksTest-moral_scenarios": { | |
| "acc": 0.23798882681564246, | |
| "acc_stderr": 0.014242630070574915, | |
| "acc_norm": 0.27262569832402234, | |
| "acc_norm_stderr": 0.014893391735249588 | |
| }, | |
| "hendrycksTest-computer_security": { | |
| "acc": 0.25, | |
| "acc_stderr": 0.04351941398892446, | |
| "acc_norm": 0.29, | |
| "acc_norm_stderr": 0.045604802157206845 | |
| }, | |
| "hendrycksTest-high_school_biology": { | |
| "acc": 0.19032258064516128, | |
| "acc_stderr": 0.02233170761182307, | |
| "acc_norm": 0.24516129032258063, | |
| "acc_norm_stderr": 0.02447224384089552 | |
| }, | |
| "hendrycksTest-high_school_mathematics": { | |
| "acc": 0.1814814814814815, | |
| "acc_stderr": 0.023499264669407306, | |
| "acc_norm": 0.27037037037037037, | |
| "acc_norm_stderr": 0.027080372815145658 | |
| }, | |
| "crows_pairs_english": { | |
| "likelihood_difference": 5.231738223017293, | |
| "likelihood_difference_stderr": 0.1788777548247783, | |
| "pct_stereotype": 0.43530113297555156, | |
| "pct_stereotype_stderr": 0.012110619233278561 | |
| }, | |
| "crows_pairs_english_sexual_orientation": { | |
| "likelihood_difference": 5.415994623655914, | |
| "likelihood_difference_stderr": 0.7173989239033504, | |
| "pct_stereotype": 0.45161290322580644, | |
| "pct_stereotype_stderr": 0.051883930752016603 | |
| }, | |
| "hendrycksTest-nutrition": { | |
| "acc": 0.20261437908496732, | |
| "acc_stderr": 0.023015446877985693, | |
| "acc_norm": 0.2647058823529412, | |
| "acc_norm_stderr": 0.02526169121972948 | |
| }, | |
| "hendrycksTest-sociology": { | |
| "acc": 0.2885572139303483, | |
| "acc_stderr": 0.0320384104021332, | |
| "acc_norm": 0.31343283582089554, | |
| "acc_norm_stderr": 0.03280188205348642 | |
| }, | |
| "hendrycksTest-high_school_computer_science": { | |
| "acc": 0.22, | |
| "acc_stderr": 0.041633319989322695, | |
| "acc_norm": 0.36, | |
| "acc_norm_stderr": 0.04824181513244218 | |
| }, | |
| "crows_pairs_english_age": { | |
| "likelihood_difference": 3.3361950549450547, | |
| "likelihood_difference_stderr": 0.5594307162046473, | |
| "pct_stereotype": 0.45054945054945056, | |
| "pct_stereotype_stderr": 0.052446231001012276 | |
| }, | |
| "hendrycksTest-high_school_physics": { | |
| "acc": 0.1986754966887417, | |
| "acc_stderr": 0.032578473844367774, | |
| "acc_norm": 0.2781456953642384, | |
| "acc_norm_stderr": 0.036586032627637426 | |
| }, | |
| "hendrycksTest-public_relations": { | |
| "acc": 0.3, | |
| "acc_stderr": 0.04389311454644287, | |
| "acc_norm": 0.17272727272727273, | |
| "acc_norm_stderr": 0.03620691833929219 | |
| }, | |
| "arc_easy": { | |
| "acc": 0.2706228956228956, | |
| "acc_stderr": 0.009116466166403832, | |
| "acc_norm": 0.26515151515151514, | |
| "acc_norm_stderr": 0.009057621139172618 | |
| }, | |
| "hendrycksTest-moral_disputes": { | |
| "acc": 0.21676300578034682, | |
| "acc_stderr": 0.022183477668412853, | |
| "acc_norm": 0.20520231213872833, | |
| "acc_norm_stderr": 0.021742519835276277 | |
| }, | |
| "hendrycksTest-marketing": { | |
| "acc": 0.19230769230769232, | |
| "acc_stderr": 0.025819233256483713, | |
| "acc_norm": 0.24358974358974358, | |
| "acc_norm_stderr": 0.0281209665039144 | |
| }, | |
| "hendrycksTest-electrical_engineering": { | |
| "acc": 0.21379310344827587, | |
| "acc_stderr": 0.03416520447747549, | |
| "acc_norm": 0.20689655172413793, | |
| "acc_norm_stderr": 0.03375672449560554 | |
| }, | |
| "crows_pairs_french_physical_appearance": { | |
| "likelihood_difference": 9.36111111111111, | |
| "likelihood_difference_stderr": 1.299896180819777, | |
| "pct_stereotype": 0.5833333333333334, | |
| "pct_stereotype_stderr": 0.05850912479161746 | |
| }, | |
| "hendrycksTest-formal_logic": { | |
| "acc": 0.29365079365079366, | |
| "acc_stderr": 0.040735243221471276, | |
| "acc_norm": 0.2777777777777778, | |
| "acc_norm_stderr": 0.04006168083848876 | |
| }, | |
| "crows_pairs_english_religion": { | |
| "likelihood_difference": 5.315596846846847, | |
| "likelihood_difference_stderr": 0.6585436977642254, | |
| "pct_stereotype": 0.40540540540540543, | |
| "pct_stereotype_stderr": 0.046812183988348 | |
| }, | |
| "hendrycksTest-professional_medicine": { | |
| "acc": 0.2426470588235294, | |
| "acc_stderr": 0.026040662474201264, | |
| "acc_norm": 0.28308823529411764, | |
| "acc_norm_stderr": 0.02736586113151381 | |
| }, | |
| "lambada_openai": { | |
| "ppl": 2347965.083490206, | |
| "ppl_stderr": 208687.88666648002, | |
| "acc": 0.0, | |
| "acc_stderr": 0.0 | |
| }, | |
| "crows_pairs_english_gender": { | |
| "likelihood_difference": 4.4865234375, | |
| "likelihood_difference_stderr": 0.46807997363511794, | |
| "pct_stereotype": 0.515625, | |
| "pct_stereotype_stderr": 0.027980952958187033 | |
| }, | |
| "crows_pairs_french_sexual_orientation": { | |
| "likelihood_difference": 14.45467032967033, | |
| "likelihood_difference_stderr": 1.0156884564726079, | |
| "pct_stereotype": 0.8021978021978022, | |
| "pct_stereotype_stderr": 0.04198895203196222 | |
| }, | |
| "crows_pairs_french_age": { | |
| "likelihood_difference": 5.195138888888889, | |
| "likelihood_difference_stderr": 0.6908977353059731, | |
| "pct_stereotype": 0.5777777777777777, | |
| "pct_stereotype_stderr": 0.05235473399540658 | |
| } | |
| }, | |
| "versions": { | |
| "hendrycksTest-elementary_mathematics": 0, | |
| "hendrycksTest-high_school_world_history": 0, | |
| "winogrande": 0, | |
| "hendrycksTest-high_school_macroeconomics": 0, | |
| "hendrycksTest-machine_learning": 0, | |
| "hendrycksTest-college_physics": 0, | |
| "hendrycksTest-anatomy": 0, | |
| "hendrycksTest-management": 0, | |
| "hendrycksTest-college_medicine": 0, | |
| "hendrycksTest-conceptual_physics": 0, | |
| "hendrycksTest-professional_psychology": 0, | |
| "hendrycksTest-high_school_geography": 0, | |
| "crows_pairs_english_physical_appearance": 0, | |
| "crows_pairs_english_disability": 0, | |
| "hendrycksTest-logical_fallacies": 0, | |
| "hendrycksTest-security_studies": 0, | |
| "hendrycksTest-virology": 0, | |
| "hendrycksTest-us_foreign_policy": 0, | |
| "hendrycksTest-econometrics": 0, | |
| "hendrycksTest-high_school_government_and_politics": 0, | |
| "logiqa": 0, | |
| "hendrycksTest-human_sexuality": 0, | |
| "crows_pairs_french_autre": 0, | |
| "crows_pairs_french_disability": 0, | |
| "hendrycksTest-high_school_psychology": 0, | |
| "sciq": 0, | |
| "crows_pairs_french_socioeconomic": 0, | |
| "crows_pairs_french_religion": 0, | |
| "hendrycksTest-jurisprudence": 0, | |
| "crows_pairs_french_gender": 0, | |
| "hendrycksTest-college_mathematics": 0, | |
| "arc_challenge": 0, | |
| "hendrycksTest-clinical_knowledge": 0, | |
| "crows_pairs_english_autre": 0, | |
| "crows_pairs_english_socioeconomic": 0, | |
| "hendrycksTest-medical_genetics": 0, | |
| "crows_pairs_english_race_color": 0, | |
| "hendrycksTest-human_aging": 0, | |
| "crows_pairs_french_nationality": 0, | |
| "hendrycksTest-high_school_statistics": 0, | |
| "hendrycksTest-philosophy": 0, | |
| "hendrycksTest-miscellaneous": 0, | |
| "hendrycksTest-professional_law": 0, | |
| "hendrycksTest-astronomy": 0, | |
| "crows_pairs_english_nationality": 0, | |
| "hendrycksTest-college_chemistry": 0, | |
| "hendrycksTest-high_school_chemistry": 0, | |
| "hendrycksTest-high_school_european_history": 0, | |
| "hendrycksTest-high_school_us_history": 0, | |
| "hendrycksTest-college_computer_science": 0, | |
| "crows_pairs_french_race_color": 0, | |
| "crows_pairs_french": 0, | |
| "hendrycksTest-world_religions": 0, | |
| "hendrycksTest-global_facts": 0, | |
| "hendrycksTest-abstract_algebra": 0, | |
| "hendrycksTest-business_ethics": 0, | |
| "hendrycksTest-international_law": 0, | |
| "hendrycksTest-college_biology": 0, | |
| "wsc": 0, | |
| "hendrycksTest-high_school_microeconomics": 0, | |
| "hendrycksTest-prehistory": 0, | |
| "hendrycksTest-professional_accounting": 0, | |
| "piqa": 0, | |
| "hendrycksTest-moral_scenarios": 0, | |
| "hendrycksTest-computer_security": 0, | |
| "hendrycksTest-high_school_biology": 0, | |
| "hendrycksTest-high_school_mathematics": 0, | |
| "crows_pairs_english": 0, | |
| "crows_pairs_english_sexual_orientation": 0, | |
| "hendrycksTest-nutrition": 0, | |
| "hendrycksTest-sociology": 0, | |
| "hendrycksTest-high_school_computer_science": 0, | |
| "crows_pairs_english_age": 0, | |
| "hendrycksTest-high_school_physics": 0, | |
| "hendrycksTest-public_relations": 0, | |
| "arc_easy": 0, | |
| "hendrycksTest-moral_disputes": 0, | |
| "hendrycksTest-marketing": 0, | |
| "hendrycksTest-electrical_engineering": 0, | |
| "crows_pairs_french_physical_appearance": 0, | |
| "hendrycksTest-formal_logic": 0, | |
| "crows_pairs_english_religion": 0, | |
| "hendrycksTest-professional_medicine": 0, | |
| "lambada_openai": 0, | |
| "crows_pairs_english_gender": 0, | |
| "crows_pairs_french_sexual_orientation": 0, | |
| "crows_pairs_french_age": 0 | |
| }, | |
| "config": { | |
| "model": "hf-causal", | |
| "model_args": "use_accelerate=True,pretrained=EleutherAI/pythia-v1.1-70m,revision=step64", | |
| "num_fewshot": 0, | |
| "batch_size": 32, | |
| "device": null, | |
| "no_cache": true, | |
| "limit": null, | |
| "bootstrap_iters": 100000, | |
| "description_dict": {} | |
| } | |
| } |