sykuang's picture
MLX 4bit quantized fork of lianghsun/Llama-3.2-Taiwan-3B-Instruct for vChewing autocomplete
07458f1 verified
metadata
# License identifier declared for this model card.
license: 'llama3.2'
# Language codes listed for this model; quoted so they always load as strings.
language:
  - 'zh'
  - 'en'
  - 'it'
  - 'de'
  - 'fr'
  - 'ja'
  - 'ko'
# Model repo that this card declares as its base.
base_model: 'lianghsun/Llama-3.2-Taiwan-3B-Instruct'
# Dataset repo IDs associated with this model. Keep each entry unique.
datasets:
  - lianghsun/tw-emergency-medicine-bench
  - lianghsun/tw-legal-nlp
  - lianghsun/tw-legal-synthetic-qa
  - lianghsun/tw-law-article-qa
  - lianghsun/tw-judgment-qa
  - lianghsun/tw-judgment-gist-chat
  - lianghsun/tw-bar-examination-2020-chat
  - lianghsun/tw-structured-law-article
  - lianghsun/tw-contract-review-chat
  - lianghsun/reasoning-base-20k-chat
  - lianghsun/vulnerability-mitigation-qa-zh_tw
  - lianghsun/tw-instruct
  - rombodawg/Everything_Instruct_Multilingual
  - xzuyn/manythings-translations-alpaca
  - neural-bridge/rag-dataset-12000
  - minyichen/glaive_toolcall_zh_tw
# Task category and the library the model files are declared for.
pipeline_tag: 'text-generation'
library_name: 'mlx'
# Free-form tags attached to the model card.
tags:
  - 'Taiwan'
  - 'ROC'
  - 'zh-tw'
  - 'instruct'
  - 'chat'
  - 'llama3.2'
  - 'SLM'
  - 'mlx'
# Example input text entry for the widget.
widget:
  - text: '中華民國憲法第一條'
# Metric names reported for this model.
metrics:
  - 'accuracy'
model-index:
  - name: Llama-3.2-Taiwan-3B-Instruct
    results:
      - task:
          type: text-generation
          name: Single Choice Question
        dataset:
          name: tw-legal-benchmark-v1
          type: lianghsun/tw-legal-benchmark-v1
        metrics:
          - type: accuracy
            value: 31.1
            name: single choice
      - task:
          type: text-generation
          name: Single Choice Question
        dataset:
          name: (Society) Formosa Taiwan Knowledge Bench
          type: lianghsun/Formosa-bench
          config: society
          split: test
          revision: v2024.11.27
        metrics:
          - type: accuracy
            value: 60.42
            name: single choice
      - task:
          type: text-generation
          name: Single Choice Question
        dataset:
          # Display name only; spelling corrected to "Government".
          name: (Government) Formosa Taiwan Knowledge Bench
          type: lianghsun/Formosa-bench
          # NOTE(review): `config` presumably has to match the dataset's actual
          # config name on the Hub, so it is left as-is; confirm the upstream
          # spelling before changing it.
          config: governmnt
          split: test
          revision: v2024.11.27
        metrics:
          - type: accuracy
            value: 44.25
            name: single choice
      - task:
          type: text-generation
          name: Single Choice Question
        dataset:
          name: (Geography) Formosa Taiwan Knowledge Bench
          type: lianghsun/Formosa-bench
          config: geography
          split: test
          revision: v2024.11.27
        metrics:
          - type: accuracy
            value: 47.54
            name: single choice
      - task:
          type: text-generation
          name: Single Choice Question
        dataset:
          name: (History) Formosa Taiwan Knowledge Bench
          type: lianghsun/Formosa-bench
          config: history
          split: test
          revision: v2024.11.27
        metrics:
          - type: accuracy
            value: 60
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (geography_of_taiwan) tmmlu++
          type: ikala/tmmluplus
          config: geography_of_taiwan
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 36.2
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (dentistry) tmmlu++
          type: ikala/tmmluplus
          config: dentistry
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 33.83
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (technical) tmmlu++
          type: ikala/tmmluplus
          config: technical
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 35.07
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (statistics_and_machine_learning) tmmlu++
          type: ikala/tmmluplus
          config: statistics_and_machine_learning
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 28.57
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (clinical_psychology) tmmlu++
          type: ikala/tmmluplus
          config: clinical_psychology
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 29.6
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (tve_design) tmmlu++
          type: ikala/tmmluplus
          config: tve_design
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 38.54
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (three_principles_of_people) tmmlu++
          type: ikala/tmmluplus
          config: three_principles_of_people
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 48.2
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (introduction_to_law) tmmlu++
          type: ikala/tmmluplus
          config: introduction_to_law
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 29.96
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (linear_algebra) tmmlu++
          type: ikala/tmmluplus
          config: linear_algebra
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 21.43
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (agriculture) tmmlu++
          type: ikala/tmmluplus
          config: agriculture
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 24.5
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (jce_humanities) tmmlu++
          type: ikala/tmmluplus
          config: jce_humanities
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 38.89
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (music) tmmlu++
          type: ikala/tmmluplus
          config: music
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 25.9
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (secondary_physics) tmmlu++
          type: ikala/tmmluplus
          config: secondary_physics
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 33.04
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (physics) tmmlu++
          type: ikala/tmmluplus
          config: physics
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 27.84
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (advance_chemistry) tmmlu++
          type: ikala/tmmluplus
          config: advance_chemistry
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 27.64
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (junior_science_exam) tmmlu++
          type: ikala/tmmluplus
          config: junior_science_exam
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 30.05
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (veterinary_pathology) tmmlu++
          type: ikala/tmmluplus
          config: veterinary_pathology
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 25.09
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (financial_analysis) tmmlu++
          type: ikala/tmmluplus
          config: financial_analysis
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 25.13
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (national_protection) tmmlu++
          type: ikala/tmmluplus
          config: national_protection
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 42.65
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (macroeconomics) tmmlu++
          type: ikala/tmmluplus
          config: macroeconomics
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 26.76
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (politic_science) tmmlu++
          type: ikala/tmmluplus
          config: politic_science
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 27.44
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (ttqav2) tmmlu++
          type: ikala/tmmluplus
          config: ttqav2
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 61.06
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (junior_chinese_exam) tmmlu++
          type: ikala/tmmluplus
          config: junior_chinese_exam
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 30.86
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (traditional_chinese_medicine_clinical_medicine) tmmlu++
          type: ikala/tmmluplus
          config: traditional_chinese_medicine_clinical_medicine
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 25.9
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (junior_math_exam) tmmlu++
          type: ikala/tmmluplus
          config: junior_math_exam
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 21.71
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (auditing) tmmlu++
          type: ikala/tmmluplus
          config: auditing
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 21.82
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (anti_money_laundering) tmmlu++
          type: ikala/tmmluplus
          config: anti_money_laundering
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 37.31
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (pharmacology) tmmlu++
          type: ikala/tmmluplus
          config: pharmacology
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 30.68
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (trust_practice) tmmlu++
          type: ikala/tmmluplus
          config: trust_practice
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 28.18
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (tve_mathematics) tmmlu++
          type: ikala/tmmluplus
          config: tve_mathematics
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 18.67
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (human_behavior) tmmlu++
          type: ikala/tmmluplus
          config: human_behavior
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 32.04
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (pharmacy) tmmlu++
          type: ikala/tmmluplus
          config: pharmacy
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 22.76
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (tve_chinese_language) tmmlu++
          type: ikala/tmmluplus
          config: tve_chinese_language
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 36.65
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (optometry) tmmlu++
          type: ikala/tmmluplus
          config: optometry
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 25.11
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (physical_education) tmmlu++
          type: ikala/tmmluplus
          config: physical_education
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 30.73
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (organic_chemistry) tmmlu++
          type: ikala/tmmluplus
          config: organic_chemistry
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 35.78
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (tve_natural_sciences) tmmlu++
          type: ikala/tmmluplus
          config: tve_natural_sciences
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 33.73
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (education) tmmlu++
          type: ikala/tmmluplus
          config: education
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 37.9
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (mechanical) tmmlu++
          type: ikala/tmmluplus
          config: mechanical
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 42.37
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (taiwanese_hokkien) tmmlu++
          type: ikala/tmmluplus
          config: taiwanese_hokkien
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 14.73
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (nautical_science) tmmlu++
          type: ikala/tmmluplus
          config: nautical_science
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 30.49
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (business_management) tmmlu++
          type: ikala/tmmluplus
          config: business_management
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 39.57
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (logic_reasoning) tmmlu++
          type: ikala/tmmluplus
          config: logic_reasoning
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 27.34
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (marketing_management) tmmlu++
          type: ikala/tmmluplus
          config: marketing_management
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 39.78
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (economics) tmmlu++
          type: ikala/tmmluplus
          config: economics
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 25.95
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (basic_medical_science) tmmlu++
          type: ikala/tmmluplus
          config: basic_medical_science
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 28.41
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (occupational_therapy_for_psychological_disorders) tmmlu++
          type: ikala/tmmluplus
          config: occupational_therapy_for_psychological_disorders
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 35.73
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (general_principles_of_law) tmmlu++
          type: ikala/tmmluplus
          config: general_principles_of_law
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 31.13
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (junior_chemistry) tmmlu++
          type: ikala/tmmluplus
          config: junior_chemistry
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 24.88
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (veterinary_pharmacology) tmmlu++
          type: ikala/tmmluplus
          config: veterinary_pharmacology
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 36.3
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (educational_psychology) tmmlu++
          type: ikala/tmmluplus
          config: educational_psychology
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 33.52
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (finance_banking) tmmlu++
          type: ikala/tmmluplus
          config: finance_banking
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 32.59
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (official_document_management) tmmlu++
          type: ikala/tmmluplus
          config: official_document_management
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 32.43
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (fire_science) tmmlu++
          type: ikala/tmmluplus
          config: fire_science
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 30.65
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (junior_social_studies) tmmlu++
          type: ikala/tmmluplus
          config: junior_social_studies
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 47.62
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (accounting) tmmlu++
          type: ikala/tmmluplus
          config: accounting
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 20.94
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (engineering_math) tmmlu++
          type: ikala/tmmluplus
          config: engineering_math
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 27.18
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (education_(profession_level)) tmmlu++
          type: ikala/tmmluplus
          config: education_(profession_level)
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 24.07
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (chinese_language_and_literature) tmmlu++
          type: ikala/tmmluplus
          config: chinese_language_and_literature
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 27.64
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (management_accounting) tmmlu++
          type: ikala/tmmluplus
          config: management_accounting
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 24.19
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (culinary_skills) tmmlu++
          type: ikala/tmmluplus
          config: culinary_skills
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 39.38
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (administrative_law) tmmlu++
          type: ikala/tmmluplus
          config: administrative_law
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 25.71
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (insurance_studies) tmmlu++
          type: ikala/tmmluplus
          config: insurance_studies
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 33.42
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (real_estate) tmmlu++
          type: ikala/tmmluplus
          config: real_estate
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 22.83
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (computer_science) tmmlu++
          type: ikala/tmmluplus
          config: computer_science
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 31.61
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (taxation) tmmlu++
          type: ikala/tmmluplus
          config: taxation
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 27.47
            name: single choice
      - task:
          type: question-answering
          name: Single Choice Question
        dataset:
          name: (trade) tmmlu++
          type: ikala/tmmluplus
          config: trade
          split: test
          revision: c0e8ae955997300d5dbf0e382bf0ba5115f85e8c
        metrics:
          - type: accuracy
            value: 20.32
            name: single choice