| { |
| "config": { |
| "model_X": "Qwen/Qwen2.5-3B-Instruct", |
| "model_Y": "meta-llama/Llama-3.2-3B-Instruct", |
| "hub_repo": "CK0607/cross-model-lora-prediction-3b", |
| "round8_real_generation_eval": true, |
| "no_surrogate": true, |
| "no_retraining": true, |
| "eval_examples_requested": 300, |
| "generation": { |
| "do_sample": false, |
| "num_beams": 1, |
| "greedy": true, |
| "max_new_tokens_code": 96, |
| "max_new_tokens_other": 24 |
| }, |
| "pool_anchor_names": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "heldouts": [ |
| "gsm_hard", |
| "gsm8k_test_500", |
| "mbpp_test_held", |
| "mbpp_plus", |
| "openbookqa_test" |
| ], |
| "subexperiments": { |
| "1A_k_sweep": { |
| "N": 24, |
| "seed": 0, |
| "K_values": [ |
| 2, |
| 4, |
| 6, |
| 8, |
| 12, |
| 16, |
| 20, |
| 24 |
| ], |
| "cells": 40 |
| }, |
| "1B_pertensor_methods": { |
| "N_values": [ |
| 12, |
| 16, |
| 24 |
| ], |
| "methods": [ |
| "pertensor_ridge", |
| "procrustes", |
| "pertensor_pca" |
| ], |
| "seeds_for_subsampled_N": [ |
| 0, |
| 1, |
| 2 |
| ], |
| "N24_seed": 0, |
| "cells": 105 |
| }, |
| "1C_per_task_breakdown": { |
| "derived_from": "results_round6.json records plus R8 N=16 records", |
| "new_compute": false |
| } |
| }, |
| "budget_reduction": null, |
| "wall_seconds": 3221.695 |
| }, |
| "adapter_verification": { |
| "listing": { |
| "round4/X": [ |
| "aqua_rat", |
| "arc_challenge", |
| "arc_easy", |
| "gsm8k", |
| "gsm8k_test_500", |
| "gsm_hard", |
| "humaneval", |
| "math_algebra_easy", |
| "math_counting_easy", |
| "mbpp", |
| "mbpp_plus", |
| "mbpp_sanitized", |
| "mbpp_test_held", |
| "medmcqa_easy", |
| "mmlu_elementary_math", |
| "mmlu_high_school_biology", |
| "mmlu_high_school_physics", |
| "multiarith", |
| "openbookqa", |
| "openbookqa_test", |
| "sciq", |
| "svamp" |
| ], |
| "round4/Y": [ |
| "aqua_rat", |
| "arc_challenge", |
| "arc_easy", |
| "gsm8k", |
| "gsm8k_test_500", |
| "gsm_hard", |
| "humaneval", |
| "math_algebra_easy", |
| "math_counting_easy", |
| "mbpp", |
| "mbpp_plus", |
| "mbpp_sanitized", |
| "mbpp_test_held", |
| "medmcqa_easy", |
| "mmlu_elementary_math", |
| "mmlu_high_school_biology", |
| "mmlu_high_school_physics", |
| "multiarith", |
| "openbookqa", |
| "openbookqa_test", |
| "sciq", |
| "svamp" |
| ], |
| "round5/X": [ |
| "aqua_rat_numeric", |
| "conala_curated", |
| "humaneval", |
| "math_counting_easy", |
| "mawps", |
| "mbpp_sanitized", |
| "medmcqa_easy", |
| "pubmedqa_pqal" |
| ], |
| "round5/Y": [ |
| "aqua_rat_numeric", |
| "conala_curated", |
| "humaneval", |
| "math_counting_easy", |
| "mawps", |
| "mbpp_sanitized", |
| "medmcqa_easy", |
| "pubmedqa_pqal" |
| ], |
| "round6/Y_pred": [ |
| "gsm8k_test_500_global_ridge_N12_seed0", |
| "gsm8k_test_500_global_ridge_N12_seed1", |
| "gsm8k_test_500_global_ridge_N12_seed2", |
| "gsm8k_test_500_global_ridge_N16_seed0", |
| "gsm8k_test_500_global_ridge_N16_seed1", |
| "gsm8k_test_500_global_ridge_N16_seed2", |
| "gsm8k_test_500_global_ridge_N24_full", |
| "gsm8k_test_500_global_ridge_N4_seed0", |
| "gsm8k_test_500_global_ridge_N4_seed1", |
| "gsm8k_test_500_global_ridge_N4_seed2", |
| "gsm8k_test_500_global_ridge_N8_seed0", |
| "gsm8k_test_500_global_ridge_N8_seed1", |
| "gsm8k_test_500_global_ridge_N8_seed2", |
| "gsm8k_test_500_mean_N12_seed0", |
| "gsm8k_test_500_mean_N12_seed1", |
| "gsm8k_test_500_mean_N12_seed2", |
| "gsm8k_test_500_mean_N16_seed0", |
| "gsm8k_test_500_mean_N16_seed1", |
| "gsm8k_test_500_mean_N16_seed2", |
| "gsm8k_test_500_mean_N24_full", |
| "gsm8k_test_500_mean_N4_seed0", |
| "gsm8k_test_500_mean_N4_seed1", |
| "gsm8k_test_500_mean_N4_seed2", |
| "gsm8k_test_500_mean_N8_seed0", |
| "gsm8k_test_500_mean_N8_seed1", |
| "gsm8k_test_500_mean_N8_seed2", |
| "gsm8k_test_500_topk8_global_ridge_N12_seed0", |
| "gsm8k_test_500_topk8_global_ridge_N12_seed1", |
| "gsm8k_test_500_topk8_global_ridge_N12_seed2", |
| "gsm8k_test_500_topk8_global_ridge_N16_seed0", |
| "gsm8k_test_500_topk8_global_ridge_N16_seed1", |
| "gsm8k_test_500_topk8_global_ridge_N16_seed2", |
| "gsm8k_test_500_topk8_global_ridge_N24_full", |
| "gsm8k_test_500_topk8_global_ridge_N4_seed0", |
| "gsm8k_test_500_topk8_global_ridge_N4_seed1", |
| "gsm8k_test_500_topk8_global_ridge_N4_seed2", |
| "gsm8k_test_500_topk8_global_ridge_N8_seed0", |
| "gsm8k_test_500_topk8_global_ridge_N8_seed1", |
| "gsm8k_test_500_topk8_global_ridge_N8_seed2", |
| "gsm_hard_global_ridge_N12_seed0", |
| "gsm_hard_global_ridge_N12_seed1", |
| "gsm_hard_global_ridge_N12_seed2", |
| "gsm_hard_global_ridge_N16_seed0", |
| "gsm_hard_global_ridge_N16_seed1", |
| "gsm_hard_global_ridge_N16_seed2", |
| "gsm_hard_global_ridge_N24_full", |
| "gsm_hard_global_ridge_N4_seed0", |
| "gsm_hard_global_ridge_N4_seed1", |
| "gsm_hard_global_ridge_N4_seed2", |
| "gsm_hard_global_ridge_N8_seed0", |
| "gsm_hard_global_ridge_N8_seed1", |
| "gsm_hard_global_ridge_N8_seed2", |
| "gsm_hard_mean_N12_seed0", |
| "gsm_hard_mean_N12_seed1", |
| "gsm_hard_mean_N12_seed2", |
| "gsm_hard_mean_N16_seed0", |
| "gsm_hard_mean_N16_seed1", |
| "gsm_hard_mean_N16_seed2", |
| "gsm_hard_mean_N24_full", |
| "gsm_hard_mean_N4_seed0", |
| "gsm_hard_mean_N4_seed1", |
| "gsm_hard_mean_N4_seed2", |
| "gsm_hard_mean_N8_seed0", |
| "gsm_hard_mean_N8_seed1", |
| "gsm_hard_mean_N8_seed2", |
| "gsm_hard_topk8_global_ridge_N12_seed0", |
| "gsm_hard_topk8_global_ridge_N12_seed1", |
| "gsm_hard_topk8_global_ridge_N12_seed2", |
| "gsm_hard_topk8_global_ridge_N16_seed0", |
| "gsm_hard_topk8_global_ridge_N16_seed1", |
| "gsm_hard_topk8_global_ridge_N16_seed2", |
| "gsm_hard_topk8_global_ridge_N24_full", |
| "gsm_hard_topk8_global_ridge_N4_seed0", |
| "gsm_hard_topk8_global_ridge_N4_seed1", |
| "gsm_hard_topk8_global_ridge_N4_seed2", |
| "gsm_hard_topk8_global_ridge_N8_seed0", |
| "gsm_hard_topk8_global_ridge_N8_seed1", |
| "gsm_hard_topk8_global_ridge_N8_seed2", |
| "mbpp_plus_global_ridge_N12_seed0", |
| "mbpp_plus_global_ridge_N12_seed1", |
| "mbpp_plus_global_ridge_N12_seed2", |
| "mbpp_plus_global_ridge_N16_seed0", |
| "mbpp_plus_global_ridge_N16_seed1", |
| "mbpp_plus_global_ridge_N16_seed2", |
| "mbpp_plus_global_ridge_N24_full", |
| "mbpp_plus_global_ridge_N4_seed0", |
| "mbpp_plus_global_ridge_N4_seed1", |
| "mbpp_plus_global_ridge_N4_seed2", |
| "mbpp_plus_global_ridge_N8_seed0", |
| "mbpp_plus_global_ridge_N8_seed1", |
| "mbpp_plus_global_ridge_N8_seed2", |
| "mbpp_plus_mean_N12_seed0", |
| "mbpp_plus_mean_N12_seed1", |
| "mbpp_plus_mean_N12_seed2", |
| "mbpp_plus_mean_N16_seed0", |
| "mbpp_plus_mean_N16_seed1", |
| "mbpp_plus_mean_N16_seed2", |
| "mbpp_plus_mean_N24_full", |
| "mbpp_plus_mean_N4_seed0", |
| "mbpp_plus_mean_N4_seed1", |
| "mbpp_plus_mean_N4_seed2", |
| "mbpp_plus_mean_N8_seed0", |
| "mbpp_plus_mean_N8_seed1", |
| "mbpp_plus_mean_N8_seed2", |
| "mbpp_plus_topk8_global_ridge_N12_seed0", |
| "mbpp_plus_topk8_global_ridge_N12_seed1", |
| "mbpp_plus_topk8_global_ridge_N12_seed2", |
| "mbpp_plus_topk8_global_ridge_N16_seed0", |
| "mbpp_plus_topk8_global_ridge_N16_seed1", |
| "mbpp_plus_topk8_global_ridge_N16_seed2", |
| "mbpp_plus_topk8_global_ridge_N24_full", |
| "mbpp_plus_topk8_global_ridge_N4_seed0", |
| "mbpp_plus_topk8_global_ridge_N4_seed1", |
| "mbpp_plus_topk8_global_ridge_N4_seed2", |
| "mbpp_plus_topk8_global_ridge_N8_seed0", |
| "mbpp_plus_topk8_global_ridge_N8_seed1", |
| "mbpp_plus_topk8_global_ridge_N8_seed2", |
| "mbpp_test_held_global_ridge_N12_seed0", |
| "mbpp_test_held_global_ridge_N12_seed1", |
| "mbpp_test_held_global_ridge_N12_seed2", |
| "mbpp_test_held_global_ridge_N16_seed0", |
| "mbpp_test_held_global_ridge_N16_seed1", |
| "mbpp_test_held_global_ridge_N16_seed2", |
| "mbpp_test_held_global_ridge_N24_full", |
| "mbpp_test_held_global_ridge_N4_seed0", |
| "mbpp_test_held_global_ridge_N4_seed1", |
| "mbpp_test_held_global_ridge_N4_seed2", |
| "mbpp_test_held_global_ridge_N8_seed0", |
| "mbpp_test_held_global_ridge_N8_seed1", |
| "mbpp_test_held_global_ridge_N8_seed2", |
| "mbpp_test_held_mean_N12_seed0", |
| "mbpp_test_held_mean_N12_seed1", |
| "mbpp_test_held_mean_N12_seed2", |
| "mbpp_test_held_mean_N16_seed0", |
| "mbpp_test_held_mean_N16_seed1", |
| "mbpp_test_held_mean_N16_seed2", |
| "mbpp_test_held_mean_N24_full", |
| "mbpp_test_held_mean_N4_seed0", |
| "mbpp_test_held_mean_N4_seed1", |
| "mbpp_test_held_mean_N4_seed2", |
| "mbpp_test_held_mean_N8_seed0", |
| "mbpp_test_held_mean_N8_seed1", |
| "mbpp_test_held_mean_N8_seed2", |
| "mbpp_test_held_topk8_global_ridge_N12_seed0", |
| "mbpp_test_held_topk8_global_ridge_N12_seed1", |
| "mbpp_test_held_topk8_global_ridge_N12_seed2", |
| "mbpp_test_held_topk8_global_ridge_N16_seed0", |
| "mbpp_test_held_topk8_global_ridge_N16_seed1", |
| "mbpp_test_held_topk8_global_ridge_N16_seed2", |
| "mbpp_test_held_topk8_global_ridge_N24_full", |
| "mbpp_test_held_topk8_global_ridge_N4_seed0", |
| "mbpp_test_held_topk8_global_ridge_N4_seed1", |
| "mbpp_test_held_topk8_global_ridge_N4_seed2", |
| "mbpp_test_held_topk8_global_ridge_N8_seed0", |
| "mbpp_test_held_topk8_global_ridge_N8_seed1", |
| "mbpp_test_held_topk8_global_ridge_N8_seed2", |
| "openbookqa_test_global_ridge_N12_seed0", |
| "openbookqa_test_global_ridge_N12_seed1", |
| "openbookqa_test_global_ridge_N12_seed2", |
| "openbookqa_test_global_ridge_N16_seed0", |
| "openbookqa_test_global_ridge_N16_seed1", |
| "openbookqa_test_global_ridge_N16_seed2", |
| "openbookqa_test_global_ridge_N24_full", |
| "openbookqa_test_global_ridge_N4_seed0", |
| "openbookqa_test_global_ridge_N4_seed1", |
| "openbookqa_test_global_ridge_N4_seed2", |
| "openbookqa_test_global_ridge_N8_seed0", |
| "openbookqa_test_global_ridge_N8_seed1", |
| "openbookqa_test_global_ridge_N8_seed2", |
| "openbookqa_test_mean_N12_seed0", |
| "openbookqa_test_mean_N12_seed1", |
| "openbookqa_test_mean_N12_seed2", |
| "openbookqa_test_mean_N16_seed0", |
| "openbookqa_test_mean_N16_seed1", |
| "openbookqa_test_mean_N16_seed2", |
| "openbookqa_test_mean_N24_full", |
| "openbookqa_test_mean_N4_seed0", |
| "openbookqa_test_mean_N4_seed1", |
| "openbookqa_test_mean_N4_seed2", |
| "openbookqa_test_mean_N8_seed0", |
| "openbookqa_test_mean_N8_seed1", |
| "openbookqa_test_mean_N8_seed2", |
| "openbookqa_test_topk8_global_ridge_N12_seed0", |
| "openbookqa_test_topk8_global_ridge_N12_seed1", |
| "openbookqa_test_topk8_global_ridge_N12_seed2", |
| "openbookqa_test_topk8_global_ridge_N16_seed0", |
| "openbookqa_test_topk8_global_ridge_N16_seed1", |
| "openbookqa_test_topk8_global_ridge_N16_seed2", |
| "openbookqa_test_topk8_global_ridge_N24_full", |
| "openbookqa_test_topk8_global_ridge_N4_seed0", |
| "openbookqa_test_topk8_global_ridge_N4_seed1", |
| "openbookqa_test_topk8_global_ridge_N4_seed2", |
| "openbookqa_test_topk8_global_ridge_N8_seed0", |
| "openbookqa_test_topk8_global_ridge_N8_seed1", |
| "openbookqa_test_topk8_global_ridge_N8_seed2" |
| ] |
| }, |
| "missing": [], |
| "count_warnings": [] |
| }, |
| "baselines": { |
| "gsm_hard": { |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15 |
| }, |
| "gsm8k_test_500": { |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333 |
| }, |
| "mbpp_test_held": { |
| "base_Y": 0.23, |
| "oracle": 0.32 |
| }, |
| "mbpp_plus": { |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45 |
| }, |
| "openbookqa_test": { |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333 |
| } |
| }, |
| "r6_reference": { |
| "N16_global_ridge_gap_recovered_mean": 0.1365069252111935, |
| "N24_global_ridge_gap_recovered_mean": 0.13478416569879983, |
| "N24_topk8_global_ridge_gap_recovered_mean": 0.12109363366985319 |
| }, |
| "records": [ |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "gsm8k_test_500", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk12_global_ridge", |
| "topk_K": 12, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk12_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:math_counting_easy", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_high_school_physics", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:multiarith", |
| "r4:math_algebra_easy", |
| "r4:mmlu_elementary_math", |
| "r4:mmlu_high_school_biology", |
| "r4:svamp", |
| "r5:pubmedqa_pqal", |
| "r4:aqua_rat" |
| ], |
| "accuracy": 0.09666666666666666, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 8.139, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.07812499999999999, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "gsm8k_test_500", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk16_global_ridge", |
| "topk_K": 16, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk16_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:math_counting_easy", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_high_school_physics", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:multiarith", |
| "r4:math_algebra_easy", |
| "r4:mmlu_elementary_math", |
| "r4:mmlu_high_school_biology", |
| "r4:svamp", |
| "r5:pubmedqa_pqal", |
| "r4:aqua_rat", |
| "r4:openbookqa", |
| "r5:conala_curated", |
| "r4:medmcqa_easy", |
| "r5:medmcqa_easy" |
| ], |
| "accuracy": 0.09333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 8.196, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.06250000000000001, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "gsm8k_test_500", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk20_global_ridge", |
| "topk_K": 20, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk20_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:math_counting_easy", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_high_school_physics", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:multiarith", |
| "r4:math_algebra_easy", |
| "r4:mmlu_elementary_math", |
| "r4:mmlu_high_school_biology", |
| "r4:svamp", |
| "r5:pubmedqa_pqal", |
| "r4:aqua_rat", |
| "r4:openbookqa", |
| "r5:conala_curated", |
| "r4:medmcqa_easy", |
| "r5:medmcqa_easy", |
| "r4:mbpp", |
| "r5:mbpp_sanitized", |
| "r4:sciq", |
| "r5:math_counting_easy" |
| ], |
| "accuracy": 0.1, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 8.624, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.09375000000000003, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "gsm8k_test_500", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk24_global_ridge", |
| "topk_K": 24, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk24_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:math_counting_easy", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_high_school_physics", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:multiarith", |
| "r4:math_algebra_easy", |
| "r4:mmlu_elementary_math", |
| "r4:mmlu_high_school_biology", |
| "r4:svamp", |
| "r5:pubmedqa_pqal", |
| "r4:aqua_rat", |
| "r4:openbookqa", |
| "r5:conala_curated", |
| "r4:medmcqa_easy", |
| "r5:medmcqa_easy", |
| "r5:mbpp_sanitized", |
| "r4:mbpp", |
| "r4:sciq", |
| "r5:math_counting_easy", |
| "r5:aqua_rat_numeric", |
| "r4:arc_easy", |
| "r4:gsm8k", |
| "r5:mawps" |
| ], |
| "accuracy": 0.09666666666666666, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 8.117, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.07812499999999999, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "gsm8k_test_500", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk2_global_ridge", |
| "topk_K": 2, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk2_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:math_counting_easy", |
| "r4:mbpp_sanitized" |
| ], |
| "accuracy": 0.06333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 7.03, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": -0.07812499999999999, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "gsm8k_test_500", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk4_global_ridge", |
| "topk_K": 4, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk4_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:math_counting_easy", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_high_school_physics", |
| "r4:humaneval" |
| ], |
| "accuracy": 0.08, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 7.458, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.0, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "gsm8k_test_500", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk6_global_ridge", |
| "topk_K": 6, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk6_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:math_counting_easy", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_high_school_physics", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:multiarith" |
| ], |
| "accuracy": 0.08666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 7.563, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.03125000000000001, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "gsm8k_test_500", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk8_global_ridge", |
| "topk_K": 8, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk8_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:math_counting_easy", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_high_school_physics", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:multiarith", |
| "r4:math_algebra_easy", |
| "r4:mmlu_elementary_math" |
| ], |
| "accuracy": 0.1, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 7.504, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.09375000000000003, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "gsm_hard", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk12_global_ridge", |
| "topk_K": 12, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk12_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:math_counting_easy", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_high_school_physics", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:multiarith", |
| "r4:math_algebra_easy", |
| "r4:mmlu_elementary_math", |
| "r4:mmlu_high_school_biology", |
| "r4:svamp", |
| "r5:pubmedqa_pqal", |
| "r4:aqua_rat" |
| ], |
| "accuracy": 0.06, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 22.248, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": -0.038461538461538554, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "gsm_hard", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk16_global_ridge", |
| "topk_K": 16, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk16_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:math_counting_easy", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_high_school_physics", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:multiarith", |
| "r4:math_algebra_easy", |
| "r4:mmlu_elementary_math", |
| "r4:mmlu_high_school_biology", |
| "r4:svamp", |
| "r5:pubmedqa_pqal", |
| "r4:aqua_rat", |
| "r4:openbookqa", |
| "r5:medmcqa_easy", |
| "r4:medmcqa_easy", |
| "r5:conala_curated" |
| ], |
| "accuracy": 0.06333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 23.394, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.0, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "gsm_hard", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk20_global_ridge", |
| "topk_K": 20, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk20_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:math_counting_easy", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_high_school_physics", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:multiarith", |
| "r4:math_algebra_easy", |
| "r4:mmlu_elementary_math", |
| "r4:mmlu_high_school_biology", |
| "r4:svamp", |
| "r5:pubmedqa_pqal", |
| "r4:aqua_rat", |
| "r4:openbookqa", |
| "r5:medmcqa_easy", |
| "r4:medmcqa_easy", |
| "r5:conala_curated", |
| "r4:mbpp", |
| "r5:mbpp_sanitized", |
| "r5:math_counting_easy", |
| "r4:sciq" |
| ], |
| "accuracy": 0.06333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 23.314, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.0, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "gsm_hard", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk24_global_ridge", |
| "topk_K": 24, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk24_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:math_counting_easy", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_high_school_physics", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:multiarith", |
| "r4:math_algebra_easy", |
| "r4:mmlu_elementary_math", |
| "r4:mmlu_high_school_biology", |
| "r4:svamp", |
| "r5:pubmedqa_pqal", |
| "r4:aqua_rat", |
| "r4:openbookqa", |
| "r4:medmcqa_easy", |
| "r5:medmcqa_easy", |
| "r5:conala_curated", |
| "r5:mbpp_sanitized", |
| "r4:mbpp", |
| "r5:math_counting_easy", |
| "r4:sciq", |
| "r4:gsm8k", |
| "r5:aqua_rat_numeric", |
| "r4:arc_easy", |
| "r5:mawps" |
| ], |
| "accuracy": 0.06333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 23.33, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.0, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "gsm_hard", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk2_global_ridge", |
| "topk_K": 2, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk2_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:math_counting_easy", |
| "r4:mbpp_sanitized" |
| ], |
| "accuracy": 0.03666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 19.311, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": -0.30769230769230776, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "gsm_hard", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk4_global_ridge", |
| "topk_K": 4, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk4_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:math_counting_easy", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_high_school_physics", |
| "r4:humaneval" |
| ], |
| "accuracy": 0.04666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 24.59, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": -0.19230769230769237, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "gsm_hard", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk6_global_ridge", |
| "topk_K": 6, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk6_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:math_counting_easy", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_high_school_physics", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:multiarith" |
| ], |
| "accuracy": 0.056666666666666664, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 22.195, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": -0.07692307692307702, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "gsm_hard", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk8_global_ridge", |
| "topk_K": 8, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk8_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:math_counting_easy", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_high_school_physics", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:multiarith", |
| "r4:math_algebra_easy", |
| "r4:mmlu_elementary_math" |
| ], |
| "accuracy": 0.06, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 21.359, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": -0.038461538461538554, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "mbpp_plus", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk12_global_ridge", |
| "topk_K": 12, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk12_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mbpp_sanitized", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:math_counting_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:svamp", |
| "r5:pubmedqa_pqal", |
| "r4:aqua_rat" |
| ], |
| "accuracy": 0.27666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 149.129, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.2571428571428571, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "mbpp_plus", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk16_global_ridge", |
| "topk_K": 16, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk16_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mbpp_sanitized", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:math_counting_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:svamp", |
| "r5:pubmedqa_pqal", |
| "r4:aqua_rat", |
| "r5:conala_curated", |
| "r4:openbookqa", |
| "r4:medmcqa_easy", |
| "r5:medmcqa_easy" |
| ], |
| "accuracy": 0.26666666666666666, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 151.069, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.21428571428571425, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "mbpp_plus", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk20_global_ridge", |
| "topk_K": 20, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk20_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mbpp_sanitized", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:math_counting_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:svamp", |
| "r5:pubmedqa_pqal", |
| "r4:aqua_rat", |
| "r5:conala_curated", |
| "r4:openbookqa", |
| "r4:medmcqa_easy", |
| "r5:medmcqa_easy", |
| "r5:math_counting_easy", |
| "r4:mbpp", |
| "r5:mbpp_sanitized", |
| "r4:arc_easy" |
| ], |
| "accuracy": 0.2633333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 149.578, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.19999999999999984, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "mbpp_plus", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk24_global_ridge", |
| "topk_K": 24, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk24_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mbpp_sanitized", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:math_counting_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:svamp", |
| "r5:pubmedqa_pqal", |
| "r4:aqua_rat", |
| "r5:conala_curated", |
| "r4:openbookqa", |
| "r5:medmcqa_easy", |
| "r4:medmcqa_easy", |
| "r5:math_counting_easy", |
| "r4:mbpp", |
| "r5:mbpp_sanitized", |
| "r4:arc_easy", |
| "r4:sciq", |
| "r4:gsm8k", |
| "r5:mawps", |
| "r5:aqua_rat_numeric" |
| ], |
| "accuracy": 0.26, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 149.26, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.18571428571428572, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "mbpp_plus", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk2_global_ridge", |
| "topk_K": 2, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk2_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mbpp_sanitized", |
| "r4:humaneval" |
| ], |
| "accuracy": 0.26666666666666666, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 151.982, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.21428571428571425, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "mbpp_plus", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk4_global_ridge", |
| "topk_K": 4, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk4_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mbpp_sanitized", |
| "r4:humaneval", |
| "r5:humaneval", |
| "r4:math_counting_easy" |
| ], |
| "accuracy": 0.27, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 148.839, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.22857142857142862, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "mbpp_plus", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk6_global_ridge", |
| "topk_K": 6, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk6_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mbpp_sanitized", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:math_counting_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:multiarith" |
| ], |
| "accuracy": 0.27, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 152.179, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.22857142857142862, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "mbpp_plus", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk8_global_ridge", |
| "topk_K": 8, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk8_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mbpp_sanitized", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:math_counting_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:mmlu_elementary_math" |
| ], |
| "accuracy": 0.2733333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 152.508, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.24285714285714274, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "mbpp_test_held", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk12_global_ridge", |
| "topk_K": 12, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk12_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mbpp_sanitized", |
| "r4:math_counting_easy", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:svamp", |
| "r5:pubmedqa_pqal", |
| "r4:aqua_rat" |
| ], |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 4, |
| "eval_seconds": 50.687, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "mbpp_test_held", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk16_global_ridge", |
| "topk_K": 16, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk16_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mbpp_sanitized", |
| "r4:math_counting_easy", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:svamp", |
| "r5:pubmedqa_pqal", |
| "r4:aqua_rat", |
| "r5:conala_curated", |
| "r4:openbookqa", |
| "r4:medmcqa_easy", |
| "r5:medmcqa_easy" |
| ], |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 5, |
| "eval_seconds": 50.62, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "mbpp_test_held", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk20_global_ridge", |
| "topk_K": 20, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk20_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mbpp_sanitized", |
| "r4:math_counting_easy", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:svamp", |
| "r5:pubmedqa_pqal", |
| "r4:aqua_rat", |
| "r5:conala_curated", |
| "r4:openbookqa", |
| "r5:medmcqa_easy", |
| "r4:medmcqa_easy", |
| "r5:mbpp_sanitized", |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r5:math_counting_easy" |
| ], |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 6, |
| "eval_seconds": 51.775, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "mbpp_test_held", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk24_global_ridge", |
| "topk_K": 24, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk24_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mbpp_sanitized", |
| "r4:math_counting_easy", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:svamp", |
| "r5:pubmedqa_pqal", |
| "r4:aqua_rat", |
| "r5:conala_curated", |
| "r4:openbookqa", |
| "r5:medmcqa_easy", |
| "r4:medmcqa_easy", |
| "r4:mbpp", |
| "r5:mbpp_sanitized", |
| "r4:arc_easy", |
| "r5:math_counting_easy", |
| "r4:sciq", |
| "r5:mawps", |
| "r4:gsm8k", |
| "r5:aqua_rat_numeric" |
| ], |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 7, |
| "eval_seconds": 50.243, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "mbpp_test_held", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk2_global_ridge", |
| "topk_K": 2, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk2_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mbpp_sanitized", |
| "r4:math_counting_easy" |
| ], |
| "accuracy": 0.26, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 0, |
| "eval_seconds": 51.753, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.3333333333333333, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "mbpp_test_held", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk4_global_ridge", |
| "topk_K": 4, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk4_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mbpp_sanitized", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r5:humaneval" |
| ], |
| "accuracy": 0.26, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 1, |
| "eval_seconds": 51.777, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.3333333333333333, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "mbpp_test_held", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk6_global_ridge", |
| "topk_K": 6, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk6_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mbpp_sanitized", |
| "r4:math_counting_easy", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:multiarith" |
| ], |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 2, |
| "eval_seconds": 51.66, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "mbpp_test_held", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk8_global_ridge", |
| "topk_K": 8, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk8_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mbpp_sanitized", |
| "r4:math_counting_easy", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:mmlu_elementary_math" |
| ], |
| "accuracy": 0.26, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 3, |
| "eval_seconds": 51.078, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.3333333333333333, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "openbookqa_test", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk12_global_ridge", |
| "topk_K": 12, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk12_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mmlu_high_school_physics", |
| "r4:mmlu_high_school_biology", |
| "r4:mbpp_sanitized", |
| "r4:math_counting_easy", |
| "r4:mmlu_elementary_math", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:multiarith", |
| "r4:math_algebra_easy", |
| "r4:svamp", |
| "r4:openbookqa", |
| "r5:pubmedqa_pqal" |
| ], |
| "accuracy": 0.7466666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 21.136, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.1341463414634149, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "openbookqa_test", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk16_global_ridge", |
| "topk_K": 16, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk16_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mmlu_high_school_physics", |
| "r4:mmlu_high_school_biology", |
| "r4:mbpp_sanitized", |
| "r4:math_counting_easy", |
| "r4:mmlu_elementary_math", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:multiarith", |
| "r4:math_algebra_easy", |
| "r4:svamp", |
| "r4:openbookqa", |
| "r5:pubmedqa_pqal", |
| "r4:aqua_rat", |
| "r5:medmcqa_easy", |
| "r4:medmcqa_easy", |
| "r5:conala_curated" |
| ], |
| "accuracy": 0.75, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 23.81, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.14634146341463428, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "openbookqa_test", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk20_global_ridge", |
| "topk_K": 20, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk20_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mmlu_high_school_physics", |
| "r4:mmlu_high_school_biology", |
| "r4:mbpp_sanitized", |
| "r4:math_counting_easy", |
| "r4:mmlu_elementary_math", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:multiarith", |
| "r4:math_algebra_easy", |
| "r4:svamp", |
| "r4:openbookqa", |
| "r5:pubmedqa_pqal", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:medmcqa_easy", |
| "r5:conala_curated", |
| "r4:sciq", |
| "r5:mbpp_sanitized", |
| "r4:mbpp", |
| "r5:math_counting_easy" |
| ], |
| "accuracy": 0.7466666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 23.886, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.1341463414634149, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "openbookqa_test", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk24_global_ridge", |
| "topk_K": 24, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk24_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mmlu_high_school_physics", |
| "r4:mmlu_high_school_biology", |
| "r4:mbpp_sanitized", |
| "r4:math_counting_easy", |
| "r4:mmlu_elementary_math", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:multiarith", |
| "r4:math_algebra_easy", |
| "r4:svamp", |
| "r4:openbookqa", |
| "r5:pubmedqa_pqal", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:medmcqa_easy", |
| "r5:conala_curated", |
| "r4:sciq", |
| "r5:mbpp_sanitized", |
| "r4:mbpp", |
| "r5:math_counting_easy", |
| "r4:gsm8k", |
| "r5:mawps", |
| "r5:aqua_rat_numeric", |
| "r4:arc_easy" |
| ], |
| "accuracy": 0.75, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 21.378, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.14634146341463428, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "openbookqa_test", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk2_global_ridge", |
| "topk_K": 2, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk2_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mmlu_high_school_physics", |
| "r4:mmlu_high_school_biology" |
| ], |
| "accuracy": 0.71, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 28.785, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.0, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "openbookqa_test", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk4_global_ridge", |
| "topk_K": 4, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk4_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mmlu_high_school_physics", |
| "r4:mmlu_high_school_biology", |
| "r4:mbpp_sanitized", |
| "r4:math_counting_easy" |
| ], |
| "accuracy": 0.7033333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 17.313, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": -0.024390243902438775, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "openbookqa_test", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk6_global_ridge", |
| "topk_K": 6, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk6_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mmlu_high_school_physics", |
| "r4:mmlu_high_school_biology", |
| "r4:mbpp_sanitized", |
| "r4:math_counting_easy", |
| "r4:mmlu_elementary_math", |
| "r5:humaneval" |
| ], |
| "accuracy": 0.7133333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 23.886, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.012195121951219795, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1A_k_sweep", |
| "task": "openbookqa_test", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "topk8_global_ridge", |
| "topk_K": 8, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk8_global_ridge_N24_full", |
| "selected_topk": [ |
| "r4:mmlu_high_school_physics", |
| "r4:mmlu_high_school_biology", |
| "r4:mbpp_sanitized", |
| "r4:math_counting_easy", |
| "r4:mmlu_elementary_math", |
| "r5:humaneval", |
| "r4:humaneval", |
| "r4:multiarith" |
| ], |
| "accuracy": 0.7133333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 22.785, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.012195121951219795, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm8k_test_500", |
| "N": 12, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_elementary_math", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_pca_N12_seed0", |
| "selected_topk": null, |
| "accuracy": 0.1, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 12.148, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.09375000000000003, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm8k_test_500", |
| "N": 12, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:arc_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_pca_N12_seed1", |
| "selected_topk": null, |
| "accuracy": 0.10333333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 12.822, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.109375, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm8k_test_500", |
| "N": 12, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_pca_N12_seed2", |
| "selected_topk": null, |
| "accuracy": 0.09666666666666666, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 8.836, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.07812499999999999, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm8k_test_500", |
| "N": 12, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_elementary_math", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_ridge_N12_seed0", |
| "selected_topk": null, |
| "accuracy": 0.08666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 8.773, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.03125000000000001, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm8k_test_500", |
| "N": 12, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:arc_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_ridge_N12_seed1", |
| "selected_topk": null, |
| "accuracy": 0.1, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 11.491, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.09375000000000003, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm8k_test_500", |
| "N": 12, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_ridge_N12_seed2", |
| "selected_topk": null, |
| "accuracy": 0.09, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 8.275, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.04687499999999998, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm8k_test_500", |
| "N": 12, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_elementary_math", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_procrustes_N12_seed0", |
| "selected_topk": null, |
| "accuracy": 0.09, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 9.787, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.04687499999999998, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm8k_test_500", |
| "N": 12, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:arc_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_procrustes_N12_seed1", |
| "selected_topk": null, |
| "accuracy": 0.10666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 11.914, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.12500000000000003, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm8k_test_500", |
| "N": 12, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_procrustes_N12_seed2", |
| "selected_topk": null, |
| "accuracy": 0.08666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 9.246, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.03125000000000001, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm8k_test_500", |
| "N": 16, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_pca_N16_seed0", |
| "selected_topk": null, |
| "accuracy": 0.10666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 8.994, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.12500000000000003, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm8k_test_500", |
| "N": 16, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_pca_N16_seed1", |
| "selected_topk": null, |
| "accuracy": 0.10333333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 9.926, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.109375, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm8k_test_500", |
| "N": 16, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_pca_N16_seed2", |
| "selected_topk": null, |
| "accuracy": 0.10666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 7.993, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.12500000000000003, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm8k_test_500", |
| "N": 16, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_ridge_N16_seed0", |
| "selected_topk": null, |
| "accuracy": 0.08333333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 8.441, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.015624999999999972, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm8k_test_500", |
| "N": 16, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_ridge_N16_seed1", |
| "selected_topk": null, |
| "accuracy": 0.08333333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 8.255, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.015624999999999972, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm8k_test_500", |
| "N": 16, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_ridge_N16_seed2", |
| "selected_topk": null, |
| "accuracy": 0.09666666666666666, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 7.784, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.07812499999999999, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm8k_test_500", |
| "N": 16, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_procrustes_N16_seed0", |
| "selected_topk": null, |
| "accuracy": 0.1, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 9.276, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.09375000000000003, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm8k_test_500", |
| "N": 16, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_procrustes_N16_seed1", |
| "selected_topk": null, |
| "accuracy": 0.10333333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 9.883, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.109375, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm8k_test_500", |
| "N": 16, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_procrustes_N16_seed2", |
| "selected_topk": null, |
| "accuracy": 0.09333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 8.051, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.06250000000000001, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm8k_test_500", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_pca_N24_full", |
| "selected_topk": null, |
| "accuracy": 0.1, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 10.998, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.09375000000000003, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm8k_test_500", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_ridge_N24_full", |
| "selected_topk": null, |
| "accuracy": 0.08666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 8.517, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.03125000000000001, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm8k_test_500", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_procrustes_N24_full", |
| "selected_topk": null, |
| "accuracy": 0.09333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 8.354, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.06250000000000001, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm_hard", |
| "N": 12, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_elementary_math", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_pca_N12_seed0", |
| "selected_topk": null, |
| "accuracy": 0.07666666666666666, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 28.943, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.15384615384615374, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm_hard", |
| "N": 12, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:arc_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_pca_N12_seed1", |
| "selected_topk": null, |
| "accuracy": 0.07, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 28.182, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.07692307692307694, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm_hard", |
| "N": 12, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_pca_N12_seed2", |
| "selected_topk": null, |
| "accuracy": 0.06333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 25.292, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.0, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm_hard", |
| "N": 12, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_elementary_math", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_ridge_N12_seed0", |
| "selected_topk": null, |
| "accuracy": 0.06666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 26.471, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.038461538461538394, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm_hard", |
| "N": 12, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:arc_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_ridge_N12_seed1", |
| "selected_topk": null, |
| "accuracy": 0.07, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 27.19, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.07692307692307694, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm_hard", |
| "N": 12, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_ridge_N12_seed2", |
| "selected_topk": null, |
| "accuracy": 0.06333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 22.78, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.0, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm_hard", |
| "N": 12, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_elementary_math", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_procrustes_N12_seed0", |
| "selected_topk": null, |
| "accuracy": 0.06666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 28.412, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.038461538461538394, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm_hard", |
| "N": 12, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:arc_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_procrustes_N12_seed1", |
| "selected_topk": null, |
| "accuracy": 0.07333333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 26.601, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.11538461538461534, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm_hard", |
| "N": 12, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_procrustes_N12_seed2", |
| "selected_topk": null, |
| "accuracy": 0.06333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 26.109, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.0, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm_hard", |
| "N": 16, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_pca_N16_seed0", |
| "selected_topk": null, |
| "accuracy": 0.07666666666666666, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 24.319, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.15384615384615374, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm_hard", |
| "N": 16, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_pca_N16_seed1", |
| "selected_topk": null, |
| "accuracy": 0.07, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 27.608, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.07692307692307694, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm_hard", |
| "N": 16, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_pca_N16_seed2", |
| "selected_topk": null, |
| "accuracy": 0.06333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 24.916, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.0, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm_hard", |
| "N": 16, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_ridge_N16_seed0", |
| "selected_topk": null, |
| "accuracy": 0.06666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 24.16, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.038461538461538394, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm_hard", |
| "N": 16, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_ridge_N16_seed1", |
| "selected_topk": null, |
| "accuracy": 0.06333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 24.881, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.0, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm_hard", |
| "N": 16, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_ridge_N16_seed2", |
| "selected_topk": null, |
| "accuracy": 0.06333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 24.068, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.0, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm_hard", |
| "N": 16, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_procrustes_N16_seed0", |
| "selected_topk": null, |
| "accuracy": 0.06666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 26.767, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.038461538461538394, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm_hard", |
| "N": 16, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_procrustes_N16_seed1", |
| "selected_topk": null, |
| "accuracy": 0.056666666666666664, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 23.901, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": -0.07692307692307702, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm_hard", |
| "N": 16, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_procrustes_N16_seed2", |
| "selected_topk": null, |
| "accuracy": 0.06333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 26.267, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.0, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm_hard", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_pca_N24_full", |
| "selected_topk": null, |
| "accuracy": 0.07, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 28.154, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.07692307692307694, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm_hard", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_ridge_N24_full", |
| "selected_topk": null, |
| "accuracy": 0.06333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 25.493, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.0, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "gsm_hard", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_procrustes_N24_full", |
| "selected_topk": null, |
| "accuracy": 0.07, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 25.138, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.07692307692307694, |
| "domain": "math" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_plus", |
| "N": 12, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_elementary_math", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_pca_N12_seed0", |
| "selected_topk": null, |
| "accuracy": 0.21666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 152.235, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.0, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_plus", |
| "N": 12, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:arc_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_pca_N12_seed1", |
| "selected_topk": null, |
| "accuracy": 0.22333333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 153.508, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.02857142857142852, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_plus", |
| "N": 12, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_pca_N12_seed2", |
| "selected_topk": null, |
| "accuracy": 0.21666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 154.26, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.0, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_plus", |
| "N": 12, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_elementary_math", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_ridge_N12_seed0", |
| "selected_topk": null, |
| "accuracy": 0.21666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 156.161, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.0, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_plus", |
| "N": 12, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:arc_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_ridge_N12_seed1", |
| "selected_topk": null, |
| "accuracy": 0.2633333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 151.866, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.19999999999999984, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_plus", |
| "N": 12, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_ridge_N12_seed2", |
| "selected_topk": null, |
| "accuracy": 0.26666666666666666, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 150.685, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.21428571428571425, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_plus", |
| "N": 12, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_elementary_math", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_procrustes_N12_seed0", |
| "selected_topk": null, |
| "accuracy": 0.21666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 155.572, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.0, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_plus", |
| "N": 12, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:arc_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_procrustes_N12_seed1", |
| "selected_topk": null, |
| "accuracy": 0.23666666666666666, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 152.068, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.08571428571428567, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_plus", |
| "N": 12, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_procrustes_N12_seed2", |
| "selected_topk": null, |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 147.141, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.14285714285714282, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_plus", |
| "N": 16, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_pca_N16_seed0", |
| "selected_topk": null, |
| "accuracy": 0.21666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 154.561, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.0, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_plus", |
| "N": 16, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_pca_N16_seed1", |
| "selected_topk": null, |
| "accuracy": 0.21333333333333335, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 152.767, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": -0.01428571428571426, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_plus", |
| "N": 16, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_pca_N16_seed2", |
| "selected_topk": null, |
| "accuracy": 0.21333333333333335, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 155.531, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": -0.01428571428571426, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_plus", |
| "N": 16, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_ridge_N16_seed0", |
| "selected_topk": null, |
| "accuracy": 0.2633333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 149.424, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.19999999999999984, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_plus", |
| "N": 16, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_ridge_N16_seed1", |
| "selected_topk": null, |
| "accuracy": 0.26666666666666666, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 153.68, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.21428571428571425, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_plus", |
| "N": 16, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_ridge_N16_seed2", |
| "selected_topk": null, |
| "accuracy": 0.26666666666666666, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 153.64, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.21428571428571425, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_plus", |
| "N": 16, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_procrustes_N16_seed0", |
| "selected_topk": null, |
| "accuracy": 0.23, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 152.497, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.057142857142857155, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_plus", |
| "N": 16, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_procrustes_N16_seed1", |
| "selected_topk": null, |
| "accuracy": 0.23333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 144.794, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.07142857142857141, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_plus", |
| "N": 16, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_procrustes_N16_seed2", |
| "selected_topk": null, |
| "accuracy": 0.24, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 155.568, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.09999999999999992, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_plus", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_pca_N24_full", |
| "selected_topk": null, |
| "accuracy": 0.21, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 156.101, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": -0.028571428571428636, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_plus", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_ridge_N24_full", |
| "selected_topk": null, |
| "accuracy": 0.2733333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 153.146, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.24285714285714274, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_plus", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_procrustes_N24_full", |
| "selected_topk": null, |
| "accuracy": 0.22333333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 151.112, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.02857142857142852, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_test_held", |
| "N": 12, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_elementary_math", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_pca_N12_seed0", |
| "selected_topk": null, |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 0, |
| "eval_seconds": 46.874, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_test_held", |
| "N": 12, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:arc_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_pca_N12_seed1", |
| "selected_topk": null, |
| "accuracy": 0.24, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 1, |
| "eval_seconds": 46.856, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.11111111111111091, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_test_held", |
| "N": 12, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_pca_N12_seed2", |
| "selected_topk": null, |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 2, |
| "eval_seconds": 48.369, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_test_held", |
| "N": 12, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_elementary_math", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_ridge_N12_seed0", |
| "selected_topk": null, |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 2, |
| "eval_seconds": 50.528, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_test_held", |
| "N": 12, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:arc_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_ridge_N12_seed1", |
| "selected_topk": null, |
| "accuracy": 0.26, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 3, |
| "eval_seconds": 50.432, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.3333333333333333, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_test_held", |
| "N": 12, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_ridge_N12_seed2", |
| "selected_topk": null, |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 4, |
| "eval_seconds": 50.815, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_test_held", |
| "N": 12, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_elementary_math", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_procrustes_N12_seed0", |
| "selected_topk": null, |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 1, |
| "eval_seconds": 48.905, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_test_held", |
| "N": 12, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:arc_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_procrustes_N12_seed1", |
| "selected_topk": null, |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 2, |
| "eval_seconds": 51.877, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_test_held", |
| "N": 12, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_procrustes_N12_seed2", |
| "selected_topk": null, |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 3, |
| "eval_seconds": 51.357, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_test_held", |
| "N": 16, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_pca_N16_seed0", |
| "selected_topk": null, |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 3, |
| "eval_seconds": 48.404, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_test_held", |
| "N": 16, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_pca_N16_seed1", |
| "selected_topk": null, |
| "accuracy": 0.24, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 4, |
| "eval_seconds": 49.682, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.11111111111111091, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_test_held", |
| "N": 16, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_pca_N16_seed2", |
| "selected_topk": null, |
| "accuracy": 0.24, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 5, |
| "eval_seconds": 47.994, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.11111111111111091, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_test_held", |
| "N": 16, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_ridge_N16_seed0", |
| "selected_topk": null, |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 5, |
| "eval_seconds": 51.19, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_test_held", |
| "N": 16, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_ridge_N16_seed1", |
| "selected_topk": null, |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 6, |
| "eval_seconds": 51.786, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_test_held", |
| "N": 16, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_ridge_N16_seed2", |
| "selected_topk": null, |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 7, |
| "eval_seconds": 51.484, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_test_held", |
| "N": 16, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_procrustes_N16_seed0", |
| "selected_topk": null, |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 4, |
| "eval_seconds": 47.247, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_test_held", |
| "N": 16, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_procrustes_N16_seed1", |
| "selected_topk": null, |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 5, |
| "eval_seconds": 51.2, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_test_held", |
| "N": 16, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_procrustes_N16_seed2", |
| "selected_topk": null, |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 6, |
| "eval_seconds": 50.316, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_test_held", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_pca_N24_full", |
| "selected_topk": null, |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 6, |
| "eval_seconds": 48.081, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_test_held", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_ridge_N24_full", |
| "selected_topk": null, |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 0, |
| "eval_seconds": 50.253, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "mbpp_test_held", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_procrustes_N24_full", |
| "selected_topk": null, |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 7, |
| "eval_seconds": 48.42, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213, |
| "domain": "code" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "openbookqa_test", |
| "N": 12, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_elementary_math", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_pca_N12_seed0", |
| "selected_topk": null, |
| "accuracy": 0.7366666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 16.579, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.09756097560975632, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "openbookqa_test", |
| "N": 12, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:arc_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_pca_N12_seed1", |
| "selected_topk": null, |
| "accuracy": 0.7366666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 6.928, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.09756097560975632, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "openbookqa_test", |
| "N": 12, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_pca_N12_seed2", |
| "selected_topk": null, |
| "accuracy": 0.7666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 7.813, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.20731707317073203, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "openbookqa_test", |
| "N": 12, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_elementary_math", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_ridge_N12_seed0", |
| "selected_topk": null, |
| "accuracy": 0.73, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 17.824, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.07317073170731714, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "openbookqa_test", |
| "N": 12, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:arc_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_ridge_N12_seed1", |
| "selected_topk": null, |
| "accuracy": 0.7, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 12.94, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": -0.03658536585365857, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "openbookqa_test", |
| "N": 12, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_ridge_N12_seed2", |
| "selected_topk": null, |
| "accuracy": 0.7666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 7.8, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.20731707317073203, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "openbookqa_test", |
| "N": 12, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_elementary_math", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_procrustes_N12_seed0", |
| "selected_topk": null, |
| "accuracy": 0.7266666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 14.833, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.060975609756097754, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "openbookqa_test", |
| "N": 12, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:arc_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_procrustes_N12_seed1", |
| "selected_topk": null, |
| "accuracy": 0.7266666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 9.166, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.060975609756097754, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "openbookqa_test", |
| "N": 12, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_procrustes_N12_seed2", |
| "selected_topk": null, |
| "accuracy": 0.7666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 5.952, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.20731707317073203, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "openbookqa_test", |
| "N": 16, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_pca_N16_seed0", |
| "selected_topk": null, |
| "accuracy": 0.7333333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 10.527, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.08536585365853654, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "openbookqa_test", |
| "N": 16, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_pca_N16_seed1", |
| "selected_topk": null, |
| "accuracy": 0.7666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 11.084, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.20731707317073203, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "openbookqa_test", |
| "N": 16, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_pca_N16_seed2", |
| "selected_topk": null, |
| "accuracy": 0.7666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 8.293, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.20731707317073203, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "openbookqa_test", |
| "N": 16, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_ridge_N16_seed0", |
| "selected_topk": null, |
| "accuracy": 0.7333333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 15.941, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.08536585365853654, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "openbookqa_test", |
| "N": 16, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_ridge_N16_seed1", |
| "selected_topk": null, |
| "accuracy": 0.7366666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 14.792, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.09756097560975632, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "openbookqa_test", |
| "N": 16, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_ridge_N16_seed2", |
| "selected_topk": null, |
| "accuracy": 0.7633333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 7.42, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.19512195121951226, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "openbookqa_test", |
| "N": 16, |
| "seed": 0, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:math_counting_easy", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_procrustes_N16_seed0", |
| "selected_topk": null, |
| "accuracy": 0.7433333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 9.618, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.1219512195121951, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "openbookqa_test", |
| "N": 16, |
| "seed": 1, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:math_counting_easy", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_procrustes_N16_seed1", |
| "selected_topk": null, |
| "accuracy": 0.7466666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 11.075, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.1341463414634149, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "openbookqa_test", |
| "N": 16, |
| "seed": 2, |
| "deterministic_full_pool": false, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mbpp_sanitized", |
| "r5:conala_curated", |
| "r5:medmcqa_easy" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_procrustes_N16_seed2", |
| "selected_topk": null, |
| "accuracy": 0.76, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 4.516, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.18292682926829285, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "openbookqa_test", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "pertensor_pca", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_pca_N24_full", |
| "selected_topk": null, |
| "accuracy": 0.7666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 13.996, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.20731707317073203, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "openbookqa_test", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "pertensor_ridge", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_ridge_N24_full", |
| "selected_topk": null, |
| "accuracy": 0.7466666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 23.164, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.1341463414634149, |
| "domain": "science" |
| }, |
| { |
| "subexperiment": "1B_pertensor_methods", |
| "task": "openbookqa_test", |
| "N": 24, |
| "seed": 0, |
| "deterministic_full_pool": true, |
| "method": "procrustes", |
| "topk_K": null, |
| "anchors": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_procrustes_N24_full", |
| "selected_topk": null, |
| "accuracy": 0.74, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 7.859, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.10975609756097571, |
| "domain": "science" |
| } |
| ], |
| "summary": { |
| "k_sweep": { |
| "2": { |
| "n_records": 5, |
| "gap_recovered_mean": 0.03236034798534796, |
| "gap_recovered_std": 0.2512825052045027, |
| "accuracy_mean": 0.2673333333333333 |
| }, |
| "4": { |
| "n_records": 5, |
| "gap_recovered_mean": 0.06904136513892616, |
| "gap_recovered_std": 0.21041601120601108, |
| "accuracy_mean": 0.272 |
| }, |
| "6": { |
| "n_records": 5, |
| "gap_recovered_mean": 0.08346313916435871, |
| "gap_recovered_std": 0.13586719552143517, |
| "accuracy_mean": 0.2753333333333333 |
| }, |
| "8": { |
| "n_records": 5, |
| "gap_recovered_mean": 0.12873481193603145, |
| "gap_recovered_std": 0.15624061361170813, |
| "accuracy_mean": 0.2813333333333333 |
| }, |
| "12": { |
| "n_records": 5, |
| "gap_recovered_mean": 0.13063497647339112, |
| "gap_recovered_std": 0.1180632522546757, |
| "accuracy_mean": 0.28600000000000003 |
| }, |
| "16": { |
| "n_records": 5, |
| "gap_recovered_mean": 0.12906987998451414, |
| "gap_recovered_std": 0.09660226150894938, |
| "accuracy_mean": 0.2846666666666667 |
| }, |
| "20": { |
| "n_records": 5, |
| "gap_recovered_mean": 0.13002371273712737, |
| "gap_recovered_std": 0.08893282592449571, |
| "accuracy_mean": 0.2846666666666667 |
| }, |
| "24": { |
| "n_records": 5, |
| "gap_recovered_mean": 0.12648059427022842, |
| "gap_recovered_std": 0.08860748821676426, |
| "accuracy_mean": 0.284 |
| } |
| }, |
| "pertensor_methods": { |
| "pertensor_ridge": { |
| "12": { |
| "n_records": 15, |
| "gap_recovered_mean": 0.1148817030981665, |
| "gap_recovered_std": 0.10917220312300747, |
| "accuracy_mean": 0.2786666666666667 |
| }, |
| "16": { |
| "n_records": 15, |
| "gap_recovered_mean": 0.12140822761249587, |
| "gap_recovered_std": 0.0932870579211698, |
| "accuracy_mean": 0.2824444444444445 |
| }, |
| "24": { |
| "n_records": 5, |
| "gap_recovered_mean": 0.12609514130855595, |
| "gap_recovered_std": 0.10935514163267879, |
| "accuracy_mean": 0.28400000000000003 |
| } |
| }, |
| "procrustes": { |
| "12": { |
| "n_records": 15, |
| "gap_recovered_mean": 0.10543183611781175, |
| "gap_recovered_std": 0.08157963485059341, |
| "accuracy_mean": 0.2773333333333333 |
| }, |
| "16": { |
| "n_records": 15, |
| "gap_recovered_mean": 0.10409506313469727, |
| "gap_recovered_std": 0.0855081370879396, |
| "accuracy_mean": 0.2791111111111111 |
| }, |
| "24": { |
| "n_records": 5, |
| "gap_recovered_mean": 0.09999456505554065, |
| "gap_recovered_std": 0.07428709190008917, |
| "accuracy_mean": 0.2753333333333333 |
| } |
| }, |
| "pertensor_pca": { |
| "12": { |
| "n_records": 15, |
| "gap_recovered_mean": 0.09990568261909726, |
| "gap_recovered_std": 0.07590792386005181, |
| "accuracy_mean": 0.27644444444444444 |
| }, |
| "16": { |
| "n_records": 15, |
| "gap_recovered_mean": 0.10040114977614979, |
| "gap_recovered_std": 0.07987630180578464, |
| "accuracy_mean": 0.2777777777777778 |
| }, |
| "24": { |
| "n_records": 5, |
| "gap_recovered_mean": 0.1143281887489205, |
| "gap_recovered_std": 0.10311517826966993, |
| "accuracy_mean": 0.2793333333333333 |
| } |
| } |
| }, |
| "n24_six_method_comparison": { |
| "mean": { |
| "source": "R6", |
| "n_records": 5, |
| "gap_recovered_mean": 0.0830787285208017, |
| "gap_recovered_std": 0.07181727060927716 |
| }, |
| "global_ridge": { |
| "source": "R6", |
| "n_records": 5, |
| "gap_recovered_mean": 0.13478416569879983, |
| "gap_recovered_std": 0.10350184199429305 |
| }, |
| "topk8_global_ridge": { |
| "source": "R6", |
| "n_records": 5, |
| "gap_recovered_mean": 0.12109363366985318, |
| "gap_recovered_std": 0.12401845134797244 |
| }, |
| "pertensor_ridge": { |
| "source": "R8", |
| "n_records": 5, |
| "gap_recovered_mean": 0.12609514130855595, |
| "gap_recovered_std": 0.10935514163267879 |
| }, |
| "procrustes": { |
| "source": "R8", |
| "n_records": 5, |
| "gap_recovered_mean": 0.09999456505554065, |
| "gap_recovered_std": 0.07428709190008917 |
| }, |
| "pertensor_pca": { |
| "source": "R8", |
| "n_records": 5, |
| "gap_recovered_mean": 0.1143281887489205, |
| "gap_recovered_std": 0.10311517826966993 |
| } |
| } |
| }, |
| "derived_from_r6_records": { |
| "description": "Per-task N=16 stats derived from R6 records plus R8 N=16 new-method records; no new compute for R6 columns.", |
| "per_task": { |
| "gsm_hard": { |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "r6_N16": { |
| "mean": { |
| "accuracy": { |
| "mean": 0.06555555555555555, |
| "std": 0.0050917507721731595, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.025641025641025595, |
| "std": 0.058750970448151855, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| }, |
| "global_ridge": { |
| "accuracy": { |
| "mean": 0.061111111111111116, |
| "std": 0.0038490017945975096, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": -0.025641025641025675, |
| "std": 0.04441155916843281, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| }, |
| "topk8_global_ridge": { |
| "accuracy": { |
| "mean": 0.06222222222222223, |
| "std": 0.005091750772173158, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": -0.012820512820512877, |
| "std": 0.05875097044815183, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| } |
| }, |
| "r8_N16": { |
| "pertensor_ridge": { |
| "accuracy": { |
| "mean": 0.06444444444444446, |
| "std": 0.0019245008972987488, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.012820512820512798, |
| "std": 0.02220577958421634, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| }, |
| "procrustes": { |
| "accuracy": { |
| "mean": 0.062222222222222213, |
| "std": 0.005091750772173157, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": -0.012820512820512877, |
| "std": 0.05875097044815183, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| }, |
| "pertensor_pca": { |
| "accuracy": { |
| "mean": 0.07, |
| "std": 0.006666666666666661, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.07692307692307689, |
| "std": 0.07692307692307687, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| } |
| }, |
| "best_R6_N16": { |
| "method": "mean", |
| "source": "R6", |
| "accuracy_mean": 0.06555555555555555, |
| "accuracy_std": 0.0050917507721731595, |
| "gap_recovered_mean": 0.025641025641025595, |
| "gap_recovered_std": 0.058750970448151855, |
| "n": 3 |
| }, |
| "best_R8_new_N16": { |
| "method": "pertensor_pca", |
| "source": "R8", |
| "accuracy_mean": 0.07, |
| "accuracy_std": 0.006666666666666661, |
| "gap_recovered_mean": 0.07692307692307689, |
| "gap_recovered_std": 0.07692307692307687, |
| "n": 3 |
| }, |
| "best_learned_N16": { |
| "method": "pertensor_pca", |
| "source": "R8", |
| "accuracy_mean": 0.07, |
| "accuracy_std": 0.006666666666666661, |
| "gap_recovered_mean": 0.07692307692307689, |
| "gap_recovered_std": 0.07692307692307687, |
| "n": 3 |
| } |
| }, |
| "gsm8k_test_500": { |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "r6_N16": { |
| "mean": { |
| "accuracy": { |
| "mean": 0.10222222222222221, |
| "std": 0.001924500897298749, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.10416666666666667, |
| "std": 0.009021097956087886, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| }, |
| "global_ridge": { |
| "accuracy": { |
| "mean": 0.09222222222222222, |
| "std": 0.010715167512214395, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.057291666666666664, |
| "std": 0.05022734771350498, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| }, |
| "topk8_global_ridge": { |
| "accuracy": { |
| "mean": 0.09333333333333334, |
| "std": 0.008819171036881974, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.0625, |
| "std": 0.04133986423538425, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| } |
| }, |
| "r8_N16": { |
| "pertensor_ridge": { |
| "accuracy": { |
| "mean": 0.08777777777777777, |
| "std": 0.0076980035891950115, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.03645833333333331, |
| "std": 0.036084391824351615, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| }, |
| "procrustes": { |
| "accuracy": { |
| "mean": 0.09888888888888887, |
| "std": 0.0050917507721731535, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.08854166666666667, |
| "std": 0.023867581744561665, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| }, |
| "pertensor_pca": { |
| "accuracy": { |
| "mean": 0.10555555555555556, |
| "std": 0.0019245008972987568, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.11979166666666669, |
| "std": 0.00902109795608792, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| } |
| }, |
| "best_R6_N16": { |
| "method": "mean", |
| "source": "R6", |
| "accuracy_mean": 0.10222222222222221, |
| "accuracy_std": 0.001924500897298749, |
| "gap_recovered_mean": 0.10416666666666667, |
| "gap_recovered_std": 0.009021097956087886, |
| "n": 3 |
| }, |
| "best_R8_new_N16": { |
| "method": "pertensor_pca", |
| "source": "R8", |
| "accuracy_mean": 0.10555555555555556, |
| "accuracy_std": 0.0019245008972987568, |
| "gap_recovered_mean": 0.11979166666666669, |
| "gap_recovered_std": 0.00902109795608792, |
| "n": 3 |
| }, |
| "best_learned_N16": { |
| "method": "pertensor_pca", |
| "source": "R8", |
| "accuracy_mean": 0.10555555555555556, |
| "accuracy_std": 0.0019245008972987568, |
| "gap_recovered_mean": 0.11979166666666669, |
| "gap_recovered_std": 0.00902109795608792, |
| "n": 3 |
| } |
| }, |
| "mbpp_test_held": { |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "r6_N16": { |
| "mean": { |
| "accuracy": { |
| "mean": 0.24, |
| "std": 0.0, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.1111111111111109, |
| "std": 1.6996749443881478e-17, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| }, |
| "global_ridge": { |
| "accuracy": { |
| "mean": 0.25666666666666665, |
| "std": 0.005773502691896262, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.2962962962962963, |
| "std": 0.06415002990995847, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| }, |
| "topk8_global_ridge": { |
| "accuracy": { |
| "mean": 0.25, |
| "std": 0.0, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.22222222222222213, |
| "std": 0.0, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| } |
| }, |
| "r8_N16": { |
| "pertensor_ridge": { |
| "accuracy": { |
| "mean": 0.25, |
| "std": 0.0, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.22222222222222213, |
| "std": 0.0, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| }, |
| "procrustes": { |
| "accuracy": { |
| "mean": 0.25, |
| "std": 0.0, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.22222222222222213, |
| "std": 0.0, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| }, |
| "pertensor_pca": { |
| "accuracy": { |
| "mean": 0.24333333333333332, |
| "std": 0.005773502691896263, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.148148148148148, |
| "std": 0.06415002990995848, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| } |
| }, |
| "best_R6_N16": { |
| "method": "global_ridge", |
| "source": "R6", |
| "accuracy_mean": 0.25666666666666665, |
| "accuracy_std": 0.005773502691896262, |
| "gap_recovered_mean": 0.2962962962962963, |
| "gap_recovered_std": 0.06415002990995847, |
| "n": 3 |
| }, |
| "best_R8_new_N16": { |
| "method": "pertensor_ridge", |
| "source": "R8", |
| "accuracy_mean": 0.25, |
| "accuracy_std": 0.0, |
| "gap_recovered_mean": 0.22222222222222213, |
| "gap_recovered_std": 0.0, |
| "n": 3 |
| }, |
| "best_learned_N16": { |
| "method": "global_ridge", |
| "source": "R6", |
| "accuracy_mean": 0.25666666666666665, |
| "accuracy_std": 0.005773502691896262, |
| "gap_recovered_mean": 0.2962962962962963, |
| "gap_recovered_std": 0.06415002990995847, |
| "n": 3 |
| } |
| }, |
| "mbpp_plus": { |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "r6_N16": { |
| "mean": { |
| "accuracy": { |
| "mean": 0.21222222222222223, |
| "std": 0.0019245008972987648, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": -0.019047619047619053, |
| "std": 0.008247860988423278, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| }, |
| "global_ridge": { |
| "accuracy": { |
| "mean": 0.27, |
| "std": 0.003333333333333327, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.22857142857142854, |
| "std": 0.014285714285714249, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| }, |
| "topk8_global_ridge": { |
| "accuracy": { |
| "mean": 0.2688888888888889, |
| "std": 0.0038490017945974975, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.22380952380952376, |
| "std": 0.016495721976846407, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| } |
| }, |
| "r8_N16": { |
| "pertensor_ridge": { |
| "accuracy": { |
| "mean": 0.26555555555555554, |
| "std": 0.0019245008972987648, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.20952380952380945, |
| "std": 0.008247860988423292, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| }, |
| "procrustes": { |
| "accuracy": { |
| "mean": 0.23444444444444446, |
| "std": 0.0050917507721731465, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.07619047619047616, |
| "std": 0.021821789023599193, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| }, |
| "pertensor_pca": { |
| "accuracy": { |
| "mean": 0.21444444444444447, |
| "std": 0.0019245008972987488, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": -0.009523809523809506, |
| "std": 0.008247860988423209, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| } |
| }, |
| "best_R6_N16": { |
| "method": "global_ridge", |
| "source": "R6", |
| "accuracy_mean": 0.27, |
| "accuracy_std": 0.003333333333333327, |
| "gap_recovered_mean": 0.22857142857142854, |
| "gap_recovered_std": 0.014285714285714249, |
| "n": 3 |
| }, |
| "best_R8_new_N16": { |
| "method": "pertensor_ridge", |
| "source": "R8", |
| "accuracy_mean": 0.26555555555555554, |
| "accuracy_std": 0.0019245008972987648, |
| "gap_recovered_mean": 0.20952380952380945, |
| "gap_recovered_std": 0.008247860988423292, |
| "n": 3 |
| }, |
| "best_learned_N16": { |
| "method": "global_ridge", |
| "source": "R6", |
| "accuracy_mean": 0.27, |
| "accuracy_std": 0.003333333333333327, |
| "gap_recovered_mean": 0.22857142857142854, |
| "gap_recovered_std": 0.014285714285714249, |
| "n": 3 |
| } |
| }, |
| "openbookqa_test": { |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "r6_N16": { |
| "mean": { |
| "accuracy": { |
| "mean": 0.7544444444444444, |
| "std": 0.001924500897298797, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.16260162601626027, |
| "std": 0.007040856941337066, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| }, |
| "global_ridge": { |
| "accuracy": { |
| "mean": 0.7444444444444445, |
| "std": 0.015030832509409663, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.1260162601626017, |
| "std": 0.054990850644181695, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| }, |
| "topk8_global_ridge": { |
| "accuracy": { |
| "mean": 0.7433333333333333, |
| "std": 0.01527525231651948, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.12195121951219523, |
| "std": 0.05588506945068102, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| } |
| }, |
| "r8_N16": { |
| "pertensor_ridge": { |
| "accuracy": { |
| "mean": 0.7444444444444445, |
| "std": 0.016442942874387488, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.1260162601626017, |
| "std": 0.060157108077027406, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| }, |
| "procrustes": { |
| "accuracy": { |
| "mean": 0.75, |
| "std": 0.008819171036881984, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.14634146341463428, |
| "std": 0.03226525989103164, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| }, |
| "pertensor_pca": { |
| "accuracy": { |
| "mean": 0.7555555555555555, |
| "std": 0.019245008972987587, |
| "n": 3 |
| }, |
| "gap_recovered": { |
| "mean": 0.16666666666666685, |
| "std": 0.07040856941336922, |
| "n": 3 |
| }, |
| "seeds": [ |
| 0, |
| 1, |
| 2 |
| ] |
| } |
| }, |
| "best_R6_N16": { |
| "method": "mean", |
| "source": "R6", |
| "accuracy_mean": 0.7544444444444444, |
| "accuracy_std": 0.001924500897298797, |
| "gap_recovered_mean": 0.16260162601626027, |
| "gap_recovered_std": 0.007040856941337066, |
| "n": 3 |
| }, |
| "best_R8_new_N16": { |
| "method": "pertensor_pca", |
| "source": "R8", |
| "accuracy_mean": 0.7555555555555555, |
| "accuracy_std": 0.019245008972987587, |
| "gap_recovered_mean": 0.16666666666666685, |
| "gap_recovered_std": 0.07040856941336922, |
| "n": 3 |
| }, |
| "best_learned_N16": { |
| "method": "pertensor_pca", |
| "source": "R8", |
| "accuracy_mean": 0.7555555555555555, |
| "accuracy_std": 0.019245008972987587, |
| "gap_recovered_mean": 0.16666666666666685, |
| "gap_recovered_std": 0.07040856941336922, |
| "n": 3 |
| } |
| } |
| } |
| }, |
| "derived_from_r6_records_source": { |
| "results_round6_json_pulled_from_hub": true, |
| "r6_record_count": 195 |
| } |
| } |