cross-model-lora-prediction-3b / results_round8.json
CK0607's picture
Round 8 gap-fill artifact: results_round8.json
833d04e verified
raw
history blame
212 kB
{
"config": {
"model_X": "Qwen/Qwen2.5-3B-Instruct",
"model_Y": "meta-llama/Llama-3.2-3B-Instruct",
"hub_repo": "CK0607/cross-model-lora-prediction-3b",
"round8_real_generation_eval": true,
"no_surrogate": true,
"no_retraining": true,
"eval_examples_requested": 300,
"generation": {
"do_sample": false,
"num_beams": 1,
"greedy": true,
"max_new_tokens_code": 96,
"max_new_tokens_other": 24
},
"pool_anchor_names": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"heldouts": [
"gsm_hard",
"gsm8k_test_500",
"mbpp_test_held",
"mbpp_plus",
"openbookqa_test"
],
"subexperiments": {
"1A_k_sweep": {
"N": 24,
"seed": 0,
"K_values": [
2,
4,
6,
8,
12,
16,
20,
24
],
"cells": 40
},
"1B_pertensor_methods": {
"N_values": [
12,
16,
24
],
"methods": [
"pertensor_ridge",
"procrustes",
"pertensor_pca"
],
"seeds_for_subsampled_N": [
0,
1,
2
],
"N24_seed": 0,
"cells": 105
},
"1C_per_task_breakdown": {
"derived_from": "results_round6.json records plus R8 N=16 records",
"new_compute": false
}
},
"budget_reduction": null,
"wall_seconds": 3221.695
},
"adapter_verification": {
"listing": {
"round4/X": [
"aqua_rat",
"arc_challenge",
"arc_easy",
"gsm8k",
"gsm8k_test_500",
"gsm_hard",
"humaneval",
"math_algebra_easy",
"math_counting_easy",
"mbpp",
"mbpp_plus",
"mbpp_sanitized",
"mbpp_test_held",
"medmcqa_easy",
"mmlu_elementary_math",
"mmlu_high_school_biology",
"mmlu_high_school_physics",
"multiarith",
"openbookqa",
"openbookqa_test",
"sciq",
"svamp"
],
"round4/Y": [
"aqua_rat",
"arc_challenge",
"arc_easy",
"gsm8k",
"gsm8k_test_500",
"gsm_hard",
"humaneval",
"math_algebra_easy",
"math_counting_easy",
"mbpp",
"mbpp_plus",
"mbpp_sanitized",
"mbpp_test_held",
"medmcqa_easy",
"mmlu_elementary_math",
"mmlu_high_school_biology",
"mmlu_high_school_physics",
"multiarith",
"openbookqa",
"openbookqa_test",
"sciq",
"svamp"
],
"round5/X": [
"aqua_rat_numeric",
"conala_curated",
"humaneval",
"math_counting_easy",
"mawps",
"mbpp_sanitized",
"medmcqa_easy",
"pubmedqa_pqal"
],
"round5/Y": [
"aqua_rat_numeric",
"conala_curated",
"humaneval",
"math_counting_easy",
"mawps",
"mbpp_sanitized",
"medmcqa_easy",
"pubmedqa_pqal"
],
"round6/Y_pred": [
"gsm8k_test_500_global_ridge_N12_seed0",
"gsm8k_test_500_global_ridge_N12_seed1",
"gsm8k_test_500_global_ridge_N12_seed2",
"gsm8k_test_500_global_ridge_N16_seed0",
"gsm8k_test_500_global_ridge_N16_seed1",
"gsm8k_test_500_global_ridge_N16_seed2",
"gsm8k_test_500_global_ridge_N24_full",
"gsm8k_test_500_global_ridge_N4_seed0",
"gsm8k_test_500_global_ridge_N4_seed1",
"gsm8k_test_500_global_ridge_N4_seed2",
"gsm8k_test_500_global_ridge_N8_seed0",
"gsm8k_test_500_global_ridge_N8_seed1",
"gsm8k_test_500_global_ridge_N8_seed2",
"gsm8k_test_500_mean_N12_seed0",
"gsm8k_test_500_mean_N12_seed1",
"gsm8k_test_500_mean_N12_seed2",
"gsm8k_test_500_mean_N16_seed0",
"gsm8k_test_500_mean_N16_seed1",
"gsm8k_test_500_mean_N16_seed2",
"gsm8k_test_500_mean_N24_full",
"gsm8k_test_500_mean_N4_seed0",
"gsm8k_test_500_mean_N4_seed1",
"gsm8k_test_500_mean_N4_seed2",
"gsm8k_test_500_mean_N8_seed0",
"gsm8k_test_500_mean_N8_seed1",
"gsm8k_test_500_mean_N8_seed2",
"gsm8k_test_500_topk8_global_ridge_N12_seed0",
"gsm8k_test_500_topk8_global_ridge_N12_seed1",
"gsm8k_test_500_topk8_global_ridge_N12_seed2",
"gsm8k_test_500_topk8_global_ridge_N16_seed0",
"gsm8k_test_500_topk8_global_ridge_N16_seed1",
"gsm8k_test_500_topk8_global_ridge_N16_seed2",
"gsm8k_test_500_topk8_global_ridge_N24_full",
"gsm8k_test_500_topk8_global_ridge_N4_seed0",
"gsm8k_test_500_topk8_global_ridge_N4_seed1",
"gsm8k_test_500_topk8_global_ridge_N4_seed2",
"gsm8k_test_500_topk8_global_ridge_N8_seed0",
"gsm8k_test_500_topk8_global_ridge_N8_seed1",
"gsm8k_test_500_topk8_global_ridge_N8_seed2",
"gsm_hard_global_ridge_N12_seed0",
"gsm_hard_global_ridge_N12_seed1",
"gsm_hard_global_ridge_N12_seed2",
"gsm_hard_global_ridge_N16_seed0",
"gsm_hard_global_ridge_N16_seed1",
"gsm_hard_global_ridge_N16_seed2",
"gsm_hard_global_ridge_N24_full",
"gsm_hard_global_ridge_N4_seed0",
"gsm_hard_global_ridge_N4_seed1",
"gsm_hard_global_ridge_N4_seed2",
"gsm_hard_global_ridge_N8_seed0",
"gsm_hard_global_ridge_N8_seed1",
"gsm_hard_global_ridge_N8_seed2",
"gsm_hard_mean_N12_seed0",
"gsm_hard_mean_N12_seed1",
"gsm_hard_mean_N12_seed2",
"gsm_hard_mean_N16_seed0",
"gsm_hard_mean_N16_seed1",
"gsm_hard_mean_N16_seed2",
"gsm_hard_mean_N24_full",
"gsm_hard_mean_N4_seed0",
"gsm_hard_mean_N4_seed1",
"gsm_hard_mean_N4_seed2",
"gsm_hard_mean_N8_seed0",
"gsm_hard_mean_N8_seed1",
"gsm_hard_mean_N8_seed2",
"gsm_hard_topk8_global_ridge_N12_seed0",
"gsm_hard_topk8_global_ridge_N12_seed1",
"gsm_hard_topk8_global_ridge_N12_seed2",
"gsm_hard_topk8_global_ridge_N16_seed0",
"gsm_hard_topk8_global_ridge_N16_seed1",
"gsm_hard_topk8_global_ridge_N16_seed2",
"gsm_hard_topk8_global_ridge_N24_full",
"gsm_hard_topk8_global_ridge_N4_seed0",
"gsm_hard_topk8_global_ridge_N4_seed1",
"gsm_hard_topk8_global_ridge_N4_seed2",
"gsm_hard_topk8_global_ridge_N8_seed0",
"gsm_hard_topk8_global_ridge_N8_seed1",
"gsm_hard_topk8_global_ridge_N8_seed2",
"mbpp_plus_global_ridge_N12_seed0",
"mbpp_plus_global_ridge_N12_seed1",
"mbpp_plus_global_ridge_N12_seed2",
"mbpp_plus_global_ridge_N16_seed0",
"mbpp_plus_global_ridge_N16_seed1",
"mbpp_plus_global_ridge_N16_seed2",
"mbpp_plus_global_ridge_N24_full",
"mbpp_plus_global_ridge_N4_seed0",
"mbpp_plus_global_ridge_N4_seed1",
"mbpp_plus_global_ridge_N4_seed2",
"mbpp_plus_global_ridge_N8_seed0",
"mbpp_plus_global_ridge_N8_seed1",
"mbpp_plus_global_ridge_N8_seed2",
"mbpp_plus_mean_N12_seed0",
"mbpp_plus_mean_N12_seed1",
"mbpp_plus_mean_N12_seed2",
"mbpp_plus_mean_N16_seed0",
"mbpp_plus_mean_N16_seed1",
"mbpp_plus_mean_N16_seed2",
"mbpp_plus_mean_N24_full",
"mbpp_plus_mean_N4_seed0",
"mbpp_plus_mean_N4_seed1",
"mbpp_plus_mean_N4_seed2",
"mbpp_plus_mean_N8_seed0",
"mbpp_plus_mean_N8_seed1",
"mbpp_plus_mean_N8_seed2",
"mbpp_plus_topk8_global_ridge_N12_seed0",
"mbpp_plus_topk8_global_ridge_N12_seed1",
"mbpp_plus_topk8_global_ridge_N12_seed2",
"mbpp_plus_topk8_global_ridge_N16_seed0",
"mbpp_plus_topk8_global_ridge_N16_seed1",
"mbpp_plus_topk8_global_ridge_N16_seed2",
"mbpp_plus_topk8_global_ridge_N24_full",
"mbpp_plus_topk8_global_ridge_N4_seed0",
"mbpp_plus_topk8_global_ridge_N4_seed1",
"mbpp_plus_topk8_global_ridge_N4_seed2",
"mbpp_plus_topk8_global_ridge_N8_seed0",
"mbpp_plus_topk8_global_ridge_N8_seed1",
"mbpp_plus_topk8_global_ridge_N8_seed2",
"mbpp_test_held_global_ridge_N12_seed0",
"mbpp_test_held_global_ridge_N12_seed1",
"mbpp_test_held_global_ridge_N12_seed2",
"mbpp_test_held_global_ridge_N16_seed0",
"mbpp_test_held_global_ridge_N16_seed1",
"mbpp_test_held_global_ridge_N16_seed2",
"mbpp_test_held_global_ridge_N24_full",
"mbpp_test_held_global_ridge_N4_seed0",
"mbpp_test_held_global_ridge_N4_seed1",
"mbpp_test_held_global_ridge_N4_seed2",
"mbpp_test_held_global_ridge_N8_seed0",
"mbpp_test_held_global_ridge_N8_seed1",
"mbpp_test_held_global_ridge_N8_seed2",
"mbpp_test_held_mean_N12_seed0",
"mbpp_test_held_mean_N12_seed1",
"mbpp_test_held_mean_N12_seed2",
"mbpp_test_held_mean_N16_seed0",
"mbpp_test_held_mean_N16_seed1",
"mbpp_test_held_mean_N16_seed2",
"mbpp_test_held_mean_N24_full",
"mbpp_test_held_mean_N4_seed0",
"mbpp_test_held_mean_N4_seed1",
"mbpp_test_held_mean_N4_seed2",
"mbpp_test_held_mean_N8_seed0",
"mbpp_test_held_mean_N8_seed1",
"mbpp_test_held_mean_N8_seed2",
"mbpp_test_held_topk8_global_ridge_N12_seed0",
"mbpp_test_held_topk8_global_ridge_N12_seed1",
"mbpp_test_held_topk8_global_ridge_N12_seed2",
"mbpp_test_held_topk8_global_ridge_N16_seed0",
"mbpp_test_held_topk8_global_ridge_N16_seed1",
"mbpp_test_held_topk8_global_ridge_N16_seed2",
"mbpp_test_held_topk8_global_ridge_N24_full",
"mbpp_test_held_topk8_global_ridge_N4_seed0",
"mbpp_test_held_topk8_global_ridge_N4_seed1",
"mbpp_test_held_topk8_global_ridge_N4_seed2",
"mbpp_test_held_topk8_global_ridge_N8_seed0",
"mbpp_test_held_topk8_global_ridge_N8_seed1",
"mbpp_test_held_topk8_global_ridge_N8_seed2",
"openbookqa_test_global_ridge_N12_seed0",
"openbookqa_test_global_ridge_N12_seed1",
"openbookqa_test_global_ridge_N12_seed2",
"openbookqa_test_global_ridge_N16_seed0",
"openbookqa_test_global_ridge_N16_seed1",
"openbookqa_test_global_ridge_N16_seed2",
"openbookqa_test_global_ridge_N24_full",
"openbookqa_test_global_ridge_N4_seed0",
"openbookqa_test_global_ridge_N4_seed1",
"openbookqa_test_global_ridge_N4_seed2",
"openbookqa_test_global_ridge_N8_seed0",
"openbookqa_test_global_ridge_N8_seed1",
"openbookqa_test_global_ridge_N8_seed2",
"openbookqa_test_mean_N12_seed0",
"openbookqa_test_mean_N12_seed1",
"openbookqa_test_mean_N12_seed2",
"openbookqa_test_mean_N16_seed0",
"openbookqa_test_mean_N16_seed1",
"openbookqa_test_mean_N16_seed2",
"openbookqa_test_mean_N24_full",
"openbookqa_test_mean_N4_seed0",
"openbookqa_test_mean_N4_seed1",
"openbookqa_test_mean_N4_seed2",
"openbookqa_test_mean_N8_seed0",
"openbookqa_test_mean_N8_seed1",
"openbookqa_test_mean_N8_seed2",
"openbookqa_test_topk8_global_ridge_N12_seed0",
"openbookqa_test_topk8_global_ridge_N12_seed1",
"openbookqa_test_topk8_global_ridge_N12_seed2",
"openbookqa_test_topk8_global_ridge_N16_seed0",
"openbookqa_test_topk8_global_ridge_N16_seed1",
"openbookqa_test_topk8_global_ridge_N16_seed2",
"openbookqa_test_topk8_global_ridge_N24_full",
"openbookqa_test_topk8_global_ridge_N4_seed0",
"openbookqa_test_topk8_global_ridge_N4_seed1",
"openbookqa_test_topk8_global_ridge_N4_seed2",
"openbookqa_test_topk8_global_ridge_N8_seed0",
"openbookqa_test_topk8_global_ridge_N8_seed1",
"openbookqa_test_topk8_global_ridge_N8_seed2"
]
},
"missing": [],
"count_warnings": []
},
"baselines": {
"gsm_hard": {
"base_Y": 0.06333333333333334,
"oracle": 0.15
},
"gsm8k_test_500": {
"base_Y": 0.08,
"oracle": 0.29333333333333333
},
"mbpp_test_held": {
"base_Y": 0.23,
"oracle": 0.32
},
"mbpp_plus": {
"base_Y": 0.21666666666666667,
"oracle": 0.45
},
"openbookqa_test": {
"base_Y": 0.71,
"oracle": 0.9833333333333333
}
},
"r6_reference": {
"N16_global_ridge_gap_recovered_mean": 0.1365069252111935,
"N24_global_ridge_gap_recovered_mean": 0.13478416569879983,
"N24_topk8_global_ridge_gap_recovered_mean": 0.12109363366985319
},
"records": [
{
"subexperiment": "1A_k_sweep",
"task": "gsm8k_test_500",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk12_global_ridge",
"topk_K": 12,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk12_global_ridge_N24_full",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:mmlu_high_school_physics",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith",
"r4:math_algebra_easy",
"r4:mmlu_elementary_math",
"r4:mmlu_high_school_biology",
"r4:svamp",
"r5:pubmedqa_pqal",
"r4:aqua_rat"
],
"accuracy": 0.09666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 8.139,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.07812499999999999,
"domain": "math"
},
{
"subexperiment": "1A_k_sweep",
"task": "gsm8k_test_500",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk16_global_ridge",
"topk_K": 16,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk16_global_ridge_N24_full",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:mmlu_high_school_physics",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith",
"r4:math_algebra_easy",
"r4:mmlu_elementary_math",
"r4:mmlu_high_school_biology",
"r4:svamp",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r4:openbookqa",
"r5:conala_curated",
"r4:medmcqa_easy",
"r5:medmcqa_easy"
],
"accuracy": 0.09333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 8.196,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.06250000000000001,
"domain": "math"
},
{
"subexperiment": "1A_k_sweep",
"task": "gsm8k_test_500",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk20_global_ridge",
"topk_K": 20,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk20_global_ridge_N24_full",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:mmlu_high_school_physics",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith",
"r4:math_algebra_easy",
"r4:mmlu_elementary_math",
"r4:mmlu_high_school_biology",
"r4:svamp",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r4:openbookqa",
"r5:conala_curated",
"r4:medmcqa_easy",
"r5:medmcqa_easy",
"r4:mbpp",
"r5:mbpp_sanitized",
"r4:sciq",
"r5:math_counting_easy"
],
"accuracy": 0.1,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 8.624,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.09375000000000003,
"domain": "math"
},
{
"subexperiment": "1A_k_sweep",
"task": "gsm8k_test_500",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk24_global_ridge",
"topk_K": 24,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk24_global_ridge_N24_full",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:mmlu_high_school_physics",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith",
"r4:math_algebra_easy",
"r4:mmlu_elementary_math",
"r4:mmlu_high_school_biology",
"r4:svamp",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r4:openbookqa",
"r5:conala_curated",
"r4:medmcqa_easy",
"r5:medmcqa_easy",
"r5:mbpp_sanitized",
"r4:mbpp",
"r4:sciq",
"r5:math_counting_easy",
"r5:aqua_rat_numeric",
"r4:arc_easy",
"r4:gsm8k",
"r5:mawps"
],
"accuracy": 0.09666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 8.117,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.07812499999999999,
"domain": "math"
},
{
"subexperiment": "1A_k_sweep",
"task": "gsm8k_test_500",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk2_global_ridge",
"topk_K": 2,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk2_global_ridge_N24_full",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized"
],
"accuracy": 0.06333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 7.03,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": -0.07812499999999999,
"domain": "math"
},
{
"subexperiment": "1A_k_sweep",
"task": "gsm8k_test_500",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk4_global_ridge",
"topk_K": 4,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk4_global_ridge_N24_full",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:mmlu_high_school_physics",
"r4:humaneval"
],
"accuracy": 0.08,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 7.458,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.0,
"domain": "math"
},
{
"subexperiment": "1A_k_sweep",
"task": "gsm8k_test_500",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk6_global_ridge",
"topk_K": 6,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk6_global_ridge_N24_full",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:mmlu_high_school_physics",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith"
],
"accuracy": 0.08666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 7.563,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.03125000000000001,
"domain": "math"
},
{
"subexperiment": "1A_k_sweep",
"task": "gsm8k_test_500",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk8_global_ridge",
"topk_K": 8,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk8_global_ridge_N24_full",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:mmlu_high_school_physics",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith",
"r4:math_algebra_easy",
"r4:mmlu_elementary_math"
],
"accuracy": 0.1,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 7.504,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.09375000000000003,
"domain": "math"
},
{
"subexperiment": "1A_k_sweep",
"task": "gsm_hard",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk12_global_ridge",
"topk_K": 12,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk12_global_ridge_N24_full",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:mmlu_high_school_physics",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith",
"r4:math_algebra_easy",
"r4:mmlu_elementary_math",
"r4:mmlu_high_school_biology",
"r4:svamp",
"r5:pubmedqa_pqal",
"r4:aqua_rat"
],
"accuracy": 0.06,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 22.248,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": -0.038461538461538554,
"domain": "math"
},
{
"subexperiment": "1A_k_sweep",
"task": "gsm_hard",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk16_global_ridge",
"topk_K": 16,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk16_global_ridge_N24_full",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:mmlu_high_school_physics",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith",
"r4:math_algebra_easy",
"r4:mmlu_elementary_math",
"r4:mmlu_high_school_biology",
"r4:svamp",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r4:openbookqa",
"r5:medmcqa_easy",
"r4:medmcqa_easy",
"r5:conala_curated"
],
"accuracy": 0.06333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 23.394,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.0,
"domain": "math"
},
{
"subexperiment": "1A_k_sweep",
"task": "gsm_hard",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk20_global_ridge",
"topk_K": 20,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk20_global_ridge_N24_full",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:mmlu_high_school_physics",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith",
"r4:math_algebra_easy",
"r4:mmlu_elementary_math",
"r4:mmlu_high_school_biology",
"r4:svamp",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r4:openbookqa",
"r5:medmcqa_easy",
"r4:medmcqa_easy",
"r5:conala_curated",
"r4:mbpp",
"r5:mbpp_sanitized",
"r5:math_counting_easy",
"r4:sciq"
],
"accuracy": 0.06333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 23.314,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.0,
"domain": "math"
},
{
"subexperiment": "1A_k_sweep",
"task": "gsm_hard",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk24_global_ridge",
"topk_K": 24,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk24_global_ridge_N24_full",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:mmlu_high_school_physics",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith",
"r4:math_algebra_easy",
"r4:mmlu_elementary_math",
"r4:mmlu_high_school_biology",
"r4:svamp",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r4:openbookqa",
"r4:medmcqa_easy",
"r5:medmcqa_easy",
"r5:conala_curated",
"r5:mbpp_sanitized",
"r4:mbpp",
"r5:math_counting_easy",
"r4:sciq",
"r4:gsm8k",
"r5:aqua_rat_numeric",
"r4:arc_easy",
"r5:mawps"
],
"accuracy": 0.06333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 23.33,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.0,
"domain": "math"
},
{
"subexperiment": "1A_k_sweep",
"task": "gsm_hard",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk2_global_ridge",
"topk_K": 2,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk2_global_ridge_N24_full",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized"
],
"accuracy": 0.03666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 19.311,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": -0.30769230769230776,
"domain": "math"
},
{
"subexperiment": "1A_k_sweep",
"task": "gsm_hard",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk4_global_ridge",
"topk_K": 4,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk4_global_ridge_N24_full",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:mmlu_high_school_physics",
"r4:humaneval"
],
"accuracy": 0.04666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 24.59,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": -0.19230769230769237,
"domain": "math"
},
{
"subexperiment": "1A_k_sweep",
"task": "gsm_hard",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk6_global_ridge",
"topk_K": 6,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk6_global_ridge_N24_full",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:mmlu_high_school_physics",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith"
],
"accuracy": 0.056666666666666664,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 22.195,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": -0.07692307692307702,
"domain": "math"
},
{
"subexperiment": "1A_k_sweep",
"task": "gsm_hard",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk8_global_ridge",
"topk_K": 8,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk8_global_ridge_N24_full",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:mmlu_high_school_physics",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith",
"r4:math_algebra_easy",
"r4:mmlu_elementary_math"
],
"accuracy": 0.06,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 21.359,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": -0.038461538461538554,
"domain": "math"
},
{
"subexperiment": "1A_k_sweep",
"task": "mbpp_plus",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk12_global_ridge",
"topk_K": 12,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk12_global_ridge_N24_full",
"selected_topk": [
"r4:mbpp_sanitized",
"r5:humaneval",
"r4:humaneval",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:svamp",
"r5:pubmedqa_pqal",
"r4:aqua_rat"
],
"accuracy": 0.27666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 149.129,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.2571428571428571,
"domain": "code"
},
{
"subexperiment": "1A_k_sweep",
"task": "mbpp_plus",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk16_global_ridge",
"topk_K": 16,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk16_global_ridge_N24_full",
"selected_topk": [
"r4:mbpp_sanitized",
"r5:humaneval",
"r4:humaneval",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:svamp",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r5:conala_curated",
"r4:openbookqa",
"r4:medmcqa_easy",
"r5:medmcqa_easy"
],
"accuracy": 0.26666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 151.069,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.21428571428571425,
"domain": "code"
},
{
"subexperiment": "1A_k_sweep",
"task": "mbpp_plus",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk20_global_ridge",
"topk_K": 20,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk20_global_ridge_N24_full",
"selected_topk": [
"r4:mbpp_sanitized",
"r5:humaneval",
"r4:humaneval",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:svamp",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r5:conala_curated",
"r4:openbookqa",
"r4:medmcqa_easy",
"r5:medmcqa_easy",
"r5:math_counting_easy",
"r4:mbpp",
"r5:mbpp_sanitized",
"r4:arc_easy"
],
"accuracy": 0.2633333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 149.578,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.19999999999999984,
"domain": "code"
},
{
"subexperiment": "1A_k_sweep",
"task": "mbpp_plus",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk24_global_ridge",
"topk_K": 24,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk24_global_ridge_N24_full",
"selected_topk": [
"r4:mbpp_sanitized",
"r5:humaneval",
"r4:humaneval",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:svamp",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r5:conala_curated",
"r4:openbookqa",
"r5:medmcqa_easy",
"r4:medmcqa_easy",
"r5:math_counting_easy",
"r4:mbpp",
"r5:mbpp_sanitized",
"r4:arc_easy",
"r4:sciq",
"r4:gsm8k",
"r5:mawps",
"r5:aqua_rat_numeric"
],
"accuracy": 0.26,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 149.26,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.18571428571428572,
"domain": "code"
},
{
"subexperiment": "1A_k_sweep",
"task": "mbpp_plus",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk2_global_ridge",
"topk_K": 2,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk2_global_ridge_N24_full",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:humaneval"
],
"accuracy": 0.26666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 151.982,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.21428571428571425,
"domain": "code"
},
{
"subexperiment": "1A_k_sweep",
"task": "mbpp_plus",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk4_global_ridge",
"topk_K": 4,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk4_global_ridge_N24_full",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:humaneval",
"r5:humaneval",
"r4:math_counting_easy"
],
"accuracy": 0.27,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 148.839,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.22857142857142862,
"domain": "code"
},
{
"subexperiment": "1A_k_sweep",
"task": "mbpp_plus",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk6_global_ridge",
"topk_K": 6,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk6_global_ridge_N24_full",
"selected_topk": [
"r4:mbpp_sanitized",
"r5:humaneval",
"r4:humaneval",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:multiarith"
],
"accuracy": 0.27,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 152.179,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.22857142857142862,
"domain": "code"
},
{
"subexperiment": "1A_k_sweep",
"task": "mbpp_plus",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk8_global_ridge",
"topk_K": 8,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk8_global_ridge_N24_full",
"selected_topk": [
"r4:mbpp_sanitized",
"r5:humaneval",
"r4:humaneval",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:mmlu_elementary_math"
],
"accuracy": 0.2733333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 152.508,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.24285714285714274,
"domain": "code"
},
{
"subexperiment": "1A_k_sweep",
"task": "mbpp_test_held",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk12_global_ridge",
"topk_K": 12,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk12_global_ridge_N24_full",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r5:humaneval",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:svamp",
"r5:pubmedqa_pqal",
"r4:aqua_rat"
],
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 4,
"eval_seconds": 50.687,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"subexperiment": "1A_k_sweep",
"task": "mbpp_test_held",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk16_global_ridge",
"topk_K": 16,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk16_global_ridge_N24_full",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r5:humaneval",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:svamp",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r5:conala_curated",
"r4:openbookqa",
"r4:medmcqa_easy",
"r5:medmcqa_easy"
],
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 5,
"eval_seconds": 50.62,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"subexperiment": "1A_k_sweep",
"task": "mbpp_test_held",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk20_global_ridge",
"topk_K": 20,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk20_global_ridge_N24_full",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r5:humaneval",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:svamp",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r5:conala_curated",
"r4:openbookqa",
"r5:medmcqa_easy",
"r4:medmcqa_easy",
"r5:mbpp_sanitized",
"r4:mbpp",
"r4:arc_easy",
"r5:math_counting_easy"
],
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 6,
"eval_seconds": 51.775,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"subexperiment": "1A_k_sweep",
"task": "mbpp_test_held",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk24_global_ridge",
"topk_K": 24,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk24_global_ridge_N24_full",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r5:humaneval",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:svamp",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r5:conala_curated",
"r4:openbookqa",
"r5:medmcqa_easy",
"r4:medmcqa_easy",
"r4:mbpp",
"r5:mbpp_sanitized",
"r4:arc_easy",
"r5:math_counting_easy",
"r4:sciq",
"r5:mawps",
"r4:gsm8k",
"r5:aqua_rat_numeric"
],
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 7,
"eval_seconds": 50.243,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"subexperiment": "1A_k_sweep",
"task": "mbpp_test_held",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk2_global_ridge",
"topk_K": 2,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk2_global_ridge_N24_full",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:math_counting_easy"
],
"accuracy": 0.26,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 0,
"eval_seconds": 51.753,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.3333333333333333,
"domain": "code"
},
{
"subexperiment": "1A_k_sweep",
"task": "mbpp_test_held",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk4_global_ridge",
"topk_K": 4,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk4_global_ridge_N24_full",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r4:humaneval",
"r5:humaneval"
],
"accuracy": 0.26,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 1,
"eval_seconds": 51.777,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.3333333333333333,
"domain": "code"
},
{
"subexperiment": "1A_k_sweep",
"task": "mbpp_test_held",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk6_global_ridge",
"topk_K": 6,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk6_global_ridge_N24_full",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r5:humaneval",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:multiarith"
],
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 2,
"eval_seconds": 51.66,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"subexperiment": "1A_k_sweep",
"task": "mbpp_test_held",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk8_global_ridge",
"topk_K": 8,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk8_global_ridge_N24_full",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r5:humaneval",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:mmlu_elementary_math"
],
"accuracy": 0.26,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 3,
"eval_seconds": 51.078,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.3333333333333333,
"domain": "code"
},
{
"subexperiment": "1A_k_sweep",
"task": "openbookqa_test",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk12_global_ridge",
"topk_K": 12,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk12_global_ridge_N24_full",
"selected_topk": [
"r4:mmlu_high_school_physics",
"r4:mmlu_high_school_biology",
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r4:mmlu_elementary_math",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith",
"r4:math_algebra_easy",
"r4:svamp",
"r4:openbookqa",
"r5:pubmedqa_pqal"
],
"accuracy": 0.7466666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 21.136,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.1341463414634149,
"domain": "science"
},
{
"subexperiment": "1A_k_sweep",
"task": "openbookqa_test",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk16_global_ridge",
"topk_K": 16,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk16_global_ridge_N24_full",
"selected_topk": [
"r4:mmlu_high_school_physics",
"r4:mmlu_high_school_biology",
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r4:mmlu_elementary_math",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith",
"r4:math_algebra_easy",
"r4:svamp",
"r4:openbookqa",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r5:medmcqa_easy",
"r4:medmcqa_easy",
"r5:conala_curated"
],
"accuracy": 0.75,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 23.81,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.14634146341463428,
"domain": "science"
},
{
"subexperiment": "1A_k_sweep",
"task": "openbookqa_test",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk20_global_ridge",
"topk_K": 20,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk20_global_ridge_N24_full",
"selected_topk": [
"r4:mmlu_high_school_physics",
"r4:mmlu_high_school_biology",
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r4:mmlu_elementary_math",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith",
"r4:math_algebra_easy",
"r4:svamp",
"r4:openbookqa",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:medmcqa_easy",
"r5:conala_curated",
"r4:sciq",
"r5:mbpp_sanitized",
"r4:mbpp",
"r5:math_counting_easy"
],
"accuracy": 0.7466666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 23.886,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.1341463414634149,
"domain": "science"
},
{
"subexperiment": "1A_k_sweep",
"task": "openbookqa_test",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk24_global_ridge",
"topk_K": 24,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk24_global_ridge_N24_full",
"selected_topk": [
"r4:mmlu_high_school_physics",
"r4:mmlu_high_school_biology",
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r4:mmlu_elementary_math",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith",
"r4:math_algebra_easy",
"r4:svamp",
"r4:openbookqa",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:medmcqa_easy",
"r5:conala_curated",
"r4:sciq",
"r5:mbpp_sanitized",
"r4:mbpp",
"r5:math_counting_easy",
"r4:gsm8k",
"r5:mawps",
"r5:aqua_rat_numeric",
"r4:arc_easy"
],
"accuracy": 0.75,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 21.378,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.14634146341463428,
"domain": "science"
},
{
"subexperiment": "1A_k_sweep",
"task": "openbookqa_test",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk2_global_ridge",
"topk_K": 2,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk2_global_ridge_N24_full",
"selected_topk": [
"r4:mmlu_high_school_physics",
"r4:mmlu_high_school_biology"
],
"accuracy": 0.71,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 28.785,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.0,
"domain": "science"
},
{
"subexperiment": "1A_k_sweep",
"task": "openbookqa_test",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk4_global_ridge",
"topk_K": 4,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk4_global_ridge_N24_full",
"selected_topk": [
"r4:mmlu_high_school_physics",
"r4:mmlu_high_school_biology",
"r4:mbpp_sanitized",
"r4:math_counting_easy"
],
"accuracy": 0.7033333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 17.313,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": -0.024390243902438775,
"domain": "science"
},
{
"subexperiment": "1A_k_sweep",
"task": "openbookqa_test",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk6_global_ridge",
"topk_K": 6,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk6_global_ridge_N24_full",
"selected_topk": [
"r4:mmlu_high_school_physics",
"r4:mmlu_high_school_biology",
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r4:mmlu_elementary_math",
"r5:humaneval"
],
"accuracy": 0.7133333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 23.886,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.012195121951219795,
"domain": "science"
},
{
"subexperiment": "1A_k_sweep",
"task": "openbookqa_test",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk8_global_ridge",
"topk_K": 8,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk8_global_ridge_N24_full",
"selected_topk": [
"r4:mmlu_high_school_physics",
"r4:mmlu_high_school_biology",
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r4:mmlu_elementary_math",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith"
],
"accuracy": 0.7133333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 22.785,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.012195121951219795,
"domain": "science"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm8k_test_500",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_pca_N12_seed0",
"selected_topk": null,
"accuracy": 0.1,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 12.148,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.09375000000000003,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm8k_test_500",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_pca_N12_seed1",
"selected_topk": null,
"accuracy": 0.10333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 12.822,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.109375,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm8k_test_500",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_pca_N12_seed2",
"selected_topk": null,
"accuracy": 0.09666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 8.836,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.07812499999999999,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm8k_test_500",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_ridge_N12_seed0",
"selected_topk": null,
"accuracy": 0.08666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 8.773,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.03125000000000001,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm8k_test_500",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_ridge_N12_seed1",
"selected_topk": null,
"accuracy": 0.1,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 11.491,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.09375000000000003,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm8k_test_500",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_ridge_N12_seed2",
"selected_topk": null,
"accuracy": 0.09,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 8.275,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.04687499999999998,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm8k_test_500",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_procrustes_N12_seed0",
"selected_topk": null,
"accuracy": 0.09,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 9.787,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.04687499999999998,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm8k_test_500",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_procrustes_N12_seed1",
"selected_topk": null,
"accuracy": 0.10666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 11.914,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.12500000000000003,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm8k_test_500",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_procrustes_N12_seed2",
"selected_topk": null,
"accuracy": 0.08666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 9.246,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.03125000000000001,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm8k_test_500",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_pca_N16_seed0",
"selected_topk": null,
"accuracy": 0.10666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 8.994,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.12500000000000003,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm8k_test_500",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_pca_N16_seed1",
"selected_topk": null,
"accuracy": 0.10333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 9.926,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.109375,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm8k_test_500",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_pca_N16_seed2",
"selected_topk": null,
"accuracy": 0.10666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 7.993,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.12500000000000003,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm8k_test_500",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_ridge_N16_seed0",
"selected_topk": null,
"accuracy": 0.08333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 8.441,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.015624999999999972,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm8k_test_500",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_ridge_N16_seed1",
"selected_topk": null,
"accuracy": 0.08333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 8.255,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.015624999999999972,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm8k_test_500",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_ridge_N16_seed2",
"selected_topk": null,
"accuracy": 0.09666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 7.784,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.07812499999999999,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm8k_test_500",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_procrustes_N16_seed0",
"selected_topk": null,
"accuracy": 0.1,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 9.276,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.09375000000000003,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm8k_test_500",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_procrustes_N16_seed1",
"selected_topk": null,
"accuracy": 0.10333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 9.883,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.109375,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm8k_test_500",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_procrustes_N16_seed2",
"selected_topk": null,
"accuracy": 0.09333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 8.051,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.06250000000000001,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm8k_test_500",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_pca_N24_full",
"selected_topk": null,
"accuracy": 0.1,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 10.998,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.09375000000000003,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm8k_test_500",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_ridge_N24_full",
"selected_topk": null,
"accuracy": 0.08666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 8.517,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.03125000000000001,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm8k_test_500",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_procrustes_N24_full",
"selected_topk": null,
"accuracy": 0.09333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 8.354,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.06250000000000001,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm_hard",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_pca_N12_seed0",
"selected_topk": null,
"accuracy": 0.07666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 28.943,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.15384615384615374,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm_hard",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_pca_N12_seed1",
"selected_topk": null,
"accuracy": 0.07,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 28.182,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.07692307692307694,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm_hard",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_pca_N12_seed2",
"selected_topk": null,
"accuracy": 0.06333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 25.292,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.0,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm_hard",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_ridge_N12_seed0",
"selected_topk": null,
"accuracy": 0.06666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 26.471,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.038461538461538394,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm_hard",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_ridge_N12_seed1",
"selected_topk": null,
"accuracy": 0.07,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 27.19,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.07692307692307694,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm_hard",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_ridge_N12_seed2",
"selected_topk": null,
"accuracy": 0.06333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 22.78,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.0,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm_hard",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_procrustes_N12_seed0",
"selected_topk": null,
"accuracy": 0.06666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 28.412,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.038461538461538394,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm_hard",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_procrustes_N12_seed1",
"selected_topk": null,
"accuracy": 0.07333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 26.601,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.11538461538461534,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm_hard",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_procrustes_N12_seed2",
"selected_topk": null,
"accuracy": 0.06333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 26.109,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.0,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm_hard",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_pca_N16_seed0",
"selected_topk": null,
"accuracy": 0.07666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 24.319,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.15384615384615374,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm_hard",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_pca_N16_seed1",
"selected_topk": null,
"accuracy": 0.07,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 27.608,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.07692307692307694,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm_hard",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_pca_N16_seed2",
"selected_topk": null,
"accuracy": 0.06333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 24.916,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.0,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm_hard",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_ridge_N16_seed0",
"selected_topk": null,
"accuracy": 0.06666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 24.16,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.038461538461538394,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm_hard",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_ridge_N16_seed1",
"selected_topk": null,
"accuracy": 0.06333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 24.881,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.0,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm_hard",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_ridge_N16_seed2",
"selected_topk": null,
"accuracy": 0.06333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 24.068,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.0,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm_hard",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_procrustes_N16_seed0",
"selected_topk": null,
"accuracy": 0.06666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 26.767,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.038461538461538394,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm_hard",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_procrustes_N16_seed1",
"selected_topk": null,
"accuracy": 0.056666666666666664,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 23.901,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": -0.07692307692307702,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm_hard",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_procrustes_N16_seed2",
"selected_topk": null,
"accuracy": 0.06333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 26.267,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.0,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm_hard",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_pca_N24_full",
"selected_topk": null,
"accuracy": 0.07,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 28.154,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.07692307692307694,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm_hard",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_ridge_N24_full",
"selected_topk": null,
"accuracy": 0.06333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 25.493,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.0,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "gsm_hard",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_procrustes_N24_full",
"selected_topk": null,
"accuracy": 0.07,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 25.138,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.07692307692307694,
"domain": "math"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_plus",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_pca_N12_seed0",
"selected_topk": null,
"accuracy": 0.21666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 152.235,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.0,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_plus",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_pca_N12_seed1",
"selected_topk": null,
"accuracy": 0.22333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 153.508,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.02857142857142852,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_plus",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_pca_N12_seed2",
"selected_topk": null,
"accuracy": 0.21666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 154.26,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.0,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_plus",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_ridge_N12_seed0",
"selected_topk": null,
"accuracy": 0.21666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 156.161,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.0,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_plus",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_ridge_N12_seed1",
"selected_topk": null,
"accuracy": 0.2633333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 151.866,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.19999999999999984,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_plus",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_ridge_N12_seed2",
"selected_topk": null,
"accuracy": 0.26666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 150.685,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.21428571428571425,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_plus",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_procrustes_N12_seed0",
"selected_topk": null,
"accuracy": 0.21666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 155.572,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.0,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_plus",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_procrustes_N12_seed1",
"selected_topk": null,
"accuracy": 0.23666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 152.068,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.08571428571428567,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_plus",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_procrustes_N12_seed2",
"selected_topk": null,
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 147.141,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.14285714285714282,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_plus",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_pca_N16_seed0",
"selected_topk": null,
"accuracy": 0.21666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 154.561,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.0,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_plus",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_pca_N16_seed1",
"selected_topk": null,
"accuracy": 0.21333333333333335,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 152.767,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": -0.01428571428571426,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_plus",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_pca_N16_seed2",
"selected_topk": null,
"accuracy": 0.21333333333333335,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 155.531,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": -0.01428571428571426,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_plus",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_ridge_N16_seed0",
"selected_topk": null,
"accuracy": 0.2633333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 149.424,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.19999999999999984,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_plus",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_ridge_N16_seed1",
"selected_topk": null,
"accuracy": 0.26666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 153.68,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.21428571428571425,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_plus",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_ridge_N16_seed2",
"selected_topk": null,
"accuracy": 0.26666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 153.64,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.21428571428571425,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_plus",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_procrustes_N16_seed0",
"selected_topk": null,
"accuracy": 0.23,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 152.497,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.057142857142857155,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_plus",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_procrustes_N16_seed1",
"selected_topk": null,
"accuracy": 0.23333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 144.794,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.07142857142857141,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_plus",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_procrustes_N16_seed2",
"selected_topk": null,
"accuracy": 0.24,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 155.568,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.09999999999999992,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_plus",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_pca_N24_full",
"selected_topk": null,
"accuracy": 0.21,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 156.101,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": -0.028571428571428636,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_plus",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_ridge_N24_full",
"selected_topk": null,
"accuracy": 0.2733333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 153.146,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.24285714285714274,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_plus",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_procrustes_N24_full",
"selected_topk": null,
"accuracy": 0.22333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 151.112,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.02857142857142852,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_test_held",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_pca_N12_seed0",
"selected_topk": null,
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 0,
"eval_seconds": 46.874,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_test_held",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_pca_N12_seed1",
"selected_topk": null,
"accuracy": 0.24,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 1,
"eval_seconds": 46.856,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.11111111111111091,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_test_held",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_pca_N12_seed2",
"selected_topk": null,
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 2,
"eval_seconds": 48.369,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_test_held",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_ridge_N12_seed0",
"selected_topk": null,
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 2,
"eval_seconds": 50.528,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_test_held",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_ridge_N12_seed1",
"selected_topk": null,
"accuracy": 0.26,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 3,
"eval_seconds": 50.432,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.3333333333333333,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_test_held",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_ridge_N12_seed2",
"selected_topk": null,
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 4,
"eval_seconds": 50.815,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_test_held",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_procrustes_N12_seed0",
"selected_topk": null,
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 1,
"eval_seconds": 48.905,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_test_held",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_procrustes_N12_seed1",
"selected_topk": null,
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 2,
"eval_seconds": 51.877,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_test_held",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_procrustes_N12_seed2",
"selected_topk": null,
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 3,
"eval_seconds": 51.357,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_test_held",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_pca_N16_seed0",
"selected_topk": null,
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 3,
"eval_seconds": 48.404,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_test_held",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_pca_N16_seed1",
"selected_topk": null,
"accuracy": 0.24,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 4,
"eval_seconds": 49.682,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.11111111111111091,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_test_held",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_pca_N16_seed2",
"selected_topk": null,
"accuracy": 0.24,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 5,
"eval_seconds": 47.994,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.11111111111111091,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_test_held",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_ridge_N16_seed0",
"selected_topk": null,
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 5,
"eval_seconds": 51.19,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_test_held",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_ridge_N16_seed1",
"selected_topk": null,
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 6,
"eval_seconds": 51.786,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_test_held",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_ridge_N16_seed2",
"selected_topk": null,
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 7,
"eval_seconds": 51.484,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_test_held",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_procrustes_N16_seed0",
"selected_topk": null,
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 4,
"eval_seconds": 47.247,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_test_held",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_procrustes_N16_seed1",
"selected_topk": null,
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 5,
"eval_seconds": 51.2,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_test_held",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_procrustes_N16_seed2",
"selected_topk": null,
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 6,
"eval_seconds": 50.316,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_test_held",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_pca_N24_full",
"selected_topk": null,
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 6,
"eval_seconds": 48.081,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_test_held",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_ridge_N24_full",
"selected_topk": null,
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 0,
"eval_seconds": 50.253,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "mbpp_test_held",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_procrustes_N24_full",
"selected_topk": null,
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 7,
"eval_seconds": 48.42,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "openbookqa_test",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_pca_N12_seed0",
"selected_topk": null,
"accuracy": 0.7366666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 16.579,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.09756097560975632,
"domain": "science"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "openbookqa_test",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_pca_N12_seed1",
"selected_topk": null,
"accuracy": 0.7366666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 6.928,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.09756097560975632,
"domain": "science"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "openbookqa_test",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_pca_N12_seed2",
"selected_topk": null,
"accuracy": 0.7666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 7.813,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.20731707317073203,
"domain": "science"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "openbookqa_test",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_ridge_N12_seed0",
"selected_topk": null,
"accuracy": 0.73,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 17.824,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.07317073170731714,
"domain": "science"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "openbookqa_test",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_ridge_N12_seed1",
"selected_topk": null,
"accuracy": 0.7,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 12.94,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": -0.03658536585365857,
"domain": "science"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "openbookqa_test",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_ridge_N12_seed2",
"selected_topk": null,
"accuracy": 0.7666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 7.8,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.20731707317073203,
"domain": "science"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "openbookqa_test",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_procrustes_N12_seed0",
"selected_topk": null,
"accuracy": 0.7266666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 14.833,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.060975609756097754,
"domain": "science"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "openbookqa_test",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_procrustes_N12_seed1",
"selected_topk": null,
"accuracy": 0.7266666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 9.166,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.060975609756097754,
"domain": "science"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "openbookqa_test",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_procrustes_N12_seed2",
"selected_topk": null,
"accuracy": 0.7666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 5.952,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.20731707317073203,
"domain": "science"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "openbookqa_test",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_pca_N16_seed0",
"selected_topk": null,
"accuracy": 0.7333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 10.527,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.08536585365853654,
"domain": "science"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "openbookqa_test",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_pca_N16_seed1",
"selected_topk": null,
"accuracy": 0.7666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 11.084,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.20731707317073203,
"domain": "science"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "openbookqa_test",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_pca_N16_seed2",
"selected_topk": null,
"accuracy": 0.7666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 8.293,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.20731707317073203,
"domain": "science"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "openbookqa_test",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_ridge_N16_seed0",
"selected_topk": null,
"accuracy": 0.7333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 15.941,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.08536585365853654,
"domain": "science"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "openbookqa_test",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_ridge_N16_seed1",
"selected_topk": null,
"accuracy": 0.7366666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 14.792,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.09756097560975632,
"domain": "science"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "openbookqa_test",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_ridge_N16_seed2",
"selected_topk": null,
"accuracy": 0.7633333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 7.42,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.19512195121951226,
"domain": "science"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "openbookqa_test",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_procrustes_N16_seed0",
"selected_topk": null,
"accuracy": 0.7433333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 9.618,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.1219512195121951,
"domain": "science"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "openbookqa_test",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_procrustes_N16_seed1",
"selected_topk": null,
"accuracy": 0.7466666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 11.075,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.1341463414634149,
"domain": "science"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "openbookqa_test",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_procrustes_N16_seed2",
"selected_topk": null,
"accuracy": 0.76,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 4.516,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.18292682926829285,
"domain": "science"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "openbookqa_test",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "pertensor_pca",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_pca_N24_full",
"selected_topk": null,
"accuracy": 0.7666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 13.996,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.20731707317073203,
"domain": "science"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "openbookqa_test",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "pertensor_ridge",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_ridge_N24_full",
"selected_topk": null,
"accuracy": 0.7466666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 23.164,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.1341463414634149,
"domain": "science"
},
{
"subexperiment": "1B_pertensor_methods",
"task": "openbookqa_test",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "procrustes",
"topk_K": null,
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_procrustes_N24_full",
"selected_topk": null,
"accuracy": 0.74,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 7.859,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.10975609756097571,
"domain": "science"
}
],
"summary": {
"k_sweep": {
"2": {
"n_records": 5,
"gap_recovered_mean": 0.03236034798534796,
"gap_recovered_std": 0.2512825052045027,
"accuracy_mean": 0.2673333333333333
},
"4": {
"n_records": 5,
"gap_recovered_mean": 0.06904136513892616,
"gap_recovered_std": 0.21041601120601108,
"accuracy_mean": 0.272
},
"6": {
"n_records": 5,
"gap_recovered_mean": 0.08346313916435871,
"gap_recovered_std": 0.13586719552143517,
"accuracy_mean": 0.2753333333333333
},
"8": {
"n_records": 5,
"gap_recovered_mean": 0.12873481193603145,
"gap_recovered_std": 0.15624061361170813,
"accuracy_mean": 0.2813333333333333
},
"12": {
"n_records": 5,
"gap_recovered_mean": 0.13063497647339112,
"gap_recovered_std": 0.1180632522546757,
"accuracy_mean": 0.28600000000000003
},
"16": {
"n_records": 5,
"gap_recovered_mean": 0.12906987998451414,
"gap_recovered_std": 0.09660226150894938,
"accuracy_mean": 0.2846666666666667
},
"20": {
"n_records": 5,
"gap_recovered_mean": 0.13002371273712737,
"gap_recovered_std": 0.08893282592449571,
"accuracy_mean": 0.2846666666666667
},
"24": {
"n_records": 5,
"gap_recovered_mean": 0.12648059427022842,
"gap_recovered_std": 0.08860748821676426,
"accuracy_mean": 0.284
}
},
"pertensor_methods": {
"pertensor_ridge": {
"12": {
"n_records": 15,
"gap_recovered_mean": 0.1148817030981665,
"gap_recovered_std": 0.10917220312300747,
"accuracy_mean": 0.2786666666666667
},
"16": {
"n_records": 15,
"gap_recovered_mean": 0.12140822761249587,
"gap_recovered_std": 0.0932870579211698,
"accuracy_mean": 0.2824444444444445
},
"24": {
"n_records": 5,
"gap_recovered_mean": 0.12609514130855595,
"gap_recovered_std": 0.10935514163267879,
"accuracy_mean": 0.28400000000000003
}
},
"procrustes": {
"12": {
"n_records": 15,
"gap_recovered_mean": 0.10543183611781175,
"gap_recovered_std": 0.08157963485059341,
"accuracy_mean": 0.2773333333333333
},
"16": {
"n_records": 15,
"gap_recovered_mean": 0.10409506313469727,
"gap_recovered_std": 0.0855081370879396,
"accuracy_mean": 0.2791111111111111
},
"24": {
"n_records": 5,
"gap_recovered_mean": 0.09999456505554065,
"gap_recovered_std": 0.07428709190008917,
"accuracy_mean": 0.2753333333333333
}
},
"pertensor_pca": {
"12": {
"n_records": 15,
"gap_recovered_mean": 0.09990568261909726,
"gap_recovered_std": 0.07590792386005181,
"accuracy_mean": 0.27644444444444444
},
"16": {
"n_records": 15,
"gap_recovered_mean": 0.10040114977614979,
"gap_recovered_std": 0.07987630180578464,
"accuracy_mean": 0.2777777777777778
},
"24": {
"n_records": 5,
"gap_recovered_mean": 0.1143281887489205,
"gap_recovered_std": 0.10311517826966993,
"accuracy_mean": 0.2793333333333333
}
}
},
"n24_six_method_comparison": {
"mean": {
"source": "R6",
"n_records": 5,
"gap_recovered_mean": 0.0830787285208017,
"gap_recovered_std": 0.07181727060927716
},
"global_ridge": {
"source": "R6",
"n_records": 5,
"gap_recovered_mean": 0.13478416569879983,
"gap_recovered_std": 0.10350184199429305
},
"topk8_global_ridge": {
"source": "R6",
"n_records": 5,
"gap_recovered_mean": 0.12109363366985318,
"gap_recovered_std": 0.12401845134797244
},
"pertensor_ridge": {
"source": "R8",
"n_records": 5,
"gap_recovered_mean": 0.12609514130855595,
"gap_recovered_std": 0.10935514163267879
},
"procrustes": {
"source": "R8",
"n_records": 5,
"gap_recovered_mean": 0.09999456505554065,
"gap_recovered_std": 0.07428709190008917
},
"pertensor_pca": {
"source": "R8",
"n_records": 5,
"gap_recovered_mean": 0.1143281887489205,
"gap_recovered_std": 0.10311517826966993
}
}
},
"derived_from_r6_records": {
"description": "Per-task N=16 stats derived from R6 records plus R8 N=16 new-method records; no new compute for R6 columns.",
"per_task": {
"gsm_hard": {
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"r6_N16": {
"mean": {
"accuracy": {
"mean": 0.06555555555555555,
"std": 0.0050917507721731595,
"n": 3
},
"gap_recovered": {
"mean": 0.025641025641025595,
"std": 0.058750970448151855,
"n": 3
},
"seeds": [
0,
1,
2
]
},
"global_ridge": {
"accuracy": {
"mean": 0.061111111111111116,
"std": 0.0038490017945975096,
"n": 3
},
"gap_recovered": {
"mean": -0.025641025641025675,
"std": 0.04441155916843281,
"n": 3
},
"seeds": [
0,
1,
2
]
},
"topk8_global_ridge": {
"accuracy": {
"mean": 0.06222222222222223,
"std": 0.005091750772173158,
"n": 3
},
"gap_recovered": {
"mean": -0.012820512820512877,
"std": 0.05875097044815183,
"n": 3
},
"seeds": [
0,
1,
2
]
}
},
"r8_N16": {
"pertensor_ridge": {
"accuracy": {
"mean": 0.06444444444444446,
"std": 0.0019245008972987488,
"n": 3
},
"gap_recovered": {
"mean": 0.012820512820512798,
"std": 0.02220577958421634,
"n": 3
},
"seeds": [
0,
1,
2
]
},
"procrustes": {
"accuracy": {
"mean": 0.062222222222222213,
"std": 0.005091750772173157,
"n": 3
},
"gap_recovered": {
"mean": -0.012820512820512877,
"std": 0.05875097044815183,
"n": 3
},
"seeds": [
0,
1,
2
]
},
"pertensor_pca": {
"accuracy": {
"mean": 0.07,
"std": 0.006666666666666661,
"n": 3
},
"gap_recovered": {
"mean": 0.07692307692307689,
"std": 0.07692307692307687,
"n": 3
},
"seeds": [
0,
1,
2
]
}
},
"best_R6_N16": {
"method": "mean",
"source": "R6",
"accuracy_mean": 0.06555555555555555,
"accuracy_std": 0.0050917507721731595,
"gap_recovered_mean": 0.025641025641025595,
"gap_recovered_std": 0.058750970448151855,
"n": 3
},
"best_R8_new_N16": {
"method": "pertensor_pca",
"source": "R8",
"accuracy_mean": 0.07,
"accuracy_std": 0.006666666666666661,
"gap_recovered_mean": 0.07692307692307689,
"gap_recovered_std": 0.07692307692307687,
"n": 3
},
"best_learned_N16": {
"method": "pertensor_pca",
"source": "R8",
"accuracy_mean": 0.07,
"accuracy_std": 0.006666666666666661,
"gap_recovered_mean": 0.07692307692307689,
"gap_recovered_std": 0.07692307692307687,
"n": 3
}
},
"gsm8k_test_500": {
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"r6_N16": {
"mean": {
"accuracy": {
"mean": 0.10222222222222221,
"std": 0.001924500897298749,
"n": 3
},
"gap_recovered": {
"mean": 0.10416666666666667,
"std": 0.009021097956087886,
"n": 3
},
"seeds": [
0,
1,
2
]
},
"global_ridge": {
"accuracy": {
"mean": 0.09222222222222222,
"std": 0.010715167512214395,
"n": 3
},
"gap_recovered": {
"mean": 0.057291666666666664,
"std": 0.05022734771350498,
"n": 3
},
"seeds": [
0,
1,
2
]
},
"topk8_global_ridge": {
"accuracy": {
"mean": 0.09333333333333334,
"std": 0.008819171036881974,
"n": 3
},
"gap_recovered": {
"mean": 0.0625,
"std": 0.04133986423538425,
"n": 3
},
"seeds": [
0,
1,
2
]
}
},
"r8_N16": {
"pertensor_ridge": {
"accuracy": {
"mean": 0.08777777777777777,
"std": 0.0076980035891950115,
"n": 3
},
"gap_recovered": {
"mean": 0.03645833333333331,
"std": 0.036084391824351615,
"n": 3
},
"seeds": [
0,
1,
2
]
},
"procrustes": {
"accuracy": {
"mean": 0.09888888888888887,
"std": 0.0050917507721731535,
"n": 3
},
"gap_recovered": {
"mean": 0.08854166666666667,
"std": 0.023867581744561665,
"n": 3
},
"seeds": [
0,
1,
2
]
},
"pertensor_pca": {
"accuracy": {
"mean": 0.10555555555555556,
"std": 0.0019245008972987568,
"n": 3
},
"gap_recovered": {
"mean": 0.11979166666666669,
"std": 0.00902109795608792,
"n": 3
},
"seeds": [
0,
1,
2
]
}
},
"best_R6_N16": {
"method": "mean",
"source": "R6",
"accuracy_mean": 0.10222222222222221,
"accuracy_std": 0.001924500897298749,
"gap_recovered_mean": 0.10416666666666667,
"gap_recovered_std": 0.009021097956087886,
"n": 3
},
"best_R8_new_N16": {
"method": "pertensor_pca",
"source": "R8",
"accuracy_mean": 0.10555555555555556,
"accuracy_std": 0.0019245008972987568,
"gap_recovered_mean": 0.11979166666666669,
"gap_recovered_std": 0.00902109795608792,
"n": 3
},
"best_learned_N16": {
"method": "pertensor_pca",
"source": "R8",
"accuracy_mean": 0.10555555555555556,
"accuracy_std": 0.0019245008972987568,
"gap_recovered_mean": 0.11979166666666669,
"gap_recovered_std": 0.00902109795608792,
"n": 3
}
},
"mbpp_test_held": {
"base_Y": 0.23,
"oracle": 0.32,
"r6_N16": {
"mean": {
"accuracy": {
"mean": 0.24,
"std": 0.0,
"n": 3
},
"gap_recovered": {
"mean": 0.1111111111111109,
"std": 1.6996749443881478e-17,
"n": 3
},
"seeds": [
0,
1,
2
]
},
"global_ridge": {
"accuracy": {
"mean": 0.25666666666666665,
"std": 0.005773502691896262,
"n": 3
},
"gap_recovered": {
"mean": 0.2962962962962963,
"std": 0.06415002990995847,
"n": 3
},
"seeds": [
0,
1,
2
]
},
"topk8_global_ridge": {
"accuracy": {
"mean": 0.25,
"std": 0.0,
"n": 3
},
"gap_recovered": {
"mean": 0.22222222222222213,
"std": 0.0,
"n": 3
},
"seeds": [
0,
1,
2
]
}
},
"r8_N16": {
"pertensor_ridge": {
"accuracy": {
"mean": 0.25,
"std": 0.0,
"n": 3
},
"gap_recovered": {
"mean": 0.22222222222222213,
"std": 0.0,
"n": 3
},
"seeds": [
0,
1,
2
]
},
"procrustes": {
"accuracy": {
"mean": 0.25,
"std": 0.0,
"n": 3
},
"gap_recovered": {
"mean": 0.22222222222222213,
"std": 0.0,
"n": 3
},
"seeds": [
0,
1,
2
]
},
"pertensor_pca": {
"accuracy": {
"mean": 0.24333333333333332,
"std": 0.005773502691896263,
"n": 3
},
"gap_recovered": {
"mean": 0.148148148148148,
"std": 0.06415002990995848,
"n": 3
},
"seeds": [
0,
1,
2
]
}
},
"best_R6_N16": {
"method": "global_ridge",
"source": "R6",
"accuracy_mean": 0.25666666666666665,
"accuracy_std": 0.005773502691896262,
"gap_recovered_mean": 0.2962962962962963,
"gap_recovered_std": 0.06415002990995847,
"n": 3
},
"best_R8_new_N16": {
"method": "pertensor_ridge",
"source": "R8",
"accuracy_mean": 0.25,
"accuracy_std": 0.0,
"gap_recovered_mean": 0.22222222222222213,
"gap_recovered_std": 0.0,
"n": 3
},
"best_learned_N16": {
"method": "global_ridge",
"source": "R6",
"accuracy_mean": 0.25666666666666665,
"accuracy_std": 0.005773502691896262,
"gap_recovered_mean": 0.2962962962962963,
"gap_recovered_std": 0.06415002990995847,
"n": 3
}
},
"mbpp_plus": {
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"r6_N16": {
"mean": {
"accuracy": {
"mean": 0.21222222222222223,
"std": 0.0019245008972987648,
"n": 3
},
"gap_recovered": {
"mean": -0.019047619047619053,
"std": 0.008247860988423278,
"n": 3
},
"seeds": [
0,
1,
2
]
},
"global_ridge": {
"accuracy": {
"mean": 0.27,
"std": 0.003333333333333327,
"n": 3
},
"gap_recovered": {
"mean": 0.22857142857142854,
"std": 0.014285714285714249,
"n": 3
},
"seeds": [
0,
1,
2
]
},
"topk8_global_ridge": {
"accuracy": {
"mean": 0.2688888888888889,
"std": 0.0038490017945974975,
"n": 3
},
"gap_recovered": {
"mean": 0.22380952380952376,
"std": 0.016495721976846407,
"n": 3
},
"seeds": [
0,
1,
2
]
}
},
"r8_N16": {
"pertensor_ridge": {
"accuracy": {
"mean": 0.26555555555555554,
"std": 0.0019245008972987648,
"n": 3
},
"gap_recovered": {
"mean": 0.20952380952380945,
"std": 0.008247860988423292,
"n": 3
},
"seeds": [
0,
1,
2
]
},
"procrustes": {
"accuracy": {
"mean": 0.23444444444444446,
"std": 0.0050917507721731465,
"n": 3
},
"gap_recovered": {
"mean": 0.07619047619047616,
"std": 0.021821789023599193,
"n": 3
},
"seeds": [
0,
1,
2
]
},
"pertensor_pca": {
"accuracy": {
"mean": 0.21444444444444447,
"std": 0.0019245008972987488,
"n": 3
},
"gap_recovered": {
"mean": -0.009523809523809506,
"std": 0.008247860988423209,
"n": 3
},
"seeds": [
0,
1,
2
]
}
},
"best_R6_N16": {
"method": "global_ridge",
"source": "R6",
"accuracy_mean": 0.27,
"accuracy_std": 0.003333333333333327,
"gap_recovered_mean": 0.22857142857142854,
"gap_recovered_std": 0.014285714285714249,
"n": 3
},
"best_R8_new_N16": {
"method": "pertensor_ridge",
"source": "R8",
"accuracy_mean": 0.26555555555555554,
"accuracy_std": 0.0019245008972987648,
"gap_recovered_mean": 0.20952380952380945,
"gap_recovered_std": 0.008247860988423292,
"n": 3
},
"best_learned_N16": {
"method": "global_ridge",
"source": "R6",
"accuracy_mean": 0.27,
"accuracy_std": 0.003333333333333327,
"gap_recovered_mean": 0.22857142857142854,
"gap_recovered_std": 0.014285714285714249,
"n": 3
}
},
"openbookqa_test": {
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"r6_N16": {
"mean": {
"accuracy": {
"mean": 0.7544444444444444,
"std": 0.001924500897298797,
"n": 3
},
"gap_recovered": {
"mean": 0.16260162601626027,
"std": 0.007040856941337066,
"n": 3
},
"seeds": [
0,
1,
2
]
},
"global_ridge": {
"accuracy": {
"mean": 0.7444444444444445,
"std": 0.015030832509409663,
"n": 3
},
"gap_recovered": {
"mean": 0.1260162601626017,
"std": 0.054990850644181695,
"n": 3
},
"seeds": [
0,
1,
2
]
},
"topk8_global_ridge": {
"accuracy": {
"mean": 0.7433333333333333,
"std": 0.01527525231651948,
"n": 3
},
"gap_recovered": {
"mean": 0.12195121951219523,
"std": 0.05588506945068102,
"n": 3
},
"seeds": [
0,
1,
2
]
}
},
"r8_N16": {
"pertensor_ridge": {
"accuracy": {
"mean": 0.7444444444444445,
"std": 0.016442942874387488,
"n": 3
},
"gap_recovered": {
"mean": 0.1260162601626017,
"std": 0.060157108077027406,
"n": 3
},
"seeds": [
0,
1,
2
]
},
"procrustes": {
"accuracy": {
"mean": 0.75,
"std": 0.008819171036881984,
"n": 3
},
"gap_recovered": {
"mean": 0.14634146341463428,
"std": 0.03226525989103164,
"n": 3
},
"seeds": [
0,
1,
2
]
},
"pertensor_pca": {
"accuracy": {
"mean": 0.7555555555555555,
"std": 0.019245008972987587,
"n": 3
},
"gap_recovered": {
"mean": 0.16666666666666685,
"std": 0.07040856941336922,
"n": 3
},
"seeds": [
0,
1,
2
]
}
},
"best_R6_N16": {
"method": "mean",
"source": "R6",
"accuracy_mean": 0.7544444444444444,
"accuracy_std": 0.001924500897298797,
"gap_recovered_mean": 0.16260162601626027,
"gap_recovered_std": 0.007040856941337066,
"n": 3
},
"best_R8_new_N16": {
"method": "pertensor_pca",
"source": "R8",
"accuracy_mean": 0.7555555555555555,
"accuracy_std": 0.019245008972987587,
"gap_recovered_mean": 0.16666666666666685,
"gap_recovered_std": 0.07040856941336922,
"n": 3
},
"best_learned_N16": {
"method": "pertensor_pca",
"source": "R8",
"accuracy_mean": 0.7555555555555555,
"accuracy_std": 0.019245008972987587,
"gap_recovered_mean": 0.16666666666666685,
"gap_recovered_std": 0.07040856941336922,
"n": 3
}
}
}
},
"derived_from_r6_records_source": {
"results_round6_json_pulled_from_hub": true,
"r6_record_count": 195
}
}