cross-model-lora-prediction-3b / results_round6.json
CK0607's picture
Round 6 real generation eval: results_round6.json
4f5758d verified
raw
history blame
189 kB
{
"config": {
"model_X": "Qwen/Qwen2.5-3B-Instruct",
"model_Y": "meta-llama/Llama-3.2-3B-Instruct",
"hub_repo": "CK0607/cross-model-lora-prediction-3b",
"round6_real_generation_eval": true,
"round5_surrogate_deprecated": true,
"no_surrogate": true,
"eval_examples_requested": 300,
"generation": {
"do_sample": false,
"num_beams": 1,
"greedy": true,
"max_new_tokens_code": 96,
"max_new_tokens_other": 24
},
"pool_anchor_names": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"heldouts": [
"gsm_hard",
"gsm8k_test_500",
"mbpp_test_held",
"mbpp_plus",
"openbookqa_test"
],
"N_values": [
4,
8,
12,
16,
24
],
"methods": [
"mean",
"global_ridge",
"topk8_global_ridge"
],
"seeds_for_subsampled_N": [
0,
1,
2
],
"N24_seed": 0,
"budget_reduction": null,
"wall_seconds": 724.129
},
"adapter_verification": {
"listing": {
"round4/X": [
"aqua_rat",
"arc_challenge",
"arc_easy",
"gsm8k",
"gsm8k_test_500",
"gsm_hard",
"humaneval",
"math_algebra_easy",
"math_counting_easy",
"mbpp",
"mbpp_plus",
"mbpp_sanitized",
"mbpp_test_held",
"medmcqa_easy",
"mmlu_elementary_math",
"mmlu_high_school_biology",
"mmlu_high_school_physics",
"multiarith",
"openbookqa",
"openbookqa_test",
"sciq",
"svamp"
],
"round4/Y": [
"aqua_rat",
"arc_challenge",
"arc_easy",
"gsm8k",
"gsm8k_test_500",
"gsm_hard",
"humaneval",
"math_algebra_easy",
"math_counting_easy",
"mbpp",
"mbpp_plus",
"mbpp_sanitized",
"mbpp_test_held",
"medmcqa_easy",
"mmlu_elementary_math",
"mmlu_high_school_biology",
"mmlu_high_school_physics",
"multiarith",
"openbookqa",
"openbookqa_test",
"sciq",
"svamp"
],
"round5/X": [
"aqua_rat_numeric",
"conala_curated",
"humaneval",
"math_counting_easy",
"mawps",
"mbpp_sanitized",
"medmcqa_easy",
"pubmedqa_pqal"
],
"round5/Y": [
"aqua_rat_numeric",
"conala_curated",
"humaneval",
"math_counting_easy",
"mawps",
"mbpp_sanitized",
"medmcqa_easy",
"pubmedqa_pqal"
]
},
"missing": []
},
"baselines": {
"gsm_hard": {
"base_Y": 0.06333333333333334,
"oracle": 0.15
},
"gsm8k_test_500": {
"base_Y": 0.08,
"oracle": 0.29333333333333333
},
"mbpp_test_held": {
"base_Y": 0.23,
"oracle": 0.32
},
"mbpp_plus": {
"base_Y": 0.21666666666666667,
"oracle": 0.45
},
"openbookqa_test": {
"base_Y": 0.71,
"oracle": 0.9833333333333333
}
},
"records": [
{
"task": "gsm8k_test_500",
"N": 4,
"seed": 0,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:arc_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N4_seed0",
"selected_topk": null,
"accuracy": 0.08,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 16.047,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.0,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 4,
"seed": 1,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N4_seed1",
"selected_topk": null,
"accuracy": 0.056666666666666664,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 6.767,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": -0.10937500000000003,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 4,
"seed": 2,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r5:aqua_rat_numeric",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N4_seed2",
"selected_topk": null,
"accuracy": 0.07666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 6.193,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": -0.015625000000000038,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 4,
"seed": 0,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:arc_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N4_seed0",
"selected_topk": null,
"accuracy": 0.09333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 12.83,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.06250000000000001,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 4,
"seed": 1,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N4_seed1",
"selected_topk": null,
"accuracy": 0.08666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 5.713,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.03125000000000001,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 4,
"seed": 2,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r5:aqua_rat_numeric",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N4_seed2",
"selected_topk": null,
"accuracy": 0.09666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 7.464,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.07812499999999999,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 4,
"seed": 0,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:arc_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N4_seed0",
"selected_topk": [
"r4:aqua_rat",
"r5:conala_curated",
"r5:math_counting_easy"
],
"accuracy": 0.07666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 15.428,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": -0.015625000000000038,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 4,
"seed": 1,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N4_seed1",
"selected_topk": [
"r4:mbpp_sanitized",
"r5:pubmedqa_pqal",
"r4:aqua_rat"
],
"accuracy": 0.06,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 7.106,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": -0.09375000000000003,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 4,
"seed": 2,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r5:aqua_rat_numeric",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N4_seed2",
"selected_topk": [
"r4:math_counting_easy",
"r5:conala_curated",
"r5:aqua_rat_numeric"
],
"accuracy": 0.07333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 6.161,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": -0.03125000000000001,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 8,
"seed": 0,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N8_seed0",
"selected_topk": null,
"accuracy": 0.09333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 10.252,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.06250000000000001,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 8,
"seed": 1,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N8_seed1",
"selected_topk": null,
"accuracy": 0.07333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 11.38,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": -0.03125000000000001,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 8,
"seed": 2,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N8_seed2",
"selected_topk": null,
"accuracy": 0.08,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 5.259,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.0,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 8,
"seed": 0,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N8_seed0",
"selected_topk": null,
"accuracy": 0.09666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 8.174,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.07812499999999999,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 8,
"seed": 1,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N8_seed1",
"selected_topk": null,
"accuracy": 0.1,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 9.26,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.09375000000000003,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 8,
"seed": 2,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N8_seed2",
"selected_topk": null,
"accuracy": 0.1,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 6.14,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.09375000000000003,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 8,
"seed": 0,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N8_seed0",
"selected_topk": [
"r5:humaneval",
"r4:mmlu_elementary_math",
"r4:svamp",
"r4:aqua_rat",
"r5:conala_curated",
"r4:mbpp",
"r5:math_counting_easy"
],
"accuracy": 0.1,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 8.825,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.09375000000000003,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 8,
"seed": 1,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N8_seed1",
"selected_topk": [
"r4:mbpp_sanitized",
"r5:humaneval",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:math_counting_easy"
],
"accuracy": 0.07333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 13.534,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": -0.03125000000000001,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 8,
"seed": 2,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N8_seed2",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:mbpp_sanitized",
"r5:aqua_rat_numeric"
],
"accuracy": 0.08333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 5.216,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.015624999999999972,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N12_seed0",
"selected_topk": null,
"accuracy": 0.09666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 4.862,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.07812499999999999,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N12_seed1",
"selected_topk": null,
"accuracy": 0.09666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 6.151,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.07812499999999999,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N12_seed2",
"selected_topk": null,
"accuracy": 0.09,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 4.764,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.04687499999999998,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N12_seed0",
"selected_topk": null,
"accuracy": 0.09666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 10.116,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.07812499999999999,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N12_seed1",
"selected_topk": null,
"accuracy": 0.10666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 9.274,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.12500000000000003,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N12_seed2",
"selected_topk": null,
"accuracy": 0.1,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 5.933,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.09375000000000003,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N12_seed0",
"selected_topk": [
"r4:math_counting_easy",
"r5:humaneval",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:mmlu_high_school_biology",
"r4:svamp",
"r4:aqua_rat",
"r5:conala_curated"
],
"accuracy": 0.09333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 5.642,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.06250000000000001,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N12_seed1",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:mmlu_high_school_physics",
"r5:humaneval",
"r4:math_algebra_easy",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"accuracy": 0.09,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 6.188,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.04687499999999998,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N12_seed2",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:humaneval",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:openbookqa",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"accuracy": 0.09333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 4.729,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.06250000000000001,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N16_seed0",
"selected_topk": null,
"accuracy": 0.1,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 4.316,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.09375000000000003,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N16_seed1",
"selected_topk": null,
"accuracy": 0.08,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 4.721,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.0,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N16_seed2",
"selected_topk": null,
"accuracy": 0.09666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 4.675,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.07812499999999999,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N16_seed0",
"selected_topk": null,
"accuracy": 0.10333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 5.288,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.109375,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N16_seed1",
"selected_topk": null,
"accuracy": 0.10333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 5.185,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.109375,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N16_seed2",
"selected_topk": null,
"accuracy": 0.1,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 5.741,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.09375000000000003,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N16_seed0",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith",
"r4:math_algebra_easy",
"r4:mmlu_elementary_math",
"r4:mmlu_high_school_biology"
],
"accuracy": 0.1,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 4.798,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.09375000000000003,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N16_seed1",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:mmlu_high_school_physics",
"r5:humaneval",
"r4:math_algebra_easy",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r4:openbookqa"
],
"accuracy": 0.08333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 4.775,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.015624999999999972,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N16_seed2",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:humaneval",
"r4:multiarith",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:openbookqa",
"r5:conala_curated"
],
"accuracy": 0.09666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 4.732,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.07812499999999999,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "global_ridge",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N24_full",
"selected_topk": null,
"accuracy": 0.09333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 4.367,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.06250000000000001,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "mean",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N24_full",
"selected_topk": null,
"accuracy": 0.10333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 5.883,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.109375,
"domain": "math"
},
{
"task": "gsm8k_test_500",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk8_global_ridge",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N24_full",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:mmlu_high_school_physics",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith",
"r4:math_algebra_easy",
"r4:mmlu_elementary_math"
],
"accuracy": 0.09,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 4.254,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.04687499999999998,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 4,
"seed": 0,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:arc_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N4_seed0",
"selected_topk": null,
"accuracy": 0.05,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 18.317,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": -0.15384615384615388,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 4,
"seed": 1,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N4_seed1",
"selected_topk": null,
"accuracy": 0.03666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 14.877,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": -0.30769230769230776,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 4,
"seed": 2,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r5:aqua_rat_numeric",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N4_seed2",
"selected_topk": null,
"accuracy": 0.04666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 14.164,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": -0.19230769230769237,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 4,
"seed": 0,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:arc_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N4_seed0",
"selected_topk": null,
"accuracy": 0.06333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 18.709,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.0,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 4,
"seed": 1,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N4_seed1",
"selected_topk": null,
"accuracy": 0.06666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 15.851,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.038461538461538394,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 4,
"seed": 2,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r5:aqua_rat_numeric",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N4_seed2",
"selected_topk": null,
"accuracy": 0.056666666666666664,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 15.649,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": -0.07692307692307702,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 4,
"seed": 0,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:arc_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N4_seed0",
"selected_topk": [
"r4:aqua_rat",
"r5:conala_curated",
"r5:math_counting_easy"
],
"accuracy": 0.05333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 18.151,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": -0.11538461538461542,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 4,
"seed": 1,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N4_seed1",
"selected_topk": [
"r4:mbpp_sanitized",
"r5:pubmedqa_pqal",
"r4:aqua_rat"
],
"accuracy": 0.03333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 16.423,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": -0.34615384615384626,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 4,
"seed": 2,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r5:aqua_rat_numeric",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N4_seed2",
"selected_topk": [
"r4:math_counting_easy",
"r5:conala_curated",
"r5:aqua_rat_numeric"
],
"accuracy": 0.04,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 14.211,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": -0.26923076923076933,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 8,
"seed": 0,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N8_seed0",
"selected_topk": null,
"accuracy": 0.08,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 17.133,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.1923076923076923,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 8,
"seed": 1,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N8_seed1",
"selected_topk": null,
"accuracy": 0.06666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 17.412,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.038461538461538394,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 8,
"seed": 2,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N8_seed2",
"selected_topk": null,
"accuracy": 0.06,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 13.813,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": -0.038461538461538554,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 8,
"seed": 0,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N8_seed0",
"selected_topk": null,
"accuracy": 0.06666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 16.831,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.038461538461538394,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 8,
"seed": 1,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N8_seed1",
"selected_topk": null,
"accuracy": 0.07666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 19.168,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.15384615384615374,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 8,
"seed": 2,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N8_seed2",
"selected_topk": null,
"accuracy": 0.06333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 15.834,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.0,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 8,
"seed": 0,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N8_seed0",
"selected_topk": [
"r5:humaneval",
"r4:mmlu_elementary_math",
"r4:svamp",
"r4:aqua_rat",
"r5:conala_curated",
"r4:mbpp",
"r5:math_counting_easy"
],
"accuracy": 0.08333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 17.033,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.23076923076923067,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 8,
"seed": 1,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N8_seed1",
"selected_topk": [
"r4:mbpp_sanitized",
"r5:humaneval",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r5:medmcqa_easy",
"r5:conala_curated",
"r5:math_counting_easy"
],
"accuracy": 0.056666666666666664,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 17.685,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": -0.07692307692307702,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 8,
"seed": 2,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N8_seed2",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r5:medmcqa_easy",
"r5:conala_curated",
"r5:mbpp_sanitized",
"r5:aqua_rat_numeric"
],
"accuracy": 0.056666666666666664,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 12.006,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": -0.07692307692307702,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N12_seed0",
"selected_topk": null,
"accuracy": 0.06,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 14.247,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": -0.038461538461538554,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N12_seed1",
"selected_topk": null,
"accuracy": 0.06666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 16.792,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.038461538461538394,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N12_seed2",
"selected_topk": null,
"accuracy": 0.06333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 10.577,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.0,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N12_seed0",
"selected_topk": null,
"accuracy": 0.07,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 17.127,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.07692307692307694,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N12_seed1",
"selected_topk": null,
"accuracy": 0.07,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 14.327,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.07692307692307694,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N12_seed2",
"selected_topk": null,
"accuracy": 0.06666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 13.838,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.038461538461538394,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N12_seed0",
"selected_topk": [
"r4:math_counting_easy",
"r5:humaneval",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:mmlu_high_school_biology",
"r4:svamp",
"r4:aqua_rat",
"r5:medmcqa_easy"
],
"accuracy": 0.06333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 16.357,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.0,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N12_seed1",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:mmlu_high_school_physics",
"r5:humaneval",
"r4:math_algebra_easy",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r5:medmcqa_easy",
"r5:conala_curated"
],
"accuracy": 0.07333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 16.623,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.11538461538461534,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N12_seed2",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:humaneval",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:openbookqa",
"r5:medmcqa_easy",
"r5:conala_curated"
],
"accuracy": 0.06333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 10.698,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.0,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N16_seed0",
"selected_topk": null,
"accuracy": 0.056666666666666664,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 10.982,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": -0.07692307692307702,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N16_seed1",
"selected_topk": null,
"accuracy": 0.06333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 11.658,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.0,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N16_seed2",
"selected_topk": null,
"accuracy": 0.06333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 11.127,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.0,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N16_seed0",
"selected_topk": null,
"accuracy": 0.07,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 13.972,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.07692307692307694,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N16_seed1",
"selected_topk": null,
"accuracy": 0.06666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 14.059,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.038461538461538394,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N16_seed2",
"selected_topk": null,
"accuracy": 0.06,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 14.114,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": -0.038461538461538554,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N16_seed0",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith",
"r4:math_algebra_easy",
"r4:mmlu_elementary_math",
"r4:mmlu_high_school_biology"
],
"accuracy": 0.06666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 10.721,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.038461538461538394,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N16_seed1",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:mmlu_high_school_physics",
"r5:humaneval",
"r4:math_algebra_easy",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r4:openbookqa"
],
"accuracy": 0.06333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 11.104,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.0,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N16_seed2",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:humaneval",
"r4:multiarith",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:openbookqa",
"r5:medmcqa_easy"
],
"accuracy": 0.056666666666666664,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 10.618,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": -0.07692307692307702,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "global_ridge",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N24_full",
"selected_topk": null,
"accuracy": 0.06333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 11.544,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.0,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "mean",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N24_full",
"selected_topk": null,
"accuracy": 0.06666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 13.897,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.038461538461538394,
"domain": "math"
},
{
"task": "gsm_hard",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk8_global_ridge",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N24_full",
"selected_topk": [
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:mmlu_high_school_physics",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith",
"r4:math_algebra_easy",
"r4:mmlu_elementary_math"
],
"accuracy": 0.06666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 10.731,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.038461538461538394,
"domain": "math"
},
{
"task": "mbpp_plus",
"N": 4,
"seed": 0,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:arc_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N4_seed0",
"selected_topk": null,
"accuracy": 0.20666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 91.152,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": -0.04285714285714289,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 4,
"seed": 1,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N4_seed1",
"selected_topk": null,
"accuracy": 0.26666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 88.715,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.21428571428571425,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 4,
"seed": 2,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r5:aqua_rat_numeric",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N4_seed2",
"selected_topk": null,
"accuracy": 0.21666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 92.565,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.0,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 4,
"seed": 0,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:arc_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N4_seed0",
"selected_topk": null,
"accuracy": 0.20666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 92.101,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": -0.04285714285714289,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 4,
"seed": 1,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N4_seed1",
"selected_topk": null,
"accuracy": 0.21333333333333335,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 94.791,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": -0.01428571428571426,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 4,
"seed": 2,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r5:aqua_rat_numeric",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N4_seed2",
"selected_topk": null,
"accuracy": 0.21333333333333335,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 92.398,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": -0.01428571428571426,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 4,
"seed": 0,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:arc_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N4_seed0",
"selected_topk": [
"r4:aqua_rat",
"r5:conala_curated",
"r5:math_counting_easy"
],
"accuracy": 0.20666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 91.879,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": -0.04285714285714289,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 4,
"seed": 1,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N4_seed1",
"selected_topk": [
"r4:mbpp_sanitized",
"r5:pubmedqa_pqal",
"r4:aqua_rat"
],
"accuracy": 0.27,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 86.762,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.22857142857142862,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 4,
"seed": 2,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r5:aqua_rat_numeric",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N4_seed2",
"selected_topk": [
"r4:math_counting_easy",
"r5:conala_curated",
"r4:arc_easy"
],
"accuracy": 0.21666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 92.694,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.0,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 8,
"seed": 0,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N8_seed0",
"selected_topk": null,
"accuracy": 0.21666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 87.079,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.0,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 8,
"seed": 1,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N8_seed1",
"selected_topk": null,
"accuracy": 0.27,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 87.223,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.22857142857142862,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 8,
"seed": 2,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N8_seed2",
"selected_topk": null,
"accuracy": 0.26666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 90.746,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.21428571428571425,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 8,
"seed": 0,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N8_seed0",
"selected_topk": null,
"accuracy": 0.21333333333333335,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 89.515,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": -0.01428571428571426,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 8,
"seed": 1,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N8_seed1",
"selected_topk": null,
"accuracy": 0.21333333333333335,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 92.797,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": -0.01428571428571426,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 8,
"seed": 2,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N8_seed2",
"selected_topk": null,
"accuracy": 0.21666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 94.052,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.0,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 8,
"seed": 0,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N8_seed0",
"selected_topk": [
"r5:humaneval",
"r4:mmlu_elementary_math",
"r4:svamp",
"r4:aqua_rat",
"r5:conala_curated",
"r5:math_counting_easy",
"r4:mbpp"
],
"accuracy": 0.21333333333333335,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 89.202,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": -0.01428571428571426,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 8,
"seed": 1,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N8_seed1",
"selected_topk": [
"r4:mbpp_sanitized",
"r5:humaneval",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:math_counting_easy"
],
"accuracy": 0.27,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 87.665,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.22857142857142862,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 8,
"seed": 2,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N8_seed2",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r4:math_algebra_easy",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:mbpp_sanitized",
"r4:arc_easy"
],
"accuracy": 0.2733333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 72.699,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.24285714285714274,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N12_seed0",
"selected_topk": null,
"accuracy": 0.21666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 92.332,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.0,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N12_seed1",
"selected_topk": null,
"accuracy": 0.27666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 71.408,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.2571428571428571,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N12_seed2",
"selected_topk": null,
"accuracy": 0.27666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 71.427,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.2571428571428571,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N12_seed0",
"selected_topk": null,
"accuracy": 0.21333333333333335,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 91.029,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": -0.01428571428571426,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N12_seed1",
"selected_topk": null,
"accuracy": 0.21,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 75.373,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": -0.028571428571428636,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N12_seed2",
"selected_topk": null,
"accuracy": 0.21333333333333335,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 75.328,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": -0.01428571428571426,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N12_seed0",
"selected_topk": [
"r5:humaneval",
"r4:humaneval",
"r4:math_counting_easy",
"r4:mmlu_high_school_biology",
"r4:mmlu_elementary_math",
"r4:svamp",
"r4:aqua_rat",
"r5:conala_curated"
],
"accuracy": 0.21666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 74.153,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.0,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N12_seed1",
"selected_topk": [
"r4:mbpp_sanitized",
"r5:humaneval",
"r4:mmlu_high_school_physics",
"r4:math_algebra_easy",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"accuracy": 0.27,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 72.042,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.22857142857142862,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N12_seed2",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:humaneval",
"r4:math_counting_easy",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:conala_curated",
"r4:openbookqa",
"r5:medmcqa_easy"
],
"accuracy": 0.2633333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 71.075,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.19999999999999984,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N16_seed0",
"selected_topk": null,
"accuracy": 0.27,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 71.109,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.22857142857142862,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N16_seed1",
"selected_topk": null,
"accuracy": 0.2733333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 70.902,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.24285714285714274,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N16_seed2",
"selected_topk": null,
"accuracy": 0.26666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 73.046,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.21428571428571425,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N16_seed0",
"selected_topk": null,
"accuracy": 0.21333333333333335,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 73.969,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": -0.01428571428571426,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N16_seed1",
"selected_topk": null,
"accuracy": 0.21,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 78.014,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": -0.028571428571428636,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N16_seed2",
"selected_topk": null,
"accuracy": 0.21333333333333335,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 75.572,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": -0.01428571428571426,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N16_seed0",
"selected_topk": [
"r4:mbpp_sanitized",
"r5:humaneval",
"r4:humaneval",
"r4:math_counting_easy",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy"
],
"accuracy": 0.26666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 70.097,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.21428571428571425,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N16_seed1",
"selected_topk": [
"r4:mbpp_sanitized",
"r5:humaneval",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:math_algebra_easy",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r5:conala_curated"
],
"accuracy": 0.2733333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 70.33,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.24285714285714274,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N16_seed2",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:humaneval",
"r4:math_counting_easy",
"r4:multiarith",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:conala_curated",
"r4:openbookqa"
],
"accuracy": 0.26666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 69.202,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.21428571428571425,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "global_ridge",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N24_full",
"selected_topk": null,
"accuracy": 0.2733333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 69.544,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.24285714285714274,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "mean",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N24_full",
"selected_topk": null,
"accuracy": 0.21333333333333335,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 77.605,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": -0.01428571428571426,
"domain": "code"
},
{
"task": "mbpp_plus",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk8_global_ridge",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N24_full",
"selected_topk": [
"r4:mbpp_sanitized",
"r5:humaneval",
"r4:humaneval",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:mmlu_elementary_math"
],
"accuracy": 0.2833333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 71.1,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.28571428571428564,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 4,
"seed": 0,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:arc_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N4_seed0",
"selected_topk": null,
"accuracy": 0.23,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 7,
"eval_seconds": 30.304,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.0,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 4,
"seed": 1,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N4_seed1",
"selected_topk": null,
"accuracy": 0.28,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 6,
"eval_seconds": 30.334,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.5555555555555558,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 4,
"seed": 2,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r5:aqua_rat_numeric",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N4_seed2",
"selected_topk": null,
"accuracy": 0.23,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 5,
"eval_seconds": 29.778,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.0,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 4,
"seed": 0,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:arc_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N4_seed0",
"selected_topk": null,
"accuracy": 0.23,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 6,
"eval_seconds": 30.954,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.0,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 4,
"seed": 1,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N4_seed1",
"selected_topk": null,
"accuracy": 0.23,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 5,
"eval_seconds": 29.348,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.0,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 4,
"seed": 2,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r5:aqua_rat_numeric",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N4_seed2",
"selected_topk": null,
"accuracy": 0.23,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 4,
"eval_seconds": 30.259,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.0,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 4,
"seed": 0,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:arc_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N4_seed0",
"selected_topk": [
"r4:aqua_rat",
"r5:conala_curated",
"r4:arc_easy"
],
"accuracy": 0.23,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 0,
"eval_seconds": 29.348,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.0,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 4,
"seed": 1,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N4_seed1",
"selected_topk": [
"r4:mbpp_sanitized",
"r5:pubmedqa_pqal",
"r4:aqua_rat"
],
"accuracy": 0.27,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 7,
"eval_seconds": 29.833,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.44444444444444453,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 4,
"seed": 2,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r5:aqua_rat_numeric",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N4_seed2",
"selected_topk": [
"r4:math_counting_easy",
"r5:conala_curated",
"r4:arc_easy"
],
"accuracy": 0.23,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 6,
"eval_seconds": 29.391,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.0,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 8,
"seed": 0,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N8_seed0",
"selected_topk": null,
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 4,
"eval_seconds": 28.852,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 8,
"seed": 1,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N8_seed1",
"selected_topk": null,
"accuracy": 0.27,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 3,
"eval_seconds": 30.289,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.44444444444444453,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 8,
"seed": 2,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N8_seed2",
"selected_topk": null,
"accuracy": 0.26,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 2,
"eval_seconds": 28.2,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.3333333333333333,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 8,
"seed": 0,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N8_seed0",
"selected_topk": null,
"accuracy": 0.24,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 3,
"eval_seconds": 29.436,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.11111111111111091,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 8,
"seed": 1,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N8_seed1",
"selected_topk": null,
"accuracy": 0.24,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 2,
"eval_seconds": 28.812,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.11111111111111091,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 8,
"seed": 2,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N8_seed2",
"selected_topk": null,
"accuracy": 0.23,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 1,
"eval_seconds": 30.096,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.0,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 8,
"seed": 0,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N8_seed0",
"selected_topk": [
"r5:humaneval",
"r4:mmlu_elementary_math",
"r4:svamp",
"r4:aqua_rat",
"r5:conala_curated",
"r4:mbpp",
"r4:arc_easy"
],
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 5,
"eval_seconds": 28.379,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 8,
"seed": 1,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N8_seed1",
"selected_topk": [
"r4:mbpp_sanitized",
"r5:humaneval",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:math_counting_easy"
],
"accuracy": 0.27,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 4,
"eval_seconds": 31.024,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.44444444444444453,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 8,
"seed": 2,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N8_seed2",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r4:math_algebra_easy",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:mbpp_sanitized",
"r4:arc_easy"
],
"accuracy": 0.26,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 3,
"eval_seconds": 29.802,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.3333333333333333,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N12_seed0",
"selected_topk": null,
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 1,
"eval_seconds": 29.335,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N12_seed1",
"selected_topk": null,
"accuracy": 0.26,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 0,
"eval_seconds": 24.376,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.3333333333333333,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N12_seed2",
"selected_topk": null,
"accuracy": 0.26,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 7,
"eval_seconds": 25.224,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.3333333333333333,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N12_seed0",
"selected_topk": null,
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 0,
"eval_seconds": 29.425,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N12_seed1",
"selected_topk": null,
"accuracy": 0.23,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 7,
"eval_seconds": 25.255,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.0,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N12_seed2",
"selected_topk": null,
"accuracy": 0.24,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 6,
"eval_seconds": 24.291,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.11111111111111091,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N12_seed0",
"selected_topk": [
"r4:math_counting_easy",
"r5:humaneval",
"r4:humaneval",
"r4:mmlu_high_school_biology",
"r4:mmlu_elementary_math",
"r4:svamp",
"r4:aqua_rat",
"r5:conala_curated"
],
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 2,
"eval_seconds": 28.665,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N12_seed1",
"selected_topk": [
"r4:mbpp_sanitized",
"r5:humaneval",
"r4:mmlu_high_school_physics",
"r4:math_algebra_easy",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"accuracy": 0.26,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 1,
"eval_seconds": 24.393,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.3333333333333333,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N12_seed2",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r4:humaneval",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:conala_curated",
"r4:openbookqa",
"r5:medmcqa_easy"
],
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 0,
"eval_seconds": 24.33,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N16_seed0",
"selected_topk": null,
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 6,
"eval_seconds": 24.197,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N16_seed1",
"selected_topk": null,
"accuracy": 0.26,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 5,
"eval_seconds": 24.539,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.3333333333333333,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N16_seed2",
"selected_topk": null,
"accuracy": 0.26,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 4,
"eval_seconds": 24.852,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.3333333333333333,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N16_seed0",
"selected_topk": null,
"accuracy": 0.24,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 5,
"eval_seconds": 23.992,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.11111111111111091,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N16_seed1",
"selected_topk": null,
"accuracy": 0.24,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 4,
"eval_seconds": 24.993,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.11111111111111091,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N16_seed2",
"selected_topk": null,
"accuracy": 0.24,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 3,
"eval_seconds": 24.299,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.11111111111111091,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N16_seed0",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy"
],
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 7,
"eval_seconds": 25.384,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N16_seed1",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r5:humaneval",
"r4:mmlu_high_school_physics",
"r4:math_algebra_easy",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r5:conala_curated"
],
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 6,
"eval_seconds": 25.052,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N16_seed2",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r4:humaneval",
"r4:multiarith",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:conala_curated",
"r4:openbookqa"
],
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 5,
"eval_seconds": 24.877,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "global_ridge",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N24_full",
"selected_topk": null,
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 3,
"eval_seconds": 23.396,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "mean",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N24_full",
"selected_topk": null,
"accuracy": 0.24,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 2,
"eval_seconds": 23.907,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.11111111111111091,
"domain": "code"
},
{
"task": "mbpp_test_held",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk8_global_ridge",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N24_full",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r5:humaneval",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:mmlu_elementary_math"
],
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 4,
"eval_seconds": 24.651,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213,
"domain": "code"
},
{
"task": "openbookqa_test",
"N": 4,
"seed": 0,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:arc_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N4_seed0",
"selected_topk": null,
"accuracy": 0.73,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 10.099,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.07317073170731714,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 4,
"seed": 1,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N4_seed1",
"selected_topk": null,
"accuracy": 0.69,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 4.078,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": -0.07317073170731714,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 4,
"seed": 2,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r5:aqua_rat_numeric",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N4_seed2",
"selected_topk": null,
"accuracy": 0.71,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 2.76,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.0,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 4,
"seed": 0,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:arc_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N4_seed0",
"selected_topk": null,
"accuracy": 0.75,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 2.965,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.14634146341463428,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 4,
"seed": 1,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N4_seed1",
"selected_topk": null,
"accuracy": 0.74,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 4.032,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.10975609756097571,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 4,
"seed": 2,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r5:aqua_rat_numeric",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N4_seed2",
"selected_topk": null,
"accuracy": 0.7466666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 3.041,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.1341463414634149,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 4,
"seed": 0,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:arc_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N4_seed0",
"selected_topk": [
"r4:aqua_rat",
"r5:conala_curated",
"r5:math_counting_easy"
],
"accuracy": 0.72,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 9.433,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.03658536585365857,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 4,
"seed": 1,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N4_seed1",
"selected_topk": [
"r4:mbpp_sanitized",
"r5:pubmedqa_pqal",
"r4:aqua_rat"
],
"accuracy": 0.69,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 4.006,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": -0.07317073170731714,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 4,
"seed": 2,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r5:aqua_rat_numeric",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N4_seed2",
"selected_topk": [
"r4:math_counting_easy",
"r5:conala_curated",
"r5:aqua_rat_numeric"
],
"accuracy": 0.7166666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 3.818,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.02439024390243918,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 8,
"seed": 0,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N8_seed0",
"selected_topk": null,
"accuracy": 0.7433333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 6.422,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.1219512195121951,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 8,
"seed": 1,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N8_seed1",
"selected_topk": null,
"accuracy": 0.7333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 5.598,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.08536585365853654,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 8,
"seed": 2,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N8_seed2",
"selected_topk": null,
"accuracy": 0.7333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 3.741,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.08536585365853654,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 8,
"seed": 0,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N8_seed0",
"selected_topk": null,
"accuracy": 0.7533333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 4.328,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.15853658536585366,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 8,
"seed": 1,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N8_seed1",
"selected_topk": null,
"accuracy": 0.7433333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 3.192,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.1219512195121951,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 8,
"seed": 2,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N8_seed2",
"selected_topk": null,
"accuracy": 0.74,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 2.692,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.10975609756097571,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 8,
"seed": 0,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N8_seed0",
"selected_topk": [
"r4:mmlu_elementary_math",
"r5:humaneval",
"r4:svamp",
"r4:aqua_rat",
"r5:conala_curated",
"r4:mbpp",
"r5:math_counting_easy"
],
"accuracy": 0.7366666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 6.351,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.09756097560975632,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 8,
"seed": 1,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp_sanitized",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N8_seed1",
"selected_topk": [
"r4:mbpp_sanitized",
"r5:humaneval",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r5:medmcqa_easy",
"r5:conala_curated",
"r5:math_counting_easy"
],
"accuracy": 0.7366666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 4.449,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.09756097560975632,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 8,
"seed": 2,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:arc_easy",
"r4:math_counting_easy",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N8_seed2",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r4:math_algebra_easy",
"r5:medmcqa_easy",
"r5:conala_curated",
"r5:mbpp_sanitized",
"r5:aqua_rat_numeric"
],
"accuracy": 0.73,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 4.862,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.07317073170731714,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N12_seed0",
"selected_topk": null,
"accuracy": 0.7333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 10.478,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.08536585365853654,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N12_seed1",
"selected_topk": null,
"accuracy": 0.71,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 6.801,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.0,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N12_seed2",
"selected_topk": null,
"accuracy": 0.7633333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 3.86,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.19512195121951226,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N12_seed0",
"selected_topk": null,
"accuracy": 0.7366666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 2.48,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.09756097560975632,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N12_seed1",
"selected_topk": null,
"accuracy": 0.7433333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 2.299,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.1219512195121951,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N12_seed2",
"selected_topk": null,
"accuracy": 0.7566666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 2.517,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.17073170731707346,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 12,
"seed": 0,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_elementary_math",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N12_seed0",
"selected_topk": [
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:mmlu_elementary_math",
"r5:humaneval",
"r4:humaneval",
"r4:svamp",
"r4:aqua_rat",
"r5:medmcqa_easy"
],
"accuracy": 0.7366666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 9.074,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.09756097560975632,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 12,
"seed": 1,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:arc_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N12_seed1",
"selected_topk": [
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r5:humaneval",
"r4:math_algebra_easy",
"r5:pubmedqa_pqal",
"r4:aqua_rat",
"r5:medmcqa_easy",
"r5:conala_curated"
],
"accuracy": 0.7033333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 6.701,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": -0.024390243902438775,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 12,
"seed": 2,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N12_seed2",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r4:humaneval",
"r4:math_algebra_easy",
"r4:openbookqa",
"r4:aqua_rat",
"r5:medmcqa_easy",
"r5:conala_curated"
],
"accuracy": 0.7633333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 4.001,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.19512195121951226,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N16_seed0",
"selected_topk": null,
"accuracy": 0.73,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 8.029,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.07317073170731714,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N16_seed1",
"selected_topk": null,
"accuracy": 0.7433333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 7.467,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.1219512195121951,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "global_ridge",
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N16_seed2",
"selected_topk": null,
"accuracy": 0.76,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 3.776,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.18292682926829285,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N16_seed0",
"selected_topk": null,
"accuracy": 0.7533333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 2.287,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.15853658536585366,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N16_seed1",
"selected_topk": null,
"accuracy": 0.7566666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 2.284,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.17073170731707346,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "mean",
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N16_seed2",
"selected_topk": null,
"accuracy": 0.7533333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 2.384,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.15853658536585366,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 16,
"seed": 0,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp",
"r4:arc_easy",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:math_counting_easy",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N16_seed0",
"selected_topk": [
"r4:mmlu_high_school_biology",
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r4:mmlu_elementary_math",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith",
"r4:math_algebra_easy"
],
"accuracy": 0.73,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 7.751,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.07317073170731714,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 16,
"seed": 1,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:arc_easy",
"r4:openbookqa",
"r4:math_counting_easy",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N16_seed1",
"selected_topk": [
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r5:humaneval",
"r4:math_algebra_easy",
"r4:openbookqa",
"r5:pubmedqa_pqal",
"r4:aqua_rat"
],
"accuracy": 0.74,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 6.88,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.10975609756097571,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 16,
"seed": 2,
"deterministic_full_pool": false,
"method": "topk8_global_ridge",
"anchors": [
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mbpp_sanitized",
"r5:conala_curated",
"r5:medmcqa_easy"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N16_seed2",
"selected_topk": [
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r4:humaneval",
"r4:multiarith",
"r4:math_algebra_easy",
"r4:openbookqa",
"r4:aqua_rat",
"r5:medmcqa_easy"
],
"accuracy": 0.76,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 3.951,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.18292682926829285,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "global_ridge",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N24_full",
"selected_topk": null,
"accuracy": 0.75,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 10.959,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.14634146341463428,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "mean",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N24_full",
"selected_topk": null,
"accuracy": 0.7566666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 2.284,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.17073170731707346,
"domain": "science"
},
{
"task": "openbookqa_test",
"N": 24,
"seed": 0,
"deterministic_full_pool": true,
"method": "topk8_global_ridge",
"anchors": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N24_full",
"selected_topk": [
"r4:mmlu_high_school_physics",
"r4:mmlu_high_school_biology",
"r4:mbpp_sanitized",
"r4:math_counting_easy",
"r4:mmlu_elementary_math",
"r5:humaneval",
"r4:humaneval",
"r4:multiarith"
],
"accuracy": 0.7133333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 10.881,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.012195121951219795,
"domain": "science"
}
],
"summary": {
"4": {
"mean": {
"n_records": 15,
"gap_recovered_mean": 0.030148586169927653,
"gap_recovered_std": 0.06461483844499136,
"accuracy_mean": 0.26822222222222225,
"accuracy_std": 0.2560873693502683
},
"global_ridge": {
"n_records": 15,
"gap_recovered_mean": -0.003457468457468462,
"gap_recovered_std": 0.19538572711097718,
"accuracy_mean": 0.2604444444444444,
"accuracy_std": 0.24815147277248514
},
"topk8_global_ridge": {
"n_records": 15,
"gap_recovered_mean": -0.016895374837448015,
"gap_recovered_std": 0.18327362210993292,
"accuracy_mean": 0.2591111111111111,
"accuracy_std": 0.24823653168173224
}
},
"8": {
"mean": {
"n_records": 15,
"gap_recovered_mean": 0.06945515922650067,
"gap_recovered_std": 0.06188765948601224,
"accuracy_mean": 0.2728888888888889,
"accuracy_std": 0.25359906697873863
},
"global_ridge": {
"n_records": 15,
"gap_recovered_mean": 0.13060651746627353,
"gap_recovered_std": 0.14004780290486193,
"accuracy_mean": 0.2797777777777778,
"accuracy_std": 0.2511196093519146
},
"topk8_global_ridge": {
"n_records": 15,
"gap_recovered_mean": 0.1253655744661842,
"gap_recovered_std": 0.15427112515492677,
"accuracy_mean": 0.27955555555555556,
"accuracy_std": 0.250399257905826
}
},
"12": {
"mean": {
"n_records": 15,
"gap_recovered_mean": 0.07704113806247954,
"gap_recovered_std": 0.07136104275669189,
"accuracy_mean": 0.27355555555555555,
"accuracy_std": 0.2533159977360188
},
"global_ridge": {
"n_records": 15,
"gap_recovered_mean": 0.12578582720351011,
"gap_recovered_std": 0.12796246741216633,
"accuracy_mean": 0.2813333333333334,
"accuracy_std": 0.24984376070237324
},
"topk8_global_ridge": {
"n_records": 15,
"gap_recovered_mean": 0.11746010031071008,
"gap_recovered_std": 0.10911242321364244,
"accuracy_mean": 0.2793333333333333,
"accuracy_std": 0.24933307868588983
}
},
"16": {
"mean": {
"n_records": 15,
"gap_recovered_mean": 0.07689456207748886,
"gap_recovered_std": 0.07100176997488256,
"accuracy_mean": 0.2748888888888889,
"accuracy_std": 0.25722841022417225
},
"global_ridge": {
"n_records": 15,
"gap_recovered_mean": 0.1365069252111935,
"gap_recovered_std": 0.12641589655600136,
"accuracy_mean": 0.2848888888888889,
"accuracy_std": 0.25340809256150715
},
"topk8_global_ridge": {
"n_records": 15,
"gap_recovered_mean": 0.12353249054468564,
"gap_recovered_std": 0.10122678980066042,
"accuracy_mean": 0.2835555555555556,
"accuracy_std": 0.25276084017909567
}
},
"24": {
"mean": {
"n_records": 5,
"gap_recovered_mean": 0.0830787285208017,
"gap_recovered_std": 0.07181727060927716,
"accuracy_mean": 0.276,
"accuracy_std": 0.27834231522433744
},
"global_ridge": {
"n_records": 5,
"gap_recovered_mean": 0.13478416569879983,
"gap_recovered_std": 0.10350184199429305,
"accuracy_mean": 0.2859999999999999,
"accuracy_std": 0.2754329924561205
},
"topk8_global_ridge": {
"n_records": 5,
"gap_recovered_mean": 0.12109363366985319,
"gap_recovered_std": 0.12401845134797244,
"accuracy_mean": 0.2806666666666667,
"accuracy_std": 0.259950850055245
}
}
}
}