{ "config": { "model_X": "Qwen/Qwen2.5-3B-Instruct", "model_Y": "meta-llama/Llama-3.2-3B-Instruct", "hub_repo": "CK0607/cross-model-lora-prediction-3b", "round6_real_generation_eval": true, "round5_surrogate_deprecated": true, "no_surrogate": true, "eval_examples_requested": 300, "generation": { "do_sample": false, "num_beams": 1, "greedy": true, "max_new_tokens_code": 96, "max_new_tokens_other": 24 }, "pool_anchor_names": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "heldouts": [ "gsm_hard", "gsm8k_test_500", "mbpp_test_held", "mbpp_plus", "openbookqa_test" ], "N_values": [ 4, 8, 12, 16, 24 ], "methods": [ "mean", "global_ridge", "topk8_global_ridge" ], "seeds_for_subsampled_N": [ 0, 1, 2 ], "N24_seed": 0, "budget_reduction": null, "wall_seconds": 724.129 }, "adapter_verification": { "listing": { "round4/X": [ "aqua_rat", "arc_challenge", "arc_easy", "gsm8k", "gsm8k_test_500", "gsm_hard", "humaneval", "math_algebra_easy", "math_counting_easy", "mbpp", "mbpp_plus", "mbpp_sanitized", "mbpp_test_held", "medmcqa_easy", "mmlu_elementary_math", "mmlu_high_school_biology", "mmlu_high_school_physics", "multiarith", "openbookqa", "openbookqa_test", "sciq", "svamp" ], "round4/Y": [ "aqua_rat", "arc_challenge", "arc_easy", "gsm8k", "gsm8k_test_500", "gsm_hard", "humaneval", "math_algebra_easy", "math_counting_easy", "mbpp", "mbpp_plus", "mbpp_sanitized", "mbpp_test_held", "medmcqa_easy", "mmlu_elementary_math", "mmlu_high_school_biology", "mmlu_high_school_physics", "multiarith", "openbookqa", "openbookqa_test", "sciq", "svamp" ], "round5/X": [ "aqua_rat_numeric", "conala_curated", "humaneval", "math_counting_easy", "mawps", "mbpp_sanitized", "medmcqa_easy", "pubmedqa_pqal" ], "round5/Y": [ "aqua_rat_numeric", "conala_curated", "humaneval", "math_counting_easy", "mawps", "mbpp_sanitized", "medmcqa_easy", "pubmedqa_pqal" ] }, "missing": [] }, "baselines": { "gsm_hard": { "base_Y": 0.06333333333333334, "oracle": 0.15 }, "gsm8k_test_500": { "base_Y": 0.08, "oracle": 0.29333333333333333 }, "mbpp_test_held": { "base_Y": 0.23, "oracle": 0.32 }, "mbpp_plus": { "base_Y": 0.21666666666666667, "oracle": 0.45 }, "openbookqa_test": { "base_Y": 0.71, "oracle": 0.9833333333333333 } }, "records": [ { "task": "gsm8k_test_500", "N": 4, "seed": 0, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:arc_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N4_seed0", "selected_topk": null, "accuracy": 0.08, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 16.047, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.0, "domain": "math" }, { "task": "gsm8k_test_500", "N": 4, "seed": 1, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N4_seed1", "selected_topk": null, "accuracy": 0.056666666666666664, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 6.767, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": -0.10937500000000003, "domain": "math" }, { "task": "gsm8k_test_500", "N": 4, "seed": 2, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r5:aqua_rat_numeric", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N4_seed2", "selected_topk": null, "accuracy": 0.07666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 6.193, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": -0.015625000000000038, "domain": "math" }, { "task": "gsm8k_test_500", "N": 4, "seed": 0, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:arc_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N4_seed0", "selected_topk": null, "accuracy": 0.09333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 12.83, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.06250000000000001, "domain": "math" }, { "task": "gsm8k_test_500", "N": 4, "seed": 1, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N4_seed1", "selected_topk": null, "accuracy": 0.08666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 5.713, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.03125000000000001, "domain": "math" }, { "task": "gsm8k_test_500", "N": 4, "seed": 2, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r5:aqua_rat_numeric", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N4_seed2", "selected_topk": null, "accuracy": 0.09666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 7.464, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.07812499999999999, "domain": "math" }, { "task": "gsm8k_test_500", "N": 4, "seed": 0, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:arc_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N4_seed0", "selected_topk": [ "r4:aqua_rat", "r5:conala_curated", "r5:math_counting_easy" ], "accuracy": 0.07666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 15.428, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": -0.015625000000000038, "domain": "math" }, { "task": "gsm8k_test_500", "N": 4, "seed": 1, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N4_seed1", "selected_topk": [ "r4:mbpp_sanitized", "r5:pubmedqa_pqal", "r4:aqua_rat" ], "accuracy": 0.06, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 7.106, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": -0.09375000000000003, "domain": "math" }, { "task": "gsm8k_test_500", "N": 4, "seed": 2, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r5:aqua_rat_numeric", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N4_seed2", "selected_topk": [ "r4:math_counting_easy", "r5:conala_curated", "r5:aqua_rat_numeric" ], "accuracy": 0.07333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 6.161, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": -0.03125000000000001, "domain": "math" }, { "task": "gsm8k_test_500", "N": 8, "seed": 0, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N8_seed0", "selected_topk": null, "accuracy": 0.09333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 10.252, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.06250000000000001, "domain": "math" }, { "task": "gsm8k_test_500", "N": 8, "seed": 1, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N8_seed1", "selected_topk": null, "accuracy": 0.07333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 11.38, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": -0.03125000000000001, "domain": "math" }, { "task": "gsm8k_test_500", "N": 8, "seed": 2, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N8_seed2", "selected_topk": null, "accuracy": 0.08, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 5.259, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.0, "domain": "math" }, { "task": "gsm8k_test_500", "N": 8, "seed": 0, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N8_seed0", "selected_topk": null, "accuracy": 0.09666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 8.174, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.07812499999999999, "domain": "math" }, { "task": "gsm8k_test_500", "N": 8, "seed": 1, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N8_seed1", "selected_topk": null, "accuracy": 0.1, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 9.26, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.09375000000000003, "domain": "math" }, { "task": "gsm8k_test_500", "N": 8, "seed": 2, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N8_seed2", "selected_topk": null, "accuracy": 0.1, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 6.14, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.09375000000000003, "domain": "math" }, { "task": "gsm8k_test_500", "N": 8, "seed": 0, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N8_seed0", "selected_topk": [ "r5:humaneval", "r4:mmlu_elementary_math", "r4:svamp", "r4:aqua_rat", "r5:conala_curated", "r4:mbpp", "r5:math_counting_easy" ], "accuracy": 0.1, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 8.825, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.09375000000000003, "domain": "math" }, { "task": "gsm8k_test_500", "N": 8, "seed": 1, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N8_seed1", "selected_topk": [ "r4:mbpp_sanitized", "r5:humaneval", "r5:pubmedqa_pqal", "r4:aqua_rat", "r5:conala_curated", "r5:medmcqa_easy", "r5:math_counting_easy" ], "accuracy": 0.07333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 13.534, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": -0.03125000000000001, "domain": "math" }, { "task": "gsm8k_test_500", "N": 8, "seed": 2, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N8_seed2", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r5:conala_curated", "r5:medmcqa_easy", "r5:mbpp_sanitized", "r5:aqua_rat_numeric" ], "accuracy": 0.08333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 5.216, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.015624999999999972, "domain": "math" }, { "task": "gsm8k_test_500", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N12_seed0", "selected_topk": null, "accuracy": 0.09666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 4.862, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.07812499999999999, "domain": "math" }, { "task": "gsm8k_test_500", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N12_seed1", "selected_topk": null, "accuracy": 0.09666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 6.151, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.07812499999999999, "domain": "math" }, { "task": "gsm8k_test_500", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N12_seed2", "selected_topk": null, "accuracy": 0.09, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 4.764, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.04687499999999998, "domain": "math" }, { "task": "gsm8k_test_500", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N12_seed0", "selected_topk": null, "accuracy": 0.09666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 10.116, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.07812499999999999, "domain": "math" }, { "task": "gsm8k_test_500", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N12_seed1", "selected_topk": null, "accuracy": 0.10666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 9.274, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.12500000000000003, "domain": "math" }, { "task": "gsm8k_test_500", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N12_seed2", "selected_topk": null, "accuracy": 0.1, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 5.933, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.09375000000000003, "domain": "math" }, { "task": "gsm8k_test_500", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N12_seed0", "selected_topk": [ "r4:math_counting_easy", "r5:humaneval", "r4:humaneval", "r4:mmlu_elementary_math", "r4:mmlu_high_school_biology", "r4:svamp", "r4:aqua_rat", "r5:conala_curated" ], "accuracy": 0.09333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 5.642, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.06250000000000001, "domain": "math" }, { "task": "gsm8k_test_500", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N12_seed1", "selected_topk": [ "r4:mbpp_sanitized", "r4:mmlu_high_school_physics", "r5:humaneval", "r4:math_algebra_easy", "r5:pubmedqa_pqal", "r4:aqua_rat", "r5:conala_curated", "r5:medmcqa_easy" ], "accuracy": 0.09, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 6.188, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.04687499999999998, "domain": "math" }, { "task": "gsm8k_test_500", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N12_seed2", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:humaneval", "r4:math_algebra_easy", "r4:aqua_rat", "r4:openbookqa", "r5:conala_curated", "r5:medmcqa_easy" ], "accuracy": 0.09333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 4.729, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.06250000000000001, "domain": "math" }, { "task": "gsm8k_test_500", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N16_seed0", "selected_topk": null, "accuracy": 0.1, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 4.316, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.09375000000000003, "domain": "math" }, { "task": "gsm8k_test_500", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N16_seed1", "selected_topk": null, "accuracy": 0.08, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 4.721, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.0, "domain": "math" }, { "task": "gsm8k_test_500", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N16_seed2", "selected_topk": null, "accuracy": 0.09666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 4.675, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.07812499999999999, "domain": "math" }, { "task": "gsm8k_test_500", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N16_seed0", "selected_topk": null, "accuracy": 0.10333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 5.288, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.109375, "domain": "math" }, { "task": "gsm8k_test_500", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N16_seed1", "selected_topk": null, "accuracy": 0.10333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 5.185, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.109375, "domain": "math" }, { "task": "gsm8k_test_500", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N16_seed2", "selected_topk": null, "accuracy": 0.1, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 5.741, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.09375000000000003, "domain": "math" }, { "task": "gsm8k_test_500", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N16_seed0", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r5:humaneval", "r4:humaneval", "r4:multiarith", "r4:math_algebra_easy", "r4:mmlu_elementary_math", "r4:mmlu_high_school_biology" ], "accuracy": 0.1, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 4.798, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.09375000000000003, "domain": "math" }, { "task": "gsm8k_test_500", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N16_seed1", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:mmlu_high_school_physics", "r5:humaneval", "r4:math_algebra_easy", "r5:pubmedqa_pqal", "r4:aqua_rat", "r4:openbookqa" ], "accuracy": 0.08333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 4.775, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.015624999999999972, "domain": "math" }, { "task": "gsm8k_test_500", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N16_seed2", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:humaneval", "r4:multiarith", "r4:math_algebra_easy", "r4:aqua_rat", "r4:openbookqa", "r5:conala_curated" ], "accuracy": 0.09666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 4.732, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.07812499999999999, "domain": "math" }, { "task": "gsm8k_test_500", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "global_ridge", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_global_ridge_N24_full", "selected_topk": null, "accuracy": 0.09333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 4.367, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.06250000000000001, "domain": "math" }, { "task": "gsm8k_test_500", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "mean", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_mean_N24_full", "selected_topk": null, "accuracy": 0.10333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 5.883, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.109375, "domain": "math" }, { "task": "gsm8k_test_500", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk8_global_ridge", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm8k_test_500_topk8_global_ridge_N24_full", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:mmlu_high_school_physics", "r5:humaneval", "r4:humaneval", "r4:multiarith", "r4:math_algebra_easy", "r4:mmlu_elementary_math" ], "accuracy": 0.09, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 4.254, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.04687499999999998, "domain": "math" }, { "task": "gsm_hard", "N": 4, "seed": 0, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:arc_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N4_seed0", "selected_topk": null, "accuracy": 0.05, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 18.317, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": -0.15384615384615388, "domain": "math" }, { "task": "gsm_hard", "N": 4, "seed": 1, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N4_seed1", "selected_topk": null, "accuracy": 0.03666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 14.877, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": -0.30769230769230776, "domain": "math" }, { "task": "gsm_hard", "N": 4, "seed": 2, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r5:aqua_rat_numeric", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N4_seed2", "selected_topk": null, "accuracy": 0.04666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 14.164, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": -0.19230769230769237, "domain": "math" }, { "task": "gsm_hard", "N": 4, "seed": 0, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:arc_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N4_seed0", "selected_topk": null, "accuracy": 0.06333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 18.709, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.0, "domain": "math" }, { "task": "gsm_hard", "N": 4, "seed": 1, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N4_seed1", "selected_topk": null, "accuracy": 0.06666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 15.851, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.038461538461538394, "domain": "math" }, { "task": "gsm_hard", "N": 4, "seed": 2, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r5:aqua_rat_numeric", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N4_seed2", "selected_topk": null, "accuracy": 0.056666666666666664, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 15.649, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": -0.07692307692307702, "domain": "math" }, { "task": "gsm_hard", "N": 4, "seed": 0, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:arc_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N4_seed0", "selected_topk": [ "r4:aqua_rat", "r5:conala_curated", "r5:math_counting_easy" ], "accuracy": 0.05333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 18.151, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": -0.11538461538461542, "domain": "math" }, { "task": "gsm_hard", "N": 4, "seed": 1, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N4_seed1", "selected_topk": [ "r4:mbpp_sanitized", "r5:pubmedqa_pqal", "r4:aqua_rat" ], "accuracy": 0.03333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 16.423, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": -0.34615384615384626, "domain": "math" }, { "task": "gsm_hard", "N": 4, "seed": 2, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r5:aqua_rat_numeric", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N4_seed2", "selected_topk": [ "r4:math_counting_easy", "r5:conala_curated", "r5:aqua_rat_numeric" ], "accuracy": 0.04, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 14.211, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": -0.26923076923076933, "domain": "math" }, { "task": "gsm_hard", "N": 8, "seed": 0, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N8_seed0", "selected_topk": null, "accuracy": 0.08, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 17.133, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.1923076923076923, "domain": "math" }, { "task": "gsm_hard", "N": 8, "seed": 1, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N8_seed1", "selected_topk": null, "accuracy": 0.06666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 17.412, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.038461538461538394, "domain": "math" }, { "task": "gsm_hard", "N": 8, "seed": 2, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N8_seed2", "selected_topk": null, "accuracy": 0.06, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 13.813, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": -0.038461538461538554, "domain": "math" }, { "task": "gsm_hard", "N": 8, "seed": 0, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N8_seed0", "selected_topk": null, "accuracy": 0.06666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 16.831, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.038461538461538394, "domain": "math" }, { "task": "gsm_hard", "N": 8, "seed": 1, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N8_seed1", "selected_topk": null, "accuracy": 0.07666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 19.168, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.15384615384615374, "domain": "math" }, { "task": "gsm_hard", "N": 8, "seed": 2, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N8_seed2", "selected_topk": null, "accuracy": 0.06333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 15.834, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.0, "domain": "math" }, { "task": "gsm_hard", "N": 8, "seed": 0, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N8_seed0", "selected_topk": [ "r5:humaneval", "r4:mmlu_elementary_math", "r4:svamp", "r4:aqua_rat", "r5:conala_curated", "r4:mbpp", "r5:math_counting_easy" ], "accuracy": 0.08333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 17.033, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.23076923076923067, "domain": "math" }, { "task": "gsm_hard", "N": 8, "seed": 1, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N8_seed1", "selected_topk": [ "r4:mbpp_sanitized", "r5:humaneval", "r5:pubmedqa_pqal", "r4:aqua_rat", "r5:medmcqa_easy", "r5:conala_curated", "r5:math_counting_easy" ], "accuracy": 0.056666666666666664, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 17.685, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": -0.07692307692307702, "domain": "math" }, { "task": "gsm_hard", "N": 8, "seed": 2, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N8_seed2", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r5:medmcqa_easy", "r5:conala_curated", "r5:mbpp_sanitized", "r5:aqua_rat_numeric" ], "accuracy": 0.056666666666666664, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 12.006, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": -0.07692307692307702, "domain": "math" }, { "task": "gsm_hard", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N12_seed0", "selected_topk": null, "accuracy": 0.06, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 14.247, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": -0.038461538461538554, "domain": "math" }, { "task": "gsm_hard", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N12_seed1", "selected_topk": null, "accuracy": 0.06666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 16.792, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.038461538461538394, "domain": "math" }, { "task": "gsm_hard", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N12_seed2", "selected_topk": null, "accuracy": 0.06333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 10.577, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.0, "domain": "math" }, { "task": "gsm_hard", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N12_seed0", "selected_topk": null, "accuracy": 0.07, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 17.127, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.07692307692307694, "domain": "math" }, { "task": "gsm_hard", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N12_seed1", "selected_topk": null, "accuracy": 0.07, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 14.327, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.07692307692307694, "domain": "math" }, { "task": "gsm_hard", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N12_seed2", "selected_topk": null, "accuracy": 0.06666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 13.838, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.038461538461538394, "domain": "math" }, { "task": "gsm_hard", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N12_seed0", "selected_topk": [ "r4:math_counting_easy", "r5:humaneval", "r4:humaneval", "r4:mmlu_elementary_math", "r4:mmlu_high_school_biology", "r4:svamp", "r4:aqua_rat", "r5:medmcqa_easy" ], "accuracy": 0.06333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 16.357, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.0, "domain": "math" }, { "task": "gsm_hard", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N12_seed1", "selected_topk": [ "r4:mbpp_sanitized", "r4:mmlu_high_school_physics", "r5:humaneval", "r4:math_algebra_easy", "r5:pubmedqa_pqal", "r4:aqua_rat", "r5:medmcqa_easy", "r5:conala_curated" ], "accuracy": 0.07333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 16.623, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.11538461538461534, "domain": "math" }, { "task": "gsm_hard", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N12_seed2", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:humaneval", "r4:math_algebra_easy", "r4:aqua_rat", "r4:openbookqa", "r5:medmcqa_easy", "r5:conala_curated" ], "accuracy": 0.06333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 10.698, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.0, "domain": "math" }, { "task": "gsm_hard", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N16_seed0", "selected_topk": null, "accuracy": 0.056666666666666664, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 10.982, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": -0.07692307692307702, "domain": "math" }, { "task": "gsm_hard", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N16_seed1", "selected_topk": null, "accuracy": 0.06333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 11.658, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.0, "domain": "math" }, { "task": "gsm_hard", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N16_seed2", "selected_topk": null, "accuracy": 0.06333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 11.127, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.0, "domain": "math" }, { "task": "gsm_hard", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N16_seed0", "selected_topk": null, "accuracy": 0.07, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 13.972, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.07692307692307694, "domain": "math" }, { "task": "gsm_hard", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N16_seed1", "selected_topk": null, "accuracy": 0.06666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 14.059, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.038461538461538394, "domain": "math" }, { "task": "gsm_hard", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N16_seed2", "selected_topk": null, "accuracy": 0.06, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 14.114, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": -0.038461538461538554, "domain": "math" }, { "task": "gsm_hard", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N16_seed0", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r5:humaneval", "r4:humaneval", "r4:multiarith", "r4:math_algebra_easy", "r4:mmlu_elementary_math", "r4:mmlu_high_school_biology" ], "accuracy": 0.06666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 10.721, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.038461538461538394, "domain": "math" }, { "task": "gsm_hard", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N16_seed1", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:mmlu_high_school_physics", "r5:humaneval", "r4:math_algebra_easy", "r5:pubmedqa_pqal", "r4:aqua_rat", "r4:openbookqa" ], "accuracy": 0.06333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 11.104, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.0, "domain": "math" }, { "task": "gsm_hard", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N16_seed2", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:humaneval", "r4:multiarith", "r4:math_algebra_easy", "r4:aqua_rat", "r4:openbookqa", "r5:medmcqa_easy" ], "accuracy": 0.056666666666666664, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 10.618, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": -0.07692307692307702, "domain": "math" }, { "task": "gsm_hard", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "global_ridge", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_global_ridge_N24_full", "selected_topk": null, "accuracy": 0.06333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 11.544, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.0, "domain": "math" }, { "task": "gsm_hard", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "mean", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_mean_N24_full", "selected_topk": null, "accuracy": 0.06666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 13.897, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.038461538461538394, "domain": "math" }, { "task": "gsm_hard", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk8_global_ridge", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/gsm_hard_topk8_global_ridge_N24_full", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:mmlu_high_school_physics", "r5:humaneval", "r4:humaneval", "r4:multiarith", "r4:math_algebra_easy", "r4:mmlu_elementary_math" ], "accuracy": 0.06666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 10.731, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.038461538461538394, "domain": "math" }, { "task": "mbpp_plus", "N": 4, "seed": 0, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:arc_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N4_seed0", "selected_topk": null, "accuracy": 0.20666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 91.152, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": -0.04285714285714289, "domain": "code" }, { "task": "mbpp_plus", "N": 4, "seed": 1, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N4_seed1", "selected_topk": null, "accuracy": 0.26666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 88.715, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.21428571428571425, "domain": "code" }, { "task": "mbpp_plus", "N": 4, "seed": 2, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r5:aqua_rat_numeric", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N4_seed2", "selected_topk": null, "accuracy": 0.21666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 92.565, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.0, "domain": "code" }, { "task": "mbpp_plus", "N": 4, "seed": 0, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:arc_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N4_seed0", "selected_topk": null, "accuracy": 0.20666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 92.101, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": -0.04285714285714289, "domain": "code" }, { "task": "mbpp_plus", "N": 4, "seed": 1, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N4_seed1", "selected_topk": null, "accuracy": 0.21333333333333335, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 94.791, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": -0.01428571428571426, "domain": "code" }, { "task": "mbpp_plus", "N": 4, "seed": 2, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r5:aqua_rat_numeric", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N4_seed2", "selected_topk": null, "accuracy": 0.21333333333333335, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 92.398, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": -0.01428571428571426, "domain": "code" }, { "task": "mbpp_plus", "N": 4, "seed": 0, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:arc_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N4_seed0", "selected_topk": [ "r4:aqua_rat", "r5:conala_curated", "r5:math_counting_easy" ], "accuracy": 0.20666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 91.879, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": -0.04285714285714289, "domain": "code" }, { "task": "mbpp_plus", "N": 4, "seed": 1, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N4_seed1", "selected_topk": [ "r4:mbpp_sanitized", "r5:pubmedqa_pqal", "r4:aqua_rat" ], "accuracy": 0.27, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 86.762, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.22857142857142862, "domain": "code" }, { "task": "mbpp_plus", "N": 4, "seed": 2, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r5:aqua_rat_numeric", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N4_seed2", "selected_topk": [ "r4:math_counting_easy", "r5:conala_curated", "r4:arc_easy" ], "accuracy": 0.21666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 92.694, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.0, "domain": "code" }, { "task": "mbpp_plus", "N": 8, "seed": 0, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N8_seed0", "selected_topk": null, "accuracy": 0.21666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 87.079, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.0, "domain": "code" }, { "task": "mbpp_plus", "N": 8, "seed": 1, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N8_seed1", "selected_topk": null, "accuracy": 0.27, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 87.223, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.22857142857142862, "domain": "code" }, { "task": "mbpp_plus", "N": 8, "seed": 2, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N8_seed2", "selected_topk": null, "accuracy": 0.26666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 90.746, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.21428571428571425, "domain": "code" }, { "task": "mbpp_plus", "N": 8, "seed": 0, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N8_seed0", "selected_topk": null, "accuracy": 0.21333333333333335, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 89.515, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": -0.01428571428571426, "domain": "code" }, { "task": "mbpp_plus", "N": 8, "seed": 1, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N8_seed1", "selected_topk": null, "accuracy": 0.21333333333333335, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 92.797, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": -0.01428571428571426, "domain": "code" }, { "task": "mbpp_plus", "N": 8, "seed": 2, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N8_seed2", "selected_topk": null, "accuracy": 0.21666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 94.052, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.0, "domain": "code" }, { "task": "mbpp_plus", "N": 8, "seed": 0, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N8_seed0", "selected_topk": [ "r5:humaneval", "r4:mmlu_elementary_math", "r4:svamp", "r4:aqua_rat", "r5:conala_curated", "r5:math_counting_easy", "r4:mbpp" ], "accuracy": 0.21333333333333335, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 89.202, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": -0.01428571428571426, "domain": "code" }, { "task": "mbpp_plus", "N": 8, "seed": 1, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N8_seed1", "selected_topk": [ "r4:mbpp_sanitized", "r5:humaneval", "r5:pubmedqa_pqal", "r4:aqua_rat", "r5:conala_curated", "r5:medmcqa_easy", "r5:math_counting_easy" ], "accuracy": 0.27, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 87.665, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.22857142857142862, "domain": "code" }, { "task": "mbpp_plus", "N": 8, "seed": 2, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N8_seed2", "selected_topk": [ "r4:mbpp_sanitized", "r4:math_counting_easy", "r4:math_algebra_easy", "r5:conala_curated", "r5:medmcqa_easy", "r5:mbpp_sanitized", "r4:arc_easy" ], "accuracy": 0.2733333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 72.699, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.24285714285714274, "domain": "code" }, { "task": "mbpp_plus", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N12_seed0", "selected_topk": null, "accuracy": 0.21666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 92.332, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.0, "domain": "code" }, { "task": "mbpp_plus", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N12_seed1", "selected_topk": null, "accuracy": 0.27666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 71.408, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.2571428571428571, "domain": "code" }, { "task": "mbpp_plus", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N12_seed2", "selected_topk": null, "accuracy": 0.27666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 71.427, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.2571428571428571, "domain": "code" }, { "task": "mbpp_plus", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N12_seed0", "selected_topk": null, "accuracy": 0.21333333333333335, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 91.029, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": -0.01428571428571426, "domain": "code" }, { "task": "mbpp_plus", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N12_seed1", "selected_topk": null, "accuracy": 0.21, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 75.373, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": -0.028571428571428636, "domain": "code" }, { "task": "mbpp_plus", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N12_seed2", "selected_topk": null, "accuracy": 0.21333333333333335, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 75.328, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": -0.01428571428571426, "domain": "code" }, { "task": "mbpp_plus", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N12_seed0", "selected_topk": [ "r5:humaneval", "r4:humaneval", "r4:math_counting_easy", "r4:mmlu_high_school_biology", "r4:mmlu_elementary_math", "r4:svamp", "r4:aqua_rat", "r5:conala_curated" ], "accuracy": 0.21666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 74.153, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.0, "domain": "code" }, { "task": "mbpp_plus", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N12_seed1", "selected_topk": [ "r4:mbpp_sanitized", "r5:humaneval", "r4:mmlu_high_school_physics", "r4:math_algebra_easy", "r5:pubmedqa_pqal", "r4:aqua_rat", "r5:conala_curated", "r5:medmcqa_easy" ], "accuracy": 0.27, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 72.042, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.22857142857142862, "domain": "code" }, { "task": "mbpp_plus", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N12_seed2", "selected_topk": [ "r4:mbpp_sanitized", "r4:humaneval", "r4:math_counting_easy", "r4:math_algebra_easy", "r4:aqua_rat", "r5:conala_curated", "r4:openbookqa", "r5:medmcqa_easy" ], "accuracy": 0.2633333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 71.075, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.19999999999999984, "domain": "code" }, { "task": "mbpp_plus", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N16_seed0", "selected_topk": null, "accuracy": 0.27, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 71.109, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.22857142857142862, "domain": "code" }, { "task": "mbpp_plus", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N16_seed1", "selected_topk": null, "accuracy": 0.2733333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 70.902, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.24285714285714274, "domain": "code" }, { "task": "mbpp_plus", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N16_seed2", "selected_topk": null, "accuracy": 0.26666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 73.046, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.21428571428571425, "domain": "code" }, { "task": "mbpp_plus", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N16_seed0", "selected_topk": null, "accuracy": 0.21333333333333335, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 73.969, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": -0.01428571428571426, "domain": "code" }, { "task": "mbpp_plus", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N16_seed1", "selected_topk": null, "accuracy": 0.21, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 78.014, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": -0.028571428571428636, "domain": "code" }, { "task": "mbpp_plus", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N16_seed2", "selected_topk": null, "accuracy": 0.21333333333333335, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 75.572, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": -0.01428571428571426, "domain": "code" }, { "task": "mbpp_plus", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N16_seed0", "selected_topk": [ "r4:mbpp_sanitized", "r5:humaneval", "r4:humaneval", "r4:math_counting_easy", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:mmlu_elementary_math", "r4:math_algebra_easy" ], "accuracy": 0.26666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 70.097, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.21428571428571425, "domain": "code" }, { "task": "mbpp_plus", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N16_seed1", "selected_topk": [ "r4:mbpp_sanitized", "r5:humaneval", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:math_algebra_easy", "r5:pubmedqa_pqal", "r4:aqua_rat", "r5:conala_curated" ], "accuracy": 0.2733333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 70.33, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.24285714285714274, "domain": "code" }, { "task": "mbpp_plus", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N16_seed2", "selected_topk": [ "r4:mbpp_sanitized", "r4:humaneval", "r4:math_counting_easy", "r4:multiarith", "r4:math_algebra_easy", "r4:aqua_rat", "r5:conala_curated", "r4:openbookqa" ], "accuracy": 0.26666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 69.202, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.21428571428571425, "domain": "code" }, { "task": "mbpp_plus", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "global_ridge", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_global_ridge_N24_full", "selected_topk": null, "accuracy": 0.2733333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 69.544, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.24285714285714274, "domain": "code" }, { "task": "mbpp_plus", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "mean", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_mean_N24_full", "selected_topk": null, "accuracy": 0.21333333333333335, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 77.605, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": -0.01428571428571426, "domain": "code" }, { "task": "mbpp_plus", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk8_global_ridge", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_plus_topk8_global_ridge_N24_full", "selected_topk": [ "r4:mbpp_sanitized", "r5:humaneval", "r4:humaneval", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:mmlu_elementary_math" ], "accuracy": 0.2833333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 71.1, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.28571428571428564, "domain": "code" }, { "task": "mbpp_test_held", "N": 4, "seed": 0, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:arc_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N4_seed0", "selected_topk": null, "accuracy": 0.23, "real_generation_eval": true, "eval_examples": 100, "gpu": 7, "eval_seconds": 30.304, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.0, "domain": "code" }, { "task": "mbpp_test_held", "N": 4, "seed": 1, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N4_seed1", "selected_topk": null, "accuracy": 0.28, "real_generation_eval": true, "eval_examples": 100, "gpu": 6, "eval_seconds": 30.334, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.5555555555555558, "domain": "code" }, { "task": "mbpp_test_held", "N": 4, "seed": 2, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r5:aqua_rat_numeric", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N4_seed2", "selected_topk": null, "accuracy": 0.23, "real_generation_eval": true, "eval_examples": 100, "gpu": 5, "eval_seconds": 29.778, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.0, "domain": "code" }, { "task": "mbpp_test_held", "N": 4, "seed": 0, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:arc_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N4_seed0", "selected_topk": null, "accuracy": 0.23, "real_generation_eval": true, "eval_examples": 100, "gpu": 6, "eval_seconds": 30.954, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.0, "domain": "code" }, { "task": "mbpp_test_held", "N": 4, "seed": 1, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N4_seed1", "selected_topk": null, "accuracy": 0.23, "real_generation_eval": true, "eval_examples": 100, "gpu": 5, "eval_seconds": 29.348, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.0, "domain": "code" }, { "task": "mbpp_test_held", "N": 4, "seed": 2, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r5:aqua_rat_numeric", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N4_seed2", "selected_topk": null, "accuracy": 0.23, "real_generation_eval": true, "eval_examples": 100, "gpu": 4, "eval_seconds": 30.259, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.0, "domain": "code" }, { "task": "mbpp_test_held", "N": 4, "seed": 0, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:arc_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N4_seed0", "selected_topk": [ "r4:aqua_rat", "r5:conala_curated", "r4:arc_easy" ], "accuracy": 0.23, "real_generation_eval": true, "eval_examples": 100, "gpu": 0, "eval_seconds": 29.348, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.0, "domain": "code" }, { "task": "mbpp_test_held", "N": 4, "seed": 1, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N4_seed1", "selected_topk": [ "r4:mbpp_sanitized", "r5:pubmedqa_pqal", "r4:aqua_rat" ], "accuracy": 0.27, "real_generation_eval": true, "eval_examples": 100, "gpu": 7, "eval_seconds": 29.833, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.44444444444444453, "domain": "code" }, { "task": "mbpp_test_held", "N": 4, "seed": 2, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r5:aqua_rat_numeric", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N4_seed2", "selected_topk": [ "r4:math_counting_easy", "r5:conala_curated", "r4:arc_easy" ], "accuracy": 0.23, "real_generation_eval": true, "eval_examples": 100, "gpu": 6, "eval_seconds": 29.391, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.0, "domain": "code" }, { "task": "mbpp_test_held", "N": 8, "seed": 0, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N8_seed0", "selected_topk": null, "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 4, "eval_seconds": 28.852, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "task": "mbpp_test_held", "N": 8, "seed": 1, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N8_seed1", "selected_topk": null, "accuracy": 0.27, "real_generation_eval": true, "eval_examples": 100, "gpu": 3, "eval_seconds": 30.289, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.44444444444444453, "domain": "code" }, { "task": "mbpp_test_held", "N": 8, "seed": 2, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N8_seed2", "selected_topk": null, "accuracy": 0.26, "real_generation_eval": true, "eval_examples": 100, "gpu": 2, "eval_seconds": 28.2, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.3333333333333333, "domain": "code" }, { "task": "mbpp_test_held", "N": 8, "seed": 0, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N8_seed0", "selected_topk": null, "accuracy": 0.24, "real_generation_eval": true, "eval_examples": 100, "gpu": 3, "eval_seconds": 29.436, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.11111111111111091, "domain": "code" }, { "task": "mbpp_test_held", "N": 8, "seed": 1, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N8_seed1", "selected_topk": null, "accuracy": 0.24, "real_generation_eval": true, "eval_examples": 100, "gpu": 2, "eval_seconds": 28.812, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.11111111111111091, "domain": "code" }, { "task": "mbpp_test_held", "N": 8, "seed": 2, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N8_seed2", "selected_topk": null, "accuracy": 0.23, "real_generation_eval": true, "eval_examples": 100, "gpu": 1, "eval_seconds": 30.096, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.0, "domain": "code" }, { "task": "mbpp_test_held", "N": 8, "seed": 0, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N8_seed0", "selected_topk": [ "r5:humaneval", "r4:mmlu_elementary_math", "r4:svamp", "r4:aqua_rat", "r5:conala_curated", "r4:mbpp", "r4:arc_easy" ], "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 5, "eval_seconds": 28.379, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "task": "mbpp_test_held", "N": 8, "seed": 1, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N8_seed1", "selected_topk": [ "r4:mbpp_sanitized", "r5:humaneval", "r5:pubmedqa_pqal", "r4:aqua_rat", "r5:conala_curated", "r5:medmcqa_easy", "r5:math_counting_easy" ], "accuracy": 0.27, "real_generation_eval": true, "eval_examples": 100, "gpu": 4, "eval_seconds": 31.024, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.44444444444444453, "domain": "code" }, { "task": "mbpp_test_held", "N": 8, "seed": 2, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N8_seed2", "selected_topk": [ "r4:mbpp_sanitized", "r4:math_counting_easy", "r4:math_algebra_easy", "r5:conala_curated", "r5:medmcqa_easy", "r5:mbpp_sanitized", "r4:arc_easy" ], "accuracy": 0.26, "real_generation_eval": true, "eval_examples": 100, "gpu": 3, "eval_seconds": 29.802, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.3333333333333333, "domain": "code" }, { "task": "mbpp_test_held", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N12_seed0", "selected_topk": null, "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 1, "eval_seconds": 29.335, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "task": "mbpp_test_held", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N12_seed1", "selected_topk": null, "accuracy": 0.26, "real_generation_eval": true, "eval_examples": 100, "gpu": 0, "eval_seconds": 24.376, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.3333333333333333, "domain": "code" }, { "task": "mbpp_test_held", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N12_seed2", "selected_topk": null, "accuracy": 0.26, "real_generation_eval": true, "eval_examples": 100, "gpu": 7, "eval_seconds": 25.224, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.3333333333333333, "domain": "code" }, { "task": "mbpp_test_held", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N12_seed0", "selected_topk": null, "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 0, "eval_seconds": 29.425, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "task": "mbpp_test_held", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N12_seed1", "selected_topk": null, "accuracy": 0.23, "real_generation_eval": true, "eval_examples": 100, "gpu": 7, "eval_seconds": 25.255, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.0, "domain": "code" }, { "task": "mbpp_test_held", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N12_seed2", "selected_topk": null, "accuracy": 0.24, "real_generation_eval": true, "eval_examples": 100, "gpu": 6, "eval_seconds": 24.291, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.11111111111111091, "domain": "code" }, { "task": "mbpp_test_held", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N12_seed0", "selected_topk": [ "r4:math_counting_easy", "r5:humaneval", "r4:humaneval", "r4:mmlu_high_school_biology", "r4:mmlu_elementary_math", "r4:svamp", "r4:aqua_rat", "r5:conala_curated" ], "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 2, "eval_seconds": 28.665, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "task": "mbpp_test_held", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N12_seed1", "selected_topk": [ "r4:mbpp_sanitized", "r5:humaneval", "r4:mmlu_high_school_physics", "r4:math_algebra_easy", "r5:pubmedqa_pqal", "r4:aqua_rat", "r5:conala_curated", "r5:medmcqa_easy" ], "accuracy": 0.26, "real_generation_eval": true, "eval_examples": 100, "gpu": 1, "eval_seconds": 24.393, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.3333333333333333, "domain": "code" }, { "task": "mbpp_test_held", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N12_seed2", "selected_topk": [ "r4:mbpp_sanitized", "r4:math_counting_easy", "r4:humaneval", "r4:math_algebra_easy", "r4:aqua_rat", "r5:conala_curated", "r4:openbookqa", "r5:medmcqa_easy" ], "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 0, "eval_seconds": 24.33, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "task": "mbpp_test_held", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N16_seed0", "selected_topk": null, "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 6, "eval_seconds": 24.197, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "task": "mbpp_test_held", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N16_seed1", "selected_topk": null, "accuracy": 0.26, "real_generation_eval": true, "eval_examples": 100, "gpu": 5, "eval_seconds": 24.539, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.3333333333333333, "domain": "code" }, { "task": "mbpp_test_held", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N16_seed2", "selected_topk": null, "accuracy": 0.26, "real_generation_eval": true, "eval_examples": 100, "gpu": 4, "eval_seconds": 24.852, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.3333333333333333, "domain": "code" }, { "task": "mbpp_test_held", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N16_seed0", "selected_topk": null, "accuracy": 0.24, "real_generation_eval": true, "eval_examples": 100, "gpu": 5, "eval_seconds": 23.992, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.11111111111111091, "domain": "code" }, { "task": "mbpp_test_held", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N16_seed1", "selected_topk": null, "accuracy": 0.24, "real_generation_eval": true, "eval_examples": 100, "gpu": 4, "eval_seconds": 24.993, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.11111111111111091, "domain": "code" }, { "task": "mbpp_test_held", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N16_seed2", "selected_topk": null, "accuracy": 0.24, "real_generation_eval": true, "eval_examples": 100, "gpu": 3, "eval_seconds": 24.299, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.11111111111111091, "domain": "code" }, { "task": "mbpp_test_held", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N16_seed0", "selected_topk": [ "r4:mbpp_sanitized", "r4:math_counting_easy", "r5:humaneval", "r4:humaneval", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:mmlu_elementary_math", "r4:math_algebra_easy" ], "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 7, "eval_seconds": 25.384, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "task": "mbpp_test_held", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N16_seed1", "selected_topk": [ "r4:mbpp_sanitized", "r4:math_counting_easy", "r5:humaneval", "r4:mmlu_high_school_physics", "r4:math_algebra_easy", "r5:pubmedqa_pqal", "r4:aqua_rat", "r5:conala_curated" ], "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 6, "eval_seconds": 25.052, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "task": "mbpp_test_held", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N16_seed2", "selected_topk": [ "r4:mbpp_sanitized", "r4:math_counting_easy", "r4:humaneval", "r4:multiarith", "r4:math_algebra_easy", "r4:aqua_rat", "r5:conala_curated", "r4:openbookqa" ], "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 5, "eval_seconds": 24.877, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "task": "mbpp_test_held", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "global_ridge", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_global_ridge_N24_full", "selected_topk": null, "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 3, "eval_seconds": 23.396, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "task": "mbpp_test_held", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "mean", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_mean_N24_full", "selected_topk": null, "accuracy": 0.24, "real_generation_eval": true, "eval_examples": 100, "gpu": 2, "eval_seconds": 23.907, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.11111111111111091, "domain": "code" }, { "task": "mbpp_test_held", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk8_global_ridge", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/mbpp_test_held_topk8_global_ridge_N24_full", "selected_topk": [ "r4:mbpp_sanitized", "r4:math_counting_easy", "r5:humaneval", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:mmlu_elementary_math" ], "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 4, "eval_seconds": 24.651, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "task": "openbookqa_test", "N": 4, "seed": 0, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:arc_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N4_seed0", "selected_topk": null, "accuracy": 0.73, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 10.099, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.07317073170731714, "domain": "science" }, { "task": "openbookqa_test", "N": 4, "seed": 1, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N4_seed1", "selected_topk": null, "accuracy": 0.69, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 4.078, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": -0.07317073170731714, "domain": "science" }, { "task": "openbookqa_test", "N": 4, "seed": 2, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r5:aqua_rat_numeric", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N4_seed2", "selected_topk": null, "accuracy": 0.71, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 2.76, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.0, "domain": "science" }, { "task": "openbookqa_test", "N": 4, "seed": 0, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:arc_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N4_seed0", "selected_topk": null, "accuracy": 0.75, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 2.965, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.14634146341463428, "domain": "science" }, { "task": "openbookqa_test", "N": 4, "seed": 1, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N4_seed1", "selected_topk": null, "accuracy": 0.74, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 4.032, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.10975609756097571, "domain": "science" }, { "task": "openbookqa_test", "N": 4, "seed": 2, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r5:aqua_rat_numeric", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N4_seed2", "selected_topk": null, "accuracy": 0.7466666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 3.041, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.1341463414634149, "domain": "science" }, { "task": "openbookqa_test", "N": 4, "seed": 0, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:arc_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N4_seed0", "selected_topk": [ "r4:aqua_rat", "r5:conala_curated", "r5:math_counting_easy" ], "accuracy": 0.72, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 9.433, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.03658536585365857, "domain": "science" }, { "task": "openbookqa_test", "N": 4, "seed": 1, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N4_seed1", "selected_topk": [ "r4:mbpp_sanitized", "r5:pubmedqa_pqal", "r4:aqua_rat" ], "accuracy": 0.69, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 4.006, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": -0.07317073170731714, "domain": "science" }, { "task": "openbookqa_test", "N": 4, "seed": 2, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r5:aqua_rat_numeric", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N4_seed2", "selected_topk": [ "r4:math_counting_easy", "r5:conala_curated", "r5:aqua_rat_numeric" ], "accuracy": 0.7166666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 3.818, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.02439024390243918, "domain": "science" }, { "task": "openbookqa_test", "N": 8, "seed": 0, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N8_seed0", "selected_topk": null, "accuracy": 0.7433333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 6.422, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.1219512195121951, "domain": "science" }, { "task": "openbookqa_test", "N": 8, "seed": 1, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N8_seed1", "selected_topk": null, "accuracy": 0.7333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 5.598, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.08536585365853654, "domain": "science" }, { "task": "openbookqa_test", "N": 8, "seed": 2, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N8_seed2", "selected_topk": null, "accuracy": 0.7333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 3.741, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.08536585365853654, "domain": "science" }, { "task": "openbookqa_test", "N": 8, "seed": 0, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N8_seed0", "selected_topk": null, "accuracy": 0.7533333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 4.328, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.15853658536585366, "domain": "science" }, { "task": "openbookqa_test", "N": 8, "seed": 1, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N8_seed1", "selected_topk": null, "accuracy": 0.7433333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 3.192, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.1219512195121951, "domain": "science" }, { "task": "openbookqa_test", "N": 8, "seed": 2, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N8_seed2", "selected_topk": null, "accuracy": 0.74, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 2.692, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.10975609756097571, "domain": "science" }, { "task": "openbookqa_test", "N": 8, "seed": 0, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N8_seed0", "selected_topk": [ "r4:mmlu_elementary_math", "r5:humaneval", "r4:svamp", "r4:aqua_rat", "r5:conala_curated", "r4:mbpp", "r5:math_counting_easy" ], "accuracy": 0.7366666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 6.351, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.09756097560975632, "domain": "science" }, { "task": "openbookqa_test", "N": 8, "seed": 1, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp_sanitized", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N8_seed1", "selected_topk": [ "r4:mbpp_sanitized", "r5:humaneval", "r5:pubmedqa_pqal", "r4:aqua_rat", "r5:medmcqa_easy", "r5:conala_curated", "r5:math_counting_easy" ], "accuracy": 0.7366666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 4.449, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.09756097560975632, "domain": "science" }, { "task": "openbookqa_test", "N": 8, "seed": 2, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:arc_easy", "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N8_seed2", "selected_topk": [ "r4:mbpp_sanitized", "r4:math_counting_easy", "r4:math_algebra_easy", "r5:medmcqa_easy", "r5:conala_curated", "r5:mbpp_sanitized", "r5:aqua_rat_numeric" ], "accuracy": 0.73, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 4.862, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.07317073170731714, "domain": "science" }, { "task": "openbookqa_test", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N12_seed0", "selected_topk": null, "accuracy": 0.7333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 10.478, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.08536585365853654, "domain": "science" }, { "task": "openbookqa_test", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N12_seed1", "selected_topk": null, "accuracy": 0.71, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 6.801, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.0, "domain": "science" }, { "task": "openbookqa_test", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N12_seed2", "selected_topk": null, "accuracy": 0.7633333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 3.86, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.19512195121951226, "domain": "science" }, { "task": "openbookqa_test", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N12_seed0", "selected_topk": null, "accuracy": 0.7366666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 2.48, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.09756097560975632, "domain": "science" }, { "task": "openbookqa_test", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N12_seed1", "selected_topk": null, "accuracy": 0.7433333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 2.299, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.1219512195121951, "domain": "science" }, { "task": "openbookqa_test", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N12_seed2", "selected_topk": null, "accuracy": 0.7566666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 2.517, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.17073170731707346, "domain": "science" }, { "task": "openbookqa_test", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N12_seed0", "selected_topk": [ "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:mmlu_elementary_math", "r5:humaneval", "r4:humaneval", "r4:svamp", "r4:aqua_rat", "r5:medmcqa_easy" ], "accuracy": 0.7366666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 9.074, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.09756097560975632, "domain": "science" }, { "task": "openbookqa_test", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N12_seed1", "selected_topk": [ "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r5:humaneval", "r4:math_algebra_easy", "r5:pubmedqa_pqal", "r4:aqua_rat", "r5:medmcqa_easy", "r5:conala_curated" ], "accuracy": 0.7033333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 6.701, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": -0.024390243902438775, "domain": "science" }, { "task": "openbookqa_test", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N12_seed2", "selected_topk": [ "r4:mbpp_sanitized", "r4:math_counting_easy", "r4:humaneval", "r4:math_algebra_easy", "r4:openbookqa", "r4:aqua_rat", "r5:medmcqa_easy", "r5:conala_curated" ], "accuracy": 0.7633333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 4.001, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.19512195121951226, "domain": "science" }, { "task": "openbookqa_test", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N16_seed0", "selected_topk": null, "accuracy": 0.73, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 8.029, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.07317073170731714, "domain": "science" }, { "task": "openbookqa_test", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N16_seed1", "selected_topk": null, "accuracy": 0.7433333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 7.467, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.1219512195121951, "domain": "science" }, { "task": "openbookqa_test", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "global_ridge", "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N16_seed2", "selected_topk": null, "accuracy": 0.76, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 3.776, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.18292682926829285, "domain": "science" }, { "task": "openbookqa_test", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N16_seed0", "selected_topk": null, "accuracy": 0.7533333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 2.287, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.15853658536585366, "domain": "science" }, { "task": "openbookqa_test", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N16_seed1", "selected_topk": null, "accuracy": 0.7566666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 2.284, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.17073170731707346, "domain": "science" }, { "task": "openbookqa_test", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "mean", "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N16_seed2", "selected_topk": null, "accuracy": 0.7533333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 2.384, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.15853658536585366, "domain": "science" }, { "task": "openbookqa_test", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N16_seed0", "selected_topk": [ "r4:mmlu_high_school_biology", "r4:mbpp_sanitized", "r4:math_counting_easy", "r4:mmlu_elementary_math", "r5:humaneval", "r4:humaneval", "r4:multiarith", "r4:math_algebra_easy" ], "accuracy": 0.73, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 7.751, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.07317073170731714, "domain": "science" }, { "task": "openbookqa_test", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N16_seed1", "selected_topk": [ "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_counting_easy", "r5:humaneval", "r4:math_algebra_easy", "r4:openbookqa", "r5:pubmedqa_pqal", "r4:aqua_rat" ], "accuracy": 0.74, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 6.88, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.10975609756097571, "domain": "science" }, { "task": "openbookqa_test", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "topk8_global_ridge", "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N16_seed2", "selected_topk": [ "r4:mbpp_sanitized", "r4:math_counting_easy", "r4:humaneval", "r4:multiarith", "r4:math_algebra_easy", "r4:openbookqa", "r4:aqua_rat", "r5:medmcqa_easy" ], "accuracy": 0.76, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 3.951, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.18292682926829285, "domain": "science" }, { "task": "openbookqa_test", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "global_ridge", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_global_ridge_N24_full", "selected_topk": null, "accuracy": 0.75, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 10.959, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.14634146341463428, "domain": "science" }, { "task": "openbookqa_test", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "mean", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_mean_N24_full", "selected_topk": null, "accuracy": 0.7566666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 2.284, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.17073170731707346, "domain": "science" }, { "task": "openbookqa_test", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk8_global_ridge", "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round6/Y_pred/openbookqa_test_topk8_global_ridge_N24_full", "selected_topk": [ "r4:mmlu_high_school_physics", "r4:mmlu_high_school_biology", "r4:mbpp_sanitized", "r4:math_counting_easy", "r4:mmlu_elementary_math", "r5:humaneval", "r4:humaneval", "r4:multiarith" ], "accuracy": 0.7133333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 10.881, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.012195121951219795, "domain": "science" } ], "summary": { "4": { "mean": { "n_records": 15, "gap_recovered_mean": 0.030148586169927653, "gap_recovered_std": 0.06461483844499136, "accuracy_mean": 0.26822222222222225, "accuracy_std": 0.2560873693502683 }, "global_ridge": { "n_records": 15, "gap_recovered_mean": -0.003457468457468462, "gap_recovered_std": 0.19538572711097718, "accuracy_mean": 0.2604444444444444, "accuracy_std": 0.24815147277248514 }, "topk8_global_ridge": { "n_records": 15, "gap_recovered_mean": -0.016895374837448015, "gap_recovered_std": 0.18327362210993292, "accuracy_mean": 0.2591111111111111, "accuracy_std": 0.24823653168173224 } }, "8": { "mean": { "n_records": 15, "gap_recovered_mean": 0.06945515922650067, "gap_recovered_std": 0.06188765948601224, "accuracy_mean": 0.2728888888888889, "accuracy_std": 0.25359906697873863 }, "global_ridge": { "n_records": 15, "gap_recovered_mean": 0.13060651746627353, "gap_recovered_std": 0.14004780290486193, "accuracy_mean": 0.2797777777777778, "accuracy_std": 0.2511196093519146 }, "topk8_global_ridge": { "n_records": 15, "gap_recovered_mean": 0.1253655744661842, "gap_recovered_std": 0.15427112515492677, "accuracy_mean": 0.27955555555555556, "accuracy_std": 0.250399257905826 } }, "12": { "mean": { "n_records": 15, "gap_recovered_mean": 0.07704113806247954, "gap_recovered_std": 0.07136104275669189, "accuracy_mean": 0.27355555555555555, "accuracy_std": 0.2533159977360188 }, "global_ridge": { "n_records": 15, "gap_recovered_mean": 0.12578582720351011, "gap_recovered_std": 0.12796246741216633, "accuracy_mean": 0.2813333333333334, "accuracy_std": 0.24984376070237324 }, "topk8_global_ridge": { "n_records": 15, "gap_recovered_mean": 0.11746010031071008, "gap_recovered_std": 0.10911242321364244, "accuracy_mean": 0.2793333333333333, "accuracy_std": 0.24933307868588983 } }, "16": { "mean": { "n_records": 15, "gap_recovered_mean": 0.07689456207748886, "gap_recovered_std": 0.07100176997488256, "accuracy_mean": 0.2748888888888889, "accuracy_std": 0.25722841022417225 }, "global_ridge": { "n_records": 15, "gap_recovered_mean": 0.1365069252111935, "gap_recovered_std": 0.12641589655600136, "accuracy_mean": 0.2848888888888889, "accuracy_std": 0.25340809256150715 }, "topk8_global_ridge": { "n_records": 15, "gap_recovered_mean": 0.12353249054468564, "gap_recovered_std": 0.10122678980066042, "accuracy_mean": 0.2835555555555556, "accuracy_std": 0.25276084017909567 } }, "24": { "mean": { "n_records": 5, "gap_recovered_mean": 0.0830787285208017, "gap_recovered_std": 0.07181727060927716, "accuracy_mean": 0.276, "accuracy_std": 0.27834231522433744 }, "global_ridge": { "n_records": 5, "gap_recovered_mean": 0.13478416569879983, "gap_recovered_std": 0.10350184199429305, "accuracy_mean": 0.2859999999999999, "accuracy_std": 0.2754329924561205 }, "topk8_global_ridge": { "n_records": 5, "gap_recovered_mean": 0.12109363366985319, "gap_recovered_std": 0.12401845134797244, "accuracy_mean": 0.2806666666666667, "accuracy_std": 0.259950850055245 } } } }