{ "config": { "model_X": "Qwen/Qwen2.5-3B-Instruct", "model_Y": "meta-llama/Llama-3.2-3B-Instruct", "hub_repo": "CK0607/cross-model-lora-prediction-3b", "round8_real_generation_eval": true, "no_surrogate": true, "no_retraining": true, "eval_examples_requested": 300, "generation": { "do_sample": false, "num_beams": 1, "greedy": true, "max_new_tokens_code": 96, "max_new_tokens_other": 24 }, "pool_anchor_names": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "heldouts": [ "gsm_hard", "gsm8k_test_500", "mbpp_test_held", "mbpp_plus", "openbookqa_test" ], "subexperiments": { "1A_k_sweep": { "N": 24, "seed": 0, "K_values": [ 2, 4, 6, 8, 12, 16, 20, 24 ], "cells": 40 }, "1B_pertensor_methods": { "N_values": [ 12, 16, 24 ], "methods": [ "pertensor_ridge", "procrustes", "pertensor_pca" ], "seeds_for_subsampled_N": [ 0, 1, 2 ], "N24_seed": 0, "cells": 105 }, "1C_per_task_breakdown": { "derived_from": "results_round6.json records plus R8 N=16 records", "new_compute": false } }, "budget_reduction": null, "wall_seconds": 3221.695 }, "adapter_verification": { "listing": { "round4/X": [ "aqua_rat", "arc_challenge", "arc_easy", "gsm8k", "gsm8k_test_500", "gsm_hard", "humaneval", "math_algebra_easy", "math_counting_easy", "mbpp", "mbpp_plus", "mbpp_sanitized", "mbpp_test_held", "medmcqa_easy", "mmlu_elementary_math", "mmlu_high_school_biology", "mmlu_high_school_physics", "multiarith", "openbookqa", "openbookqa_test", "sciq", "svamp" ], "round4/Y": [ "aqua_rat", "arc_challenge", "arc_easy", "gsm8k", "gsm8k_test_500", "gsm_hard", "humaneval", "math_algebra_easy", "math_counting_easy", "mbpp", "mbpp_plus", "mbpp_sanitized", "mbpp_test_held", "medmcqa_easy", "mmlu_elementary_math", "mmlu_high_school_biology", "mmlu_high_school_physics", "multiarith", "openbookqa", "openbookqa_test", "sciq", "svamp" ], "round5/X": [ "aqua_rat_numeric", "conala_curated", "humaneval", "math_counting_easy", "mawps", "mbpp_sanitized", "medmcqa_easy", "pubmedqa_pqal" ], "round5/Y": [ "aqua_rat_numeric", "conala_curated", "humaneval", "math_counting_easy", "mawps", "mbpp_sanitized", "medmcqa_easy", "pubmedqa_pqal" ], "round6/Y_pred": [ "gsm8k_test_500_global_ridge_N12_seed0", "gsm8k_test_500_global_ridge_N12_seed1", "gsm8k_test_500_global_ridge_N12_seed2", "gsm8k_test_500_global_ridge_N16_seed0", "gsm8k_test_500_global_ridge_N16_seed1", "gsm8k_test_500_global_ridge_N16_seed2", "gsm8k_test_500_global_ridge_N24_full", "gsm8k_test_500_global_ridge_N4_seed0", "gsm8k_test_500_global_ridge_N4_seed1", "gsm8k_test_500_global_ridge_N4_seed2", "gsm8k_test_500_global_ridge_N8_seed0", "gsm8k_test_500_global_ridge_N8_seed1", "gsm8k_test_500_global_ridge_N8_seed2", "gsm8k_test_500_mean_N12_seed0", "gsm8k_test_500_mean_N12_seed1", "gsm8k_test_500_mean_N12_seed2", "gsm8k_test_500_mean_N16_seed0", "gsm8k_test_500_mean_N16_seed1", "gsm8k_test_500_mean_N16_seed2", "gsm8k_test_500_mean_N24_full", "gsm8k_test_500_mean_N4_seed0", "gsm8k_test_500_mean_N4_seed1", "gsm8k_test_500_mean_N4_seed2", "gsm8k_test_500_mean_N8_seed0", "gsm8k_test_500_mean_N8_seed1", "gsm8k_test_500_mean_N8_seed2", "gsm8k_test_500_topk8_global_ridge_N12_seed0", "gsm8k_test_500_topk8_global_ridge_N12_seed1", "gsm8k_test_500_topk8_global_ridge_N12_seed2", "gsm8k_test_500_topk8_global_ridge_N16_seed0", "gsm8k_test_500_topk8_global_ridge_N16_seed1", "gsm8k_test_500_topk8_global_ridge_N16_seed2", "gsm8k_test_500_topk8_global_ridge_N24_full", "gsm8k_test_500_topk8_global_ridge_N4_seed0", "gsm8k_test_500_topk8_global_ridge_N4_seed1", "gsm8k_test_500_topk8_global_ridge_N4_seed2", "gsm8k_test_500_topk8_global_ridge_N8_seed0", "gsm8k_test_500_topk8_global_ridge_N8_seed1", "gsm8k_test_500_topk8_global_ridge_N8_seed2", "gsm_hard_global_ridge_N12_seed0", "gsm_hard_global_ridge_N12_seed1", "gsm_hard_global_ridge_N12_seed2", "gsm_hard_global_ridge_N16_seed0", "gsm_hard_global_ridge_N16_seed1", "gsm_hard_global_ridge_N16_seed2", "gsm_hard_global_ridge_N24_full", "gsm_hard_global_ridge_N4_seed0", "gsm_hard_global_ridge_N4_seed1", "gsm_hard_global_ridge_N4_seed2", "gsm_hard_global_ridge_N8_seed0", "gsm_hard_global_ridge_N8_seed1", "gsm_hard_global_ridge_N8_seed2", "gsm_hard_mean_N12_seed0", "gsm_hard_mean_N12_seed1", "gsm_hard_mean_N12_seed2", "gsm_hard_mean_N16_seed0", "gsm_hard_mean_N16_seed1", "gsm_hard_mean_N16_seed2", "gsm_hard_mean_N24_full", "gsm_hard_mean_N4_seed0", "gsm_hard_mean_N4_seed1", "gsm_hard_mean_N4_seed2", "gsm_hard_mean_N8_seed0", "gsm_hard_mean_N8_seed1", "gsm_hard_mean_N8_seed2", "gsm_hard_topk8_global_ridge_N12_seed0", "gsm_hard_topk8_global_ridge_N12_seed1", "gsm_hard_topk8_global_ridge_N12_seed2", "gsm_hard_topk8_global_ridge_N16_seed0", "gsm_hard_topk8_global_ridge_N16_seed1", "gsm_hard_topk8_global_ridge_N16_seed2", "gsm_hard_topk8_global_ridge_N24_full", "gsm_hard_topk8_global_ridge_N4_seed0", "gsm_hard_topk8_global_ridge_N4_seed1", "gsm_hard_topk8_global_ridge_N4_seed2", "gsm_hard_topk8_global_ridge_N8_seed0", "gsm_hard_topk8_global_ridge_N8_seed1", "gsm_hard_topk8_global_ridge_N8_seed2", "mbpp_plus_global_ridge_N12_seed0", "mbpp_plus_global_ridge_N12_seed1", "mbpp_plus_global_ridge_N12_seed2", "mbpp_plus_global_ridge_N16_seed0", "mbpp_plus_global_ridge_N16_seed1", "mbpp_plus_global_ridge_N16_seed2", "mbpp_plus_global_ridge_N24_full", "mbpp_plus_global_ridge_N4_seed0", "mbpp_plus_global_ridge_N4_seed1", "mbpp_plus_global_ridge_N4_seed2", "mbpp_plus_global_ridge_N8_seed0", "mbpp_plus_global_ridge_N8_seed1", "mbpp_plus_global_ridge_N8_seed2", "mbpp_plus_mean_N12_seed0", "mbpp_plus_mean_N12_seed1", "mbpp_plus_mean_N12_seed2", "mbpp_plus_mean_N16_seed0", "mbpp_plus_mean_N16_seed1", "mbpp_plus_mean_N16_seed2", "mbpp_plus_mean_N24_full", "mbpp_plus_mean_N4_seed0", "mbpp_plus_mean_N4_seed1", "mbpp_plus_mean_N4_seed2", "mbpp_plus_mean_N8_seed0", "mbpp_plus_mean_N8_seed1", "mbpp_plus_mean_N8_seed2", "mbpp_plus_topk8_global_ridge_N12_seed0", "mbpp_plus_topk8_global_ridge_N12_seed1", "mbpp_plus_topk8_global_ridge_N12_seed2", "mbpp_plus_topk8_global_ridge_N16_seed0", "mbpp_plus_topk8_global_ridge_N16_seed1", "mbpp_plus_topk8_global_ridge_N16_seed2", "mbpp_plus_topk8_global_ridge_N24_full", "mbpp_plus_topk8_global_ridge_N4_seed0", "mbpp_plus_topk8_global_ridge_N4_seed1", "mbpp_plus_topk8_global_ridge_N4_seed2", "mbpp_plus_topk8_global_ridge_N8_seed0", "mbpp_plus_topk8_global_ridge_N8_seed1", "mbpp_plus_topk8_global_ridge_N8_seed2", "mbpp_test_held_global_ridge_N12_seed0", "mbpp_test_held_global_ridge_N12_seed1", "mbpp_test_held_global_ridge_N12_seed2", "mbpp_test_held_global_ridge_N16_seed0", "mbpp_test_held_global_ridge_N16_seed1", "mbpp_test_held_global_ridge_N16_seed2", "mbpp_test_held_global_ridge_N24_full", "mbpp_test_held_global_ridge_N4_seed0", "mbpp_test_held_global_ridge_N4_seed1", "mbpp_test_held_global_ridge_N4_seed2", "mbpp_test_held_global_ridge_N8_seed0", "mbpp_test_held_global_ridge_N8_seed1", "mbpp_test_held_global_ridge_N8_seed2", "mbpp_test_held_mean_N12_seed0", "mbpp_test_held_mean_N12_seed1", "mbpp_test_held_mean_N12_seed2", "mbpp_test_held_mean_N16_seed0", "mbpp_test_held_mean_N16_seed1", "mbpp_test_held_mean_N16_seed2", "mbpp_test_held_mean_N24_full", "mbpp_test_held_mean_N4_seed0", "mbpp_test_held_mean_N4_seed1", "mbpp_test_held_mean_N4_seed2", "mbpp_test_held_mean_N8_seed0", "mbpp_test_held_mean_N8_seed1", "mbpp_test_held_mean_N8_seed2", "mbpp_test_held_topk8_global_ridge_N12_seed0", "mbpp_test_held_topk8_global_ridge_N12_seed1", "mbpp_test_held_topk8_global_ridge_N12_seed2", "mbpp_test_held_topk8_global_ridge_N16_seed0", "mbpp_test_held_topk8_global_ridge_N16_seed1", "mbpp_test_held_topk8_global_ridge_N16_seed2", "mbpp_test_held_topk8_global_ridge_N24_full", "mbpp_test_held_topk8_global_ridge_N4_seed0", "mbpp_test_held_topk8_global_ridge_N4_seed1", "mbpp_test_held_topk8_global_ridge_N4_seed2", "mbpp_test_held_topk8_global_ridge_N8_seed0", "mbpp_test_held_topk8_global_ridge_N8_seed1", "mbpp_test_held_topk8_global_ridge_N8_seed2", "openbookqa_test_global_ridge_N12_seed0", "openbookqa_test_global_ridge_N12_seed1", "openbookqa_test_global_ridge_N12_seed2", "openbookqa_test_global_ridge_N16_seed0", "openbookqa_test_global_ridge_N16_seed1", "openbookqa_test_global_ridge_N16_seed2", "openbookqa_test_global_ridge_N24_full", "openbookqa_test_global_ridge_N4_seed0", "openbookqa_test_global_ridge_N4_seed1", "openbookqa_test_global_ridge_N4_seed2", "openbookqa_test_global_ridge_N8_seed0", "openbookqa_test_global_ridge_N8_seed1", "openbookqa_test_global_ridge_N8_seed2", "openbookqa_test_mean_N12_seed0", "openbookqa_test_mean_N12_seed1", "openbookqa_test_mean_N12_seed2", "openbookqa_test_mean_N16_seed0", "openbookqa_test_mean_N16_seed1", "openbookqa_test_mean_N16_seed2", "openbookqa_test_mean_N24_full", "openbookqa_test_mean_N4_seed0", "openbookqa_test_mean_N4_seed1", "openbookqa_test_mean_N4_seed2", "openbookqa_test_mean_N8_seed0", "openbookqa_test_mean_N8_seed1", "openbookqa_test_mean_N8_seed2", "openbookqa_test_topk8_global_ridge_N12_seed0", "openbookqa_test_topk8_global_ridge_N12_seed1", "openbookqa_test_topk8_global_ridge_N12_seed2", "openbookqa_test_topk8_global_ridge_N16_seed0", "openbookqa_test_topk8_global_ridge_N16_seed1", "openbookqa_test_topk8_global_ridge_N16_seed2", "openbookqa_test_topk8_global_ridge_N24_full", "openbookqa_test_topk8_global_ridge_N4_seed0", "openbookqa_test_topk8_global_ridge_N4_seed1", "openbookqa_test_topk8_global_ridge_N4_seed2", "openbookqa_test_topk8_global_ridge_N8_seed0", "openbookqa_test_topk8_global_ridge_N8_seed1", "openbookqa_test_topk8_global_ridge_N8_seed2" ] }, "missing": [], "count_warnings": [] }, "baselines": { "gsm_hard": { "base_Y": 0.06333333333333334, "oracle": 0.15 }, "gsm8k_test_500": { "base_Y": 0.08, "oracle": 0.29333333333333333 }, "mbpp_test_held": { "base_Y": 0.23, "oracle": 0.32 }, "mbpp_plus": { "base_Y": 0.21666666666666667, "oracle": 0.45 }, "openbookqa_test": { "base_Y": 0.71, "oracle": 0.9833333333333333 } }, "r6_reference": { "N16_global_ridge_gap_recovered_mean": 0.1365069252111935, "N24_global_ridge_gap_recovered_mean": 0.13478416569879983, "N24_topk8_global_ridge_gap_recovered_mean": 0.12109363366985319 }, "records": [ { "subexperiment": "1A_k_sweep", "task": "gsm8k_test_500", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk12_global_ridge", "topk_K": 12, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk12_global_ridge_N24_full", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:mmlu_high_school_physics", "r5:humaneval", "r4:humaneval", "r4:multiarith", "r4:math_algebra_easy", "r4:mmlu_elementary_math", "r4:mmlu_high_school_biology", "r4:svamp", "r5:pubmedqa_pqal", "r4:aqua_rat" ], "accuracy": 0.09666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 8.139, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.07812499999999999, "domain": "math" }, { "subexperiment": "1A_k_sweep", "task": "gsm8k_test_500", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk16_global_ridge", "topk_K": 16, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk16_global_ridge_N24_full", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:mmlu_high_school_physics", "r5:humaneval", "r4:humaneval", "r4:multiarith", "r4:math_algebra_easy", "r4:mmlu_elementary_math", "r4:mmlu_high_school_biology", "r4:svamp", "r5:pubmedqa_pqal", "r4:aqua_rat", "r4:openbookqa", "r5:conala_curated", "r4:medmcqa_easy", "r5:medmcqa_easy" ], "accuracy": 0.09333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 8.196, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.06250000000000001, "domain": "math" }, { "subexperiment": "1A_k_sweep", "task": "gsm8k_test_500", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk20_global_ridge", "topk_K": 20, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk20_global_ridge_N24_full", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:mmlu_high_school_physics", "r5:humaneval", "r4:humaneval", "r4:multiarith", "r4:math_algebra_easy", "r4:mmlu_elementary_math", "r4:mmlu_high_school_biology", "r4:svamp", "r5:pubmedqa_pqal", "r4:aqua_rat", "r4:openbookqa", "r5:conala_curated", "r4:medmcqa_easy", "r5:medmcqa_easy", "r4:mbpp", "r5:mbpp_sanitized", "r4:sciq", "r5:math_counting_easy" ], "accuracy": 0.1, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 8.624, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.09375000000000003, "domain": "math" }, { "subexperiment": "1A_k_sweep", "task": "gsm8k_test_500", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk24_global_ridge", "topk_K": 24, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk24_global_ridge_N24_full", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:mmlu_high_school_physics", "r5:humaneval", "r4:humaneval", "r4:multiarith", "r4:math_algebra_easy", "r4:mmlu_elementary_math", "r4:mmlu_high_school_biology", "r4:svamp", "r5:pubmedqa_pqal", "r4:aqua_rat", "r4:openbookqa", "r5:conala_curated", "r4:medmcqa_easy", "r5:medmcqa_easy", "r5:mbpp_sanitized", "r4:mbpp", "r4:sciq", "r5:math_counting_easy", "r5:aqua_rat_numeric", "r4:arc_easy", "r4:gsm8k", "r5:mawps" ], "accuracy": 0.09666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 8.117, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.07812499999999999, "domain": "math" }, { "subexperiment": "1A_k_sweep", "task": "gsm8k_test_500", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk2_global_ridge", "topk_K": 2, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk2_global_ridge_N24_full", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized" ], "accuracy": 0.06333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 7.03, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": -0.07812499999999999, "domain": "math" }, { "subexperiment": "1A_k_sweep", "task": "gsm8k_test_500", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk4_global_ridge", "topk_K": 4, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk4_global_ridge_N24_full", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:mmlu_high_school_physics", "r4:humaneval" ], "accuracy": 0.08, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 7.458, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.0, "domain": "math" }, { "subexperiment": "1A_k_sweep", "task": "gsm8k_test_500", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk6_global_ridge", "topk_K": 6, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk6_global_ridge_N24_full", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:mmlu_high_school_physics", "r5:humaneval", "r4:humaneval", "r4:multiarith" ], "accuracy": 0.08666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 7.563, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.03125000000000001, "domain": "math" }, { "subexperiment": "1A_k_sweep", "task": "gsm8k_test_500", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk8_global_ridge", "topk_K": 8, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_topk8_global_ridge_N24_full", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:mmlu_high_school_physics", "r5:humaneval", "r4:humaneval", "r4:multiarith", "r4:math_algebra_easy", "r4:mmlu_elementary_math" ], "accuracy": 0.1, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 7.504, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.09375000000000003, "domain": "math" }, { "subexperiment": "1A_k_sweep", "task": "gsm_hard", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk12_global_ridge", "topk_K": 12, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk12_global_ridge_N24_full", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:mmlu_high_school_physics", "r5:humaneval", "r4:humaneval", "r4:multiarith", "r4:math_algebra_easy", "r4:mmlu_elementary_math", "r4:mmlu_high_school_biology", "r4:svamp", "r5:pubmedqa_pqal", "r4:aqua_rat" ], "accuracy": 0.06, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 22.248, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": -0.038461538461538554, "domain": "math" }, { "subexperiment": "1A_k_sweep", "task": "gsm_hard", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk16_global_ridge", "topk_K": 16, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk16_global_ridge_N24_full", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:mmlu_high_school_physics", "r5:humaneval", "r4:humaneval", "r4:multiarith", "r4:math_algebra_easy", "r4:mmlu_elementary_math", "r4:mmlu_high_school_biology", "r4:svamp", "r5:pubmedqa_pqal", "r4:aqua_rat", "r4:openbookqa", "r5:medmcqa_easy", "r4:medmcqa_easy", "r5:conala_curated" ], "accuracy": 0.06333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 23.394, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.0, "domain": "math" }, { "subexperiment": "1A_k_sweep", "task": "gsm_hard", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk20_global_ridge", "topk_K": 20, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk20_global_ridge_N24_full", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:mmlu_high_school_physics", "r5:humaneval", "r4:humaneval", "r4:multiarith", "r4:math_algebra_easy", "r4:mmlu_elementary_math", "r4:mmlu_high_school_biology", "r4:svamp", "r5:pubmedqa_pqal", "r4:aqua_rat", "r4:openbookqa", "r5:medmcqa_easy", "r4:medmcqa_easy", "r5:conala_curated", "r4:mbpp", "r5:mbpp_sanitized", "r5:math_counting_easy", "r4:sciq" ], "accuracy": 0.06333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 23.314, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.0, "domain": "math" }, { "subexperiment": "1A_k_sweep", "task": "gsm_hard", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk24_global_ridge", "topk_K": 24, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk24_global_ridge_N24_full", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:mmlu_high_school_physics", "r5:humaneval", "r4:humaneval", "r4:multiarith", "r4:math_algebra_easy", "r4:mmlu_elementary_math", "r4:mmlu_high_school_biology", "r4:svamp", "r5:pubmedqa_pqal", "r4:aqua_rat", "r4:openbookqa", "r4:medmcqa_easy", "r5:medmcqa_easy", "r5:conala_curated", "r5:mbpp_sanitized", "r4:mbpp", "r5:math_counting_easy", "r4:sciq", "r4:gsm8k", "r5:aqua_rat_numeric", "r4:arc_easy", "r5:mawps" ], "accuracy": 0.06333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 23.33, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.0, "domain": "math" }, { "subexperiment": "1A_k_sweep", "task": "gsm_hard", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk2_global_ridge", "topk_K": 2, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk2_global_ridge_N24_full", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized" ], "accuracy": 0.03666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 19.311, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": -0.30769230769230776, "domain": "math" }, { "subexperiment": "1A_k_sweep", "task": "gsm_hard", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk4_global_ridge", "topk_K": 4, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk4_global_ridge_N24_full", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:mmlu_high_school_physics", "r4:humaneval" ], "accuracy": 0.04666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 24.59, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": -0.19230769230769237, "domain": "math" }, { "subexperiment": "1A_k_sweep", "task": "gsm_hard", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk6_global_ridge", "topk_K": 6, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk6_global_ridge_N24_full", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:mmlu_high_school_physics", "r5:humaneval", "r4:humaneval", "r4:multiarith" ], "accuracy": 0.056666666666666664, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 22.195, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": -0.07692307692307702, "domain": "math" }, { "subexperiment": "1A_k_sweep", "task": "gsm_hard", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk8_global_ridge", "topk_K": 8, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_topk8_global_ridge_N24_full", "selected_topk": [ "r4:math_counting_easy", "r4:mbpp_sanitized", "r4:mmlu_high_school_physics", "r5:humaneval", "r4:humaneval", "r4:multiarith", "r4:math_algebra_easy", "r4:mmlu_elementary_math" ], "accuracy": 0.06, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 21.359, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": -0.038461538461538554, "domain": "math" }, { "subexperiment": "1A_k_sweep", "task": "mbpp_plus", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk12_global_ridge", "topk_K": 12, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk12_global_ridge_N24_full", "selected_topk": [ "r4:mbpp_sanitized", "r5:humaneval", "r4:humaneval", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:svamp", "r5:pubmedqa_pqal", "r4:aqua_rat" ], "accuracy": 0.27666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 149.129, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.2571428571428571, "domain": "code" }, { "subexperiment": "1A_k_sweep", "task": "mbpp_plus", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk16_global_ridge", "topk_K": 16, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk16_global_ridge_N24_full", "selected_topk": [ "r4:mbpp_sanitized", "r5:humaneval", "r4:humaneval", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:svamp", "r5:pubmedqa_pqal", "r4:aqua_rat", "r5:conala_curated", "r4:openbookqa", "r4:medmcqa_easy", "r5:medmcqa_easy" ], "accuracy": 0.26666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 151.069, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.21428571428571425, "domain": "code" }, { "subexperiment": "1A_k_sweep", "task": "mbpp_plus", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk20_global_ridge", "topk_K": 20, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk20_global_ridge_N24_full", "selected_topk": [ "r4:mbpp_sanitized", "r5:humaneval", "r4:humaneval", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:svamp", "r5:pubmedqa_pqal", "r4:aqua_rat", "r5:conala_curated", "r4:openbookqa", "r4:medmcqa_easy", "r5:medmcqa_easy", "r5:math_counting_easy", "r4:mbpp", "r5:mbpp_sanitized", "r4:arc_easy" ], "accuracy": 0.2633333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 149.578, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.19999999999999984, "domain": "code" }, { "subexperiment": "1A_k_sweep", "task": "mbpp_plus", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk24_global_ridge", "topk_K": 24, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk24_global_ridge_N24_full", "selected_topk": [ "r4:mbpp_sanitized", "r5:humaneval", "r4:humaneval", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:svamp", "r5:pubmedqa_pqal", "r4:aqua_rat", "r5:conala_curated", "r4:openbookqa", "r5:medmcqa_easy", "r4:medmcqa_easy", "r5:math_counting_easy", "r4:mbpp", "r5:mbpp_sanitized", "r4:arc_easy", "r4:sciq", "r4:gsm8k", "r5:mawps", "r5:aqua_rat_numeric" ], "accuracy": 0.26, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 149.26, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.18571428571428572, "domain": "code" }, { "subexperiment": "1A_k_sweep", "task": "mbpp_plus", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk2_global_ridge", "topk_K": 2, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk2_global_ridge_N24_full", "selected_topk": [ "r4:mbpp_sanitized", "r4:humaneval" ], "accuracy": 0.26666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 151.982, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.21428571428571425, "domain": "code" }, { "subexperiment": "1A_k_sweep", "task": "mbpp_plus", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk4_global_ridge", "topk_K": 4, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk4_global_ridge_N24_full", "selected_topk": [ "r4:mbpp_sanitized", "r4:humaneval", "r5:humaneval", "r4:math_counting_easy" ], "accuracy": 0.27, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 148.839, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.22857142857142862, "domain": "code" }, { "subexperiment": "1A_k_sweep", "task": "mbpp_plus", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk6_global_ridge", "topk_K": 6, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk6_global_ridge_N24_full", "selected_topk": [ "r4:mbpp_sanitized", "r5:humaneval", "r4:humaneval", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:multiarith" ], "accuracy": 0.27, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 152.179, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.22857142857142862, "domain": "code" }, { "subexperiment": "1A_k_sweep", "task": "mbpp_plus", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk8_global_ridge", "topk_K": 8, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_topk8_global_ridge_N24_full", "selected_topk": [ "r4:mbpp_sanitized", "r5:humaneval", "r4:humaneval", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:mmlu_elementary_math" ], "accuracy": 0.2733333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 152.508, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.24285714285714274, "domain": "code" }, { "subexperiment": "1A_k_sweep", "task": "mbpp_test_held", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk12_global_ridge", "topk_K": 12, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk12_global_ridge_N24_full", "selected_topk": [ "r4:mbpp_sanitized", "r4:math_counting_easy", "r5:humaneval", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:svamp", "r5:pubmedqa_pqal", "r4:aqua_rat" ], "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 4, "eval_seconds": 50.687, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "subexperiment": "1A_k_sweep", "task": "mbpp_test_held", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk16_global_ridge", "topk_K": 16, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk16_global_ridge_N24_full", "selected_topk": [ "r4:mbpp_sanitized", "r4:math_counting_easy", "r5:humaneval", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:svamp", "r5:pubmedqa_pqal", "r4:aqua_rat", "r5:conala_curated", "r4:openbookqa", "r4:medmcqa_easy", "r5:medmcqa_easy" ], "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 5, "eval_seconds": 50.62, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "subexperiment": "1A_k_sweep", "task": "mbpp_test_held", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk20_global_ridge", "topk_K": 20, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk20_global_ridge_N24_full", "selected_topk": [ "r4:mbpp_sanitized", "r4:math_counting_easy", "r5:humaneval", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:svamp", "r5:pubmedqa_pqal", "r4:aqua_rat", "r5:conala_curated", "r4:openbookqa", "r5:medmcqa_easy", "r4:medmcqa_easy", "r5:mbpp_sanitized", "r4:mbpp", "r4:arc_easy", "r5:math_counting_easy" ], "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 6, "eval_seconds": 51.775, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "subexperiment": "1A_k_sweep", "task": "mbpp_test_held", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk24_global_ridge", "topk_K": 24, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk24_global_ridge_N24_full", "selected_topk": [ "r4:mbpp_sanitized", "r4:math_counting_easy", "r5:humaneval", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:svamp", "r5:pubmedqa_pqal", "r4:aqua_rat", "r5:conala_curated", "r4:openbookqa", "r5:medmcqa_easy", "r4:medmcqa_easy", "r4:mbpp", "r5:mbpp_sanitized", "r4:arc_easy", "r5:math_counting_easy", "r4:sciq", "r5:mawps", "r4:gsm8k", "r5:aqua_rat_numeric" ], "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 7, "eval_seconds": 50.243, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "subexperiment": "1A_k_sweep", "task": "mbpp_test_held", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk2_global_ridge", "topk_K": 2, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk2_global_ridge_N24_full", "selected_topk": [ "r4:mbpp_sanitized", "r4:math_counting_easy" ], "accuracy": 0.26, "real_generation_eval": true, "eval_examples": 100, "gpu": 0, "eval_seconds": 51.753, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.3333333333333333, "domain": "code" }, { "subexperiment": "1A_k_sweep", "task": "mbpp_test_held", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk4_global_ridge", "topk_K": 4, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk4_global_ridge_N24_full", "selected_topk": [ "r4:mbpp_sanitized", "r4:math_counting_easy", "r4:humaneval", "r5:humaneval" ], "accuracy": 0.26, "real_generation_eval": true, "eval_examples": 100, "gpu": 1, "eval_seconds": 51.777, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.3333333333333333, "domain": "code" }, { "subexperiment": "1A_k_sweep", "task": "mbpp_test_held", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk6_global_ridge", "topk_K": 6, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk6_global_ridge_N24_full", "selected_topk": [ "r4:mbpp_sanitized", "r4:math_counting_easy", "r5:humaneval", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:multiarith" ], "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 2, "eval_seconds": 51.66, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "subexperiment": "1A_k_sweep", "task": "mbpp_test_held", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk8_global_ridge", "topk_K": 8, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_topk8_global_ridge_N24_full", "selected_topk": [ "r4:mbpp_sanitized", "r4:math_counting_easy", "r5:humaneval", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:mmlu_elementary_math" ], "accuracy": 0.26, "real_generation_eval": true, "eval_examples": 100, "gpu": 3, "eval_seconds": 51.078, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.3333333333333333, "domain": "code" }, { "subexperiment": "1A_k_sweep", "task": "openbookqa_test", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk12_global_ridge", "topk_K": 12, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk12_global_ridge_N24_full", "selected_topk": [ "r4:mmlu_high_school_physics", "r4:mmlu_high_school_biology", "r4:mbpp_sanitized", "r4:math_counting_easy", "r4:mmlu_elementary_math", "r5:humaneval", "r4:humaneval", "r4:multiarith", "r4:math_algebra_easy", "r4:svamp", "r4:openbookqa", "r5:pubmedqa_pqal" ], "accuracy": 0.7466666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 21.136, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.1341463414634149, "domain": "science" }, { "subexperiment": "1A_k_sweep", "task": "openbookqa_test", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk16_global_ridge", "topk_K": 16, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk16_global_ridge_N24_full", "selected_topk": [ "r4:mmlu_high_school_physics", "r4:mmlu_high_school_biology", "r4:mbpp_sanitized", "r4:math_counting_easy", "r4:mmlu_elementary_math", "r5:humaneval", "r4:humaneval", "r4:multiarith", "r4:math_algebra_easy", "r4:svamp", "r4:openbookqa", "r5:pubmedqa_pqal", "r4:aqua_rat", "r5:medmcqa_easy", "r4:medmcqa_easy", "r5:conala_curated" ], "accuracy": 0.75, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 23.81, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.14634146341463428, "domain": "science" }, { "subexperiment": "1A_k_sweep", "task": "openbookqa_test", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk20_global_ridge", "topk_K": 20, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk20_global_ridge_N24_full", "selected_topk": [ "r4:mmlu_high_school_physics", "r4:mmlu_high_school_biology", "r4:mbpp_sanitized", "r4:math_counting_easy", "r4:mmlu_elementary_math", "r5:humaneval", "r4:humaneval", "r4:multiarith", "r4:math_algebra_easy", "r4:svamp", "r4:openbookqa", "r5:pubmedqa_pqal", "r4:aqua_rat", "r4:medmcqa_easy", "r5:medmcqa_easy", "r5:conala_curated", "r4:sciq", "r5:mbpp_sanitized", "r4:mbpp", "r5:math_counting_easy" ], "accuracy": 0.7466666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 23.886, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.1341463414634149, "domain": "science" }, { "subexperiment": "1A_k_sweep", "task": "openbookqa_test", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk24_global_ridge", "topk_K": 24, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk24_global_ridge_N24_full", "selected_topk": [ "r4:mmlu_high_school_physics", "r4:mmlu_high_school_biology", "r4:mbpp_sanitized", "r4:math_counting_easy", "r4:mmlu_elementary_math", "r5:humaneval", "r4:humaneval", "r4:multiarith", "r4:math_algebra_easy", "r4:svamp", "r4:openbookqa", "r5:pubmedqa_pqal", "r4:aqua_rat", "r4:medmcqa_easy", "r5:medmcqa_easy", "r5:conala_curated", "r4:sciq", "r5:mbpp_sanitized", "r4:mbpp", "r5:math_counting_easy", "r4:gsm8k", "r5:mawps", "r5:aqua_rat_numeric", "r4:arc_easy" ], "accuracy": 0.75, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 21.378, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.14634146341463428, "domain": "science" }, { "subexperiment": "1A_k_sweep", "task": "openbookqa_test", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk2_global_ridge", "topk_K": 2, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk2_global_ridge_N24_full", "selected_topk": [ "r4:mmlu_high_school_physics", "r4:mmlu_high_school_biology" ], "accuracy": 0.71, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 28.785, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.0, "domain": "science" }, { "subexperiment": "1A_k_sweep", "task": "openbookqa_test", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk4_global_ridge", "topk_K": 4, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk4_global_ridge_N24_full", "selected_topk": [ "r4:mmlu_high_school_physics", "r4:mmlu_high_school_biology", "r4:mbpp_sanitized", "r4:math_counting_easy" ], "accuracy": 0.7033333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 17.313, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": -0.024390243902438775, "domain": "science" }, { "subexperiment": "1A_k_sweep", "task": "openbookqa_test", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk6_global_ridge", "topk_K": 6, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk6_global_ridge_N24_full", "selected_topk": [ "r4:mmlu_high_school_physics", "r4:mmlu_high_school_biology", "r4:mbpp_sanitized", "r4:math_counting_easy", "r4:mmlu_elementary_math", "r5:humaneval" ], "accuracy": 0.7133333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 23.886, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.012195121951219795, "domain": "science" }, { "subexperiment": "1A_k_sweep", "task": "openbookqa_test", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "topk8_global_ridge", "topk_K": 8, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_topk8_global_ridge_N24_full", "selected_topk": [ "r4:mmlu_high_school_physics", "r4:mmlu_high_school_biology", "r4:mbpp_sanitized", "r4:math_counting_easy", "r4:mmlu_elementary_math", "r5:humaneval", "r4:humaneval", "r4:multiarith" ], "accuracy": 0.7133333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 22.785, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.012195121951219795, "domain": "science" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm8k_test_500", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_pca_N12_seed0", "selected_topk": null, "accuracy": 0.1, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 12.148, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.09375000000000003, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm8k_test_500", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_pca_N12_seed1", "selected_topk": null, "accuracy": 0.10333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 12.822, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.109375, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm8k_test_500", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_pca_N12_seed2", "selected_topk": null, "accuracy": 0.09666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 8.836, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.07812499999999999, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm8k_test_500", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_ridge_N12_seed0", "selected_topk": null, "accuracy": 0.08666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 8.773, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.03125000000000001, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm8k_test_500", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_ridge_N12_seed1", "selected_topk": null, "accuracy": 0.1, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 11.491, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.09375000000000003, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm8k_test_500", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_ridge_N12_seed2", "selected_topk": null, "accuracy": 0.09, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 8.275, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.04687499999999998, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm8k_test_500", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_procrustes_N12_seed0", "selected_topk": null, "accuracy": 0.09, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 9.787, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.04687499999999998, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm8k_test_500", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_procrustes_N12_seed1", "selected_topk": null, "accuracy": 0.10666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 11.914, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.12500000000000003, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm8k_test_500", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_procrustes_N12_seed2", "selected_topk": null, "accuracy": 0.08666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 9.246, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.03125000000000001, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm8k_test_500", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_pca_N16_seed0", "selected_topk": null, "accuracy": 0.10666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 8.994, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.12500000000000003, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm8k_test_500", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_pca_N16_seed1", "selected_topk": null, "accuracy": 0.10333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 9.926, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.109375, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm8k_test_500", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_pca_N16_seed2", "selected_topk": null, "accuracy": 0.10666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 7.993, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.12500000000000003, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm8k_test_500", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_ridge_N16_seed0", "selected_topk": null, "accuracy": 0.08333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 8.441, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.015624999999999972, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm8k_test_500", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_ridge_N16_seed1", "selected_topk": null, "accuracy": 0.08333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 8.255, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.015624999999999972, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm8k_test_500", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_ridge_N16_seed2", "selected_topk": null, "accuracy": 0.09666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 7.784, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.07812499999999999, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm8k_test_500", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_procrustes_N16_seed0", "selected_topk": null, "accuracy": 0.1, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 9.276, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.09375000000000003, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm8k_test_500", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_procrustes_N16_seed1", "selected_topk": null, "accuracy": 0.10333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 9.883, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.109375, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm8k_test_500", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_procrustes_N16_seed2", "selected_topk": null, "accuracy": 0.09333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 8.051, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.06250000000000001, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm8k_test_500", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_pca_N24_full", "selected_topk": null, "accuracy": 0.1, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 10.998, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.09375000000000003, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm8k_test_500", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_pertensor_ridge_N24_full", "selected_topk": null, "accuracy": 0.08666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 8.517, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.03125000000000001, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm8k_test_500", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "procrustes", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm8k_test_500_procrustes_N24_full", "selected_topk": null, "accuracy": 0.09333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 8.354, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.06250000000000001, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm_hard", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_pca_N12_seed0", "selected_topk": null, "accuracy": 0.07666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 28.943, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.15384615384615374, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm_hard", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_pca_N12_seed1", "selected_topk": null, "accuracy": 0.07, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 28.182, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.07692307692307694, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm_hard", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_pca_N12_seed2", "selected_topk": null, "accuracy": 0.06333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 25.292, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.0, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm_hard", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_ridge_N12_seed0", "selected_topk": null, "accuracy": 0.06666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 26.471, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.038461538461538394, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm_hard", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_ridge_N12_seed1", "selected_topk": null, "accuracy": 0.07, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 27.19, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.07692307692307694, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm_hard", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_ridge_N12_seed2", "selected_topk": null, "accuracy": 0.06333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 22.78, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.0, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm_hard", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_procrustes_N12_seed0", "selected_topk": null, "accuracy": 0.06666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 28.412, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.038461538461538394, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm_hard", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_procrustes_N12_seed1", "selected_topk": null, "accuracy": 0.07333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 26.601, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.11538461538461534, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm_hard", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_procrustes_N12_seed2", "selected_topk": null, "accuracy": 0.06333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 26.109, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.0, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm_hard", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_pca_N16_seed0", "selected_topk": null, "accuracy": 0.07666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 24.319, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.15384615384615374, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm_hard", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_pca_N16_seed1", "selected_topk": null, "accuracy": 0.07, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 27.608, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.07692307692307694, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm_hard", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_pca_N16_seed2", "selected_topk": null, "accuracy": 0.06333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 24.916, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.0, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm_hard", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_ridge_N16_seed0", "selected_topk": null, "accuracy": 0.06666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 24.16, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.038461538461538394, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm_hard", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_ridge_N16_seed1", "selected_topk": null, "accuracy": 0.06333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 24.881, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.0, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm_hard", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_ridge_N16_seed2", "selected_topk": null, "accuracy": 0.06333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 24.068, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.0, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm_hard", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_procrustes_N16_seed0", "selected_topk": null, "accuracy": 0.06666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 26.767, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.038461538461538394, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm_hard", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_procrustes_N16_seed1", "selected_topk": null, "accuracy": 0.056666666666666664, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 23.901, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": -0.07692307692307702, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm_hard", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_procrustes_N16_seed2", "selected_topk": null, "accuracy": 0.06333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 26.267, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.0, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm_hard", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_pca_N24_full", "selected_topk": null, "accuracy": 0.07, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 28.154, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.07692307692307694, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm_hard", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_pertensor_ridge_N24_full", "selected_topk": null, "accuracy": 0.06333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 25.493, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.0, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "gsm_hard", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "procrustes", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/gsm_hard_procrustes_N24_full", "selected_topk": null, "accuracy": 0.07, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 25.138, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.07692307692307694, "domain": "math" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_plus", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_pca_N12_seed0", "selected_topk": null, "accuracy": 0.21666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 152.235, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.0, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_plus", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_pca_N12_seed1", "selected_topk": null, "accuracy": 0.22333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 153.508, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.02857142857142852, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_plus", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_pca_N12_seed2", "selected_topk": null, "accuracy": 0.21666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 154.26, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.0, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_plus", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_ridge_N12_seed0", "selected_topk": null, "accuracy": 0.21666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 156.161, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.0, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_plus", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_ridge_N12_seed1", "selected_topk": null, "accuracy": 0.2633333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 151.866, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.19999999999999984, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_plus", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_ridge_N12_seed2", "selected_topk": null, "accuracy": 0.26666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 150.685, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.21428571428571425, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_plus", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_procrustes_N12_seed0", "selected_topk": null, "accuracy": 0.21666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 155.572, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.0, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_plus", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_procrustes_N12_seed1", "selected_topk": null, "accuracy": 0.23666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 152.068, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.08571428571428567, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_plus", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_procrustes_N12_seed2", "selected_topk": null, "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 147.141, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.14285714285714282, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_plus", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_pca_N16_seed0", "selected_topk": null, "accuracy": 0.21666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 154.561, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.0, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_plus", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_pca_N16_seed1", "selected_topk": null, "accuracy": 0.21333333333333335, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 152.767, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": -0.01428571428571426, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_plus", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_pca_N16_seed2", "selected_topk": null, "accuracy": 0.21333333333333335, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 155.531, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": -0.01428571428571426, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_plus", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_ridge_N16_seed0", "selected_topk": null, "accuracy": 0.2633333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 149.424, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.19999999999999984, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_plus", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_ridge_N16_seed1", "selected_topk": null, "accuracy": 0.26666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 153.68, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.21428571428571425, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_plus", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_ridge_N16_seed2", "selected_topk": null, "accuracy": 0.26666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 153.64, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.21428571428571425, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_plus", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_procrustes_N16_seed0", "selected_topk": null, "accuracy": 0.23, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 152.497, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.057142857142857155, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_plus", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_procrustes_N16_seed1", "selected_topk": null, "accuracy": 0.23333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 144.794, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.07142857142857141, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_plus", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_procrustes_N16_seed2", "selected_topk": null, "accuracy": 0.24, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 155.568, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.09999999999999992, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_plus", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_pca_N24_full", "selected_topk": null, "accuracy": 0.21, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 156.101, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": -0.028571428571428636, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_plus", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_pertensor_ridge_N24_full", "selected_topk": null, "accuracy": 0.2733333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 153.146, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.24285714285714274, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_plus", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "procrustes", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_plus_procrustes_N24_full", "selected_topk": null, "accuracy": 0.22333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 151.112, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.02857142857142852, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_test_held", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_pca_N12_seed0", "selected_topk": null, "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 0, "eval_seconds": 46.874, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_test_held", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_pca_N12_seed1", "selected_topk": null, "accuracy": 0.24, "real_generation_eval": true, "eval_examples": 100, "gpu": 1, "eval_seconds": 46.856, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.11111111111111091, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_test_held", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_pca_N12_seed2", "selected_topk": null, "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 2, "eval_seconds": 48.369, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_test_held", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_ridge_N12_seed0", "selected_topk": null, "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 2, "eval_seconds": 50.528, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_test_held", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_ridge_N12_seed1", "selected_topk": null, "accuracy": 0.26, "real_generation_eval": true, "eval_examples": 100, "gpu": 3, "eval_seconds": 50.432, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.3333333333333333, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_test_held", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_ridge_N12_seed2", "selected_topk": null, "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 4, "eval_seconds": 50.815, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_test_held", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_procrustes_N12_seed0", "selected_topk": null, "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 1, "eval_seconds": 48.905, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_test_held", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_procrustes_N12_seed1", "selected_topk": null, "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 2, "eval_seconds": 51.877, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_test_held", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_procrustes_N12_seed2", "selected_topk": null, "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 3, "eval_seconds": 51.357, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_test_held", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_pca_N16_seed0", "selected_topk": null, "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 3, "eval_seconds": 48.404, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_test_held", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_pca_N16_seed1", "selected_topk": null, "accuracy": 0.24, "real_generation_eval": true, "eval_examples": 100, "gpu": 4, "eval_seconds": 49.682, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.11111111111111091, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_test_held", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_pca_N16_seed2", "selected_topk": null, "accuracy": 0.24, "real_generation_eval": true, "eval_examples": 100, "gpu": 5, "eval_seconds": 47.994, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.11111111111111091, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_test_held", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_ridge_N16_seed0", "selected_topk": null, "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 5, "eval_seconds": 51.19, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_test_held", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_ridge_N16_seed1", "selected_topk": null, "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 6, "eval_seconds": 51.786, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_test_held", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_ridge_N16_seed2", "selected_topk": null, "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 7, "eval_seconds": 51.484, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_test_held", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_procrustes_N16_seed0", "selected_topk": null, "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 4, "eval_seconds": 47.247, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_test_held", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_procrustes_N16_seed1", "selected_topk": null, "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 5, "eval_seconds": 51.2, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_test_held", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_procrustes_N16_seed2", "selected_topk": null, "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 6, "eval_seconds": 50.316, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_test_held", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_pca_N24_full", "selected_topk": null, "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 6, "eval_seconds": 48.081, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_test_held", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_pertensor_ridge_N24_full", "selected_topk": null, "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 0, "eval_seconds": 50.253, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "mbpp_test_held", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "procrustes", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/mbpp_test_held_procrustes_N24_full", "selected_topk": null, "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 7, "eval_seconds": 48.42, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213, "domain": "code" }, { "subexperiment": "1B_pertensor_methods", "task": "openbookqa_test", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_pca_N12_seed0", "selected_topk": null, "accuracy": 0.7366666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 16.579, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.09756097560975632, "domain": "science" }, { "subexperiment": "1B_pertensor_methods", "task": "openbookqa_test", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_pca_N12_seed1", "selected_topk": null, "accuracy": 0.7366666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 6.928, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.09756097560975632, "domain": "science" }, { "subexperiment": "1B_pertensor_methods", "task": "openbookqa_test", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_pca_N12_seed2", "selected_topk": null, "accuracy": 0.7666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 7.813, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.20731707317073203, "domain": "science" }, { "subexperiment": "1B_pertensor_methods", "task": "openbookqa_test", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_ridge_N12_seed0", "selected_topk": null, "accuracy": 0.73, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 17.824, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.07317073170731714, "domain": "science" }, { "subexperiment": "1B_pertensor_methods", "task": "openbookqa_test", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_ridge_N12_seed1", "selected_topk": null, "accuracy": 0.7, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 12.94, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": -0.03658536585365857, "domain": "science" }, { "subexperiment": "1B_pertensor_methods", "task": "openbookqa_test", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_ridge_N12_seed2", "selected_topk": null, "accuracy": 0.7666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 7.8, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.20731707317073203, "domain": "science" }, { "subexperiment": "1B_pertensor_methods", "task": "openbookqa_test", "N": 12, "seed": 0, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_elementary_math", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_procrustes_N12_seed0", "selected_topk": null, "accuracy": 0.7266666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 14.833, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.060975609756097754, "domain": "science" }, { "subexperiment": "1B_pertensor_methods", "task": "openbookqa_test", "N": 12, "seed": 1, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:arc_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_procrustes_N12_seed1", "selected_topk": null, "accuracy": 0.7266666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 9.166, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.060975609756097754, "domain": "science" }, { "subexperiment": "1B_pertensor_methods", "task": "openbookqa_test", "N": 12, "seed": 2, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_procrustes_N12_seed2", "selected_topk": null, "accuracy": 0.7666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 5.952, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.20731707317073203, "domain": "science" }, { "subexperiment": "1B_pertensor_methods", "task": "openbookqa_test", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_pca_N16_seed0", "selected_topk": null, "accuracy": 0.7333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 10.527, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.08536585365853654, "domain": "science" }, { "subexperiment": "1B_pertensor_methods", "task": "openbookqa_test", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_pca_N16_seed1", "selected_topk": null, "accuracy": 0.7666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 11.084, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.20731707317073203, "domain": "science" }, { "subexperiment": "1B_pertensor_methods", "task": "openbookqa_test", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_pca_N16_seed2", "selected_topk": null, "accuracy": 0.7666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 8.293, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.20731707317073203, "domain": "science" }, { "subexperiment": "1B_pertensor_methods", "task": "openbookqa_test", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_ridge_N16_seed0", "selected_topk": null, "accuracy": 0.7333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 15.941, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.08536585365853654, "domain": "science" }, { "subexperiment": "1B_pertensor_methods", "task": "openbookqa_test", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_ridge_N16_seed1", "selected_topk": null, "accuracy": 0.7366666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 14.792, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.09756097560975632, "domain": "science" }, { "subexperiment": "1B_pertensor_methods", "task": "openbookqa_test", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_ridge_N16_seed2", "selected_topk": null, "accuracy": 0.7633333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 7.42, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.19512195121951226, "domain": "science" }, { "subexperiment": "1B_pertensor_methods", "task": "openbookqa_test", "N": 16, "seed": 0, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:mbpp", "r4:arc_easy", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r5:math_counting_easy", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_procrustes_N16_seed0", "selected_topk": null, "accuracy": 0.7433333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 9.618, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.1219512195121951, "domain": "science" }, { "subexperiment": "1B_pertensor_methods", "task": "openbookqa_test", "N": 16, "seed": 1, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:arc_easy", "r4:openbookqa", "r4:math_counting_easy", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_procrustes_N16_seed1", "selected_topk": null, "accuracy": 0.7466666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 11.075, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.1341463414634149, "domain": "science" }, { "subexperiment": "1B_pertensor_methods", "task": "openbookqa_test", "N": 16, "seed": 2, "deterministic_full_pool": false, "method": "procrustes", "topk_K": null, "anchors": [ "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mbpp_sanitized", "r5:conala_curated", "r5:medmcqa_easy" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_procrustes_N16_seed2", "selected_topk": null, "accuracy": 0.76, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 4.516, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.18292682926829285, "domain": "science" }, { "subexperiment": "1B_pertensor_methods", "task": "openbookqa_test", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "pertensor_pca", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_pca_N24_full", "selected_topk": null, "accuracy": 0.7666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 13.996, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.20731707317073203, "domain": "science" }, { "subexperiment": "1B_pertensor_methods", "task": "openbookqa_test", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "pertensor_ridge", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_pertensor_ridge_N24_full", "selected_topk": null, "accuracy": 0.7466666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 23.164, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.1341463414634149, "domain": "science" }, { "subexperiment": "1B_pertensor_methods", "task": "openbookqa_test", "N": 24, "seed": 0, "deterministic_full_pool": true, "method": "procrustes", "topk_K": null, "anchors": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "adapter_dir": "/workspace/round3_out/round8/Y_pred/openbookqa_test_procrustes_N24_full", "selected_topk": null, "accuracy": 0.74, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 7.859, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.10975609756097571, "domain": "science" } ], "summary": { "k_sweep": { "2": { "n_records": 5, "gap_recovered_mean": 0.03236034798534796, "gap_recovered_std": 0.2512825052045027, "accuracy_mean": 0.2673333333333333 }, "4": { "n_records": 5, "gap_recovered_mean": 0.06904136513892616, "gap_recovered_std": 0.21041601120601108, "accuracy_mean": 0.272 }, "6": { "n_records": 5, "gap_recovered_mean": 0.08346313916435871, "gap_recovered_std": 0.13586719552143517, "accuracy_mean": 0.2753333333333333 }, "8": { "n_records": 5, "gap_recovered_mean": 0.12873481193603145, "gap_recovered_std": 0.15624061361170813, "accuracy_mean": 0.2813333333333333 }, "12": { "n_records": 5, "gap_recovered_mean": 0.13063497647339112, "gap_recovered_std": 0.1180632522546757, "accuracy_mean": 0.28600000000000003 }, "16": { "n_records": 5, "gap_recovered_mean": 0.12906987998451414, "gap_recovered_std": 0.09660226150894938, "accuracy_mean": 0.2846666666666667 }, "20": { "n_records": 5, "gap_recovered_mean": 0.13002371273712737, "gap_recovered_std": 0.08893282592449571, "accuracy_mean": 0.2846666666666667 }, "24": { "n_records": 5, "gap_recovered_mean": 0.12648059427022842, "gap_recovered_std": 0.08860748821676426, "accuracy_mean": 0.284 } }, "pertensor_methods": { "pertensor_ridge": { "12": { "n_records": 15, "gap_recovered_mean": 0.1148817030981665, "gap_recovered_std": 0.10917220312300747, "accuracy_mean": 0.2786666666666667 }, "16": { "n_records": 15, "gap_recovered_mean": 0.12140822761249587, "gap_recovered_std": 0.0932870579211698, "accuracy_mean": 0.2824444444444445 }, "24": { "n_records": 5, "gap_recovered_mean": 0.12609514130855595, "gap_recovered_std": 0.10935514163267879, "accuracy_mean": 0.28400000000000003 } }, "procrustes": { "12": { "n_records": 15, "gap_recovered_mean": 0.10543183611781175, "gap_recovered_std": 0.08157963485059341, "accuracy_mean": 0.2773333333333333 }, "16": { "n_records": 15, "gap_recovered_mean": 0.10409506313469727, "gap_recovered_std": 0.0855081370879396, "accuracy_mean": 0.2791111111111111 }, "24": { "n_records": 5, "gap_recovered_mean": 0.09999456505554065, "gap_recovered_std": 0.07428709190008917, "accuracy_mean": 0.2753333333333333 } }, "pertensor_pca": { "12": { "n_records": 15, "gap_recovered_mean": 0.09990568261909726, "gap_recovered_std": 0.07590792386005181, "accuracy_mean": 0.27644444444444444 }, "16": { "n_records": 15, "gap_recovered_mean": 0.10040114977614979, "gap_recovered_std": 0.07987630180578464, "accuracy_mean": 0.2777777777777778 }, "24": { "n_records": 5, "gap_recovered_mean": 0.1143281887489205, "gap_recovered_std": 0.10311517826966993, "accuracy_mean": 0.2793333333333333 } } }, "n24_six_method_comparison": { "mean": { "source": "R6", "n_records": 5, "gap_recovered_mean": 0.0830787285208017, "gap_recovered_std": 0.07181727060927716 }, "global_ridge": { "source": "R6", "n_records": 5, "gap_recovered_mean": 0.13478416569879983, "gap_recovered_std": 0.10350184199429305 }, "topk8_global_ridge": { "source": "R6", "n_records": 5, "gap_recovered_mean": 0.12109363366985318, "gap_recovered_std": 0.12401845134797244 }, "pertensor_ridge": { "source": "R8", "n_records": 5, "gap_recovered_mean": 0.12609514130855595, "gap_recovered_std": 0.10935514163267879 }, "procrustes": { "source": "R8", "n_records": 5, "gap_recovered_mean": 0.09999456505554065, "gap_recovered_std": 0.07428709190008917 }, "pertensor_pca": { "source": "R8", "n_records": 5, "gap_recovered_mean": 0.1143281887489205, "gap_recovered_std": 0.10311517826966993 } } }, "derived_from_r6_records": { "description": "Per-task N=16 stats derived from R6 records plus R8 N=16 new-method records; no new compute for R6 columns.", "per_task": { "gsm_hard": { "base_Y": 0.06333333333333334, "oracle": 0.15, "r6_N16": { "mean": { "accuracy": { "mean": 0.06555555555555555, "std": 0.0050917507721731595, "n": 3 }, "gap_recovered": { "mean": 0.025641025641025595, "std": 0.058750970448151855, "n": 3 }, "seeds": [ 0, 1, 2 ] }, "global_ridge": { "accuracy": { "mean": 0.061111111111111116, "std": 0.0038490017945975096, "n": 3 }, "gap_recovered": { "mean": -0.025641025641025675, "std": 0.04441155916843281, "n": 3 }, "seeds": [ 0, 1, 2 ] }, "topk8_global_ridge": { "accuracy": { "mean": 0.06222222222222223, "std": 0.005091750772173158, "n": 3 }, "gap_recovered": { "mean": -0.012820512820512877, "std": 0.05875097044815183, "n": 3 }, "seeds": [ 0, 1, 2 ] } }, "r8_N16": { "pertensor_ridge": { "accuracy": { "mean": 0.06444444444444446, "std": 0.0019245008972987488, "n": 3 }, "gap_recovered": { "mean": 0.012820512820512798, "std": 0.02220577958421634, "n": 3 }, "seeds": [ 0, 1, 2 ] }, "procrustes": { "accuracy": { "mean": 0.062222222222222213, "std": 0.005091750772173157, "n": 3 }, "gap_recovered": { "mean": -0.012820512820512877, "std": 0.05875097044815183, "n": 3 }, "seeds": [ 0, 1, 2 ] }, "pertensor_pca": { "accuracy": { "mean": 0.07, "std": 0.006666666666666661, "n": 3 }, "gap_recovered": { "mean": 0.07692307692307689, "std": 0.07692307692307687, "n": 3 }, "seeds": [ 0, 1, 2 ] } }, "best_R6_N16": { "method": "mean", "source": "R6", "accuracy_mean": 0.06555555555555555, "accuracy_std": 0.0050917507721731595, "gap_recovered_mean": 0.025641025641025595, "gap_recovered_std": 0.058750970448151855, "n": 3 }, "best_R8_new_N16": { "method": "pertensor_pca", "source": "R8", "accuracy_mean": 0.07, "accuracy_std": 0.006666666666666661, "gap_recovered_mean": 0.07692307692307689, "gap_recovered_std": 0.07692307692307687, "n": 3 }, "best_learned_N16": { "method": "pertensor_pca", "source": "R8", "accuracy_mean": 0.07, "accuracy_std": 0.006666666666666661, "gap_recovered_mean": 0.07692307692307689, "gap_recovered_std": 0.07692307692307687, "n": 3 } }, "gsm8k_test_500": { "base_Y": 0.08, "oracle": 0.29333333333333333, "r6_N16": { "mean": { "accuracy": { "mean": 0.10222222222222221, "std": 0.001924500897298749, "n": 3 }, "gap_recovered": { "mean": 0.10416666666666667, "std": 0.009021097956087886, "n": 3 }, "seeds": [ 0, 1, 2 ] }, "global_ridge": { "accuracy": { "mean": 0.09222222222222222, "std": 0.010715167512214395, "n": 3 }, "gap_recovered": { "mean": 0.057291666666666664, "std": 0.05022734771350498, "n": 3 }, "seeds": [ 0, 1, 2 ] }, "topk8_global_ridge": { "accuracy": { "mean": 0.09333333333333334, "std": 0.008819171036881974, "n": 3 }, "gap_recovered": { "mean": 0.0625, "std": 0.04133986423538425, "n": 3 }, "seeds": [ 0, 1, 2 ] } }, "r8_N16": { "pertensor_ridge": { "accuracy": { "mean": 0.08777777777777777, "std": 0.0076980035891950115, "n": 3 }, "gap_recovered": { "mean": 0.03645833333333331, "std": 0.036084391824351615, "n": 3 }, "seeds": [ 0, 1, 2 ] }, "procrustes": { "accuracy": { "mean": 0.09888888888888887, "std": 0.0050917507721731535, "n": 3 }, "gap_recovered": { "mean": 0.08854166666666667, "std": 0.023867581744561665, "n": 3 }, "seeds": [ 0, 1, 2 ] }, "pertensor_pca": { "accuracy": { "mean": 0.10555555555555556, "std": 0.0019245008972987568, "n": 3 }, "gap_recovered": { "mean": 0.11979166666666669, "std": 0.00902109795608792, "n": 3 }, "seeds": [ 0, 1, 2 ] } }, "best_R6_N16": { "method": "mean", "source": "R6", "accuracy_mean": 0.10222222222222221, "accuracy_std": 0.001924500897298749, "gap_recovered_mean": 0.10416666666666667, "gap_recovered_std": 0.009021097956087886, "n": 3 }, "best_R8_new_N16": { "method": "pertensor_pca", "source": "R8", "accuracy_mean": 0.10555555555555556, "accuracy_std": 0.0019245008972987568, "gap_recovered_mean": 0.11979166666666669, "gap_recovered_std": 0.00902109795608792, "n": 3 }, "best_learned_N16": { "method": "pertensor_pca", "source": "R8", "accuracy_mean": 0.10555555555555556, "accuracy_std": 0.0019245008972987568, "gap_recovered_mean": 0.11979166666666669, "gap_recovered_std": 0.00902109795608792, "n": 3 } }, "mbpp_test_held": { "base_Y": 0.23, "oracle": 0.32, "r6_N16": { "mean": { "accuracy": { "mean": 0.24, "std": 0.0, "n": 3 }, "gap_recovered": { "mean": 0.1111111111111109, "std": 1.6996749443881478e-17, "n": 3 }, "seeds": [ 0, 1, 2 ] }, "global_ridge": { "accuracy": { "mean": 0.25666666666666665, "std": 0.005773502691896262, "n": 3 }, "gap_recovered": { "mean": 0.2962962962962963, "std": 0.06415002990995847, "n": 3 }, "seeds": [ 0, 1, 2 ] }, "topk8_global_ridge": { "accuracy": { "mean": 0.25, "std": 0.0, "n": 3 }, "gap_recovered": { "mean": 0.22222222222222213, "std": 0.0, "n": 3 }, "seeds": [ 0, 1, 2 ] } }, "r8_N16": { "pertensor_ridge": { "accuracy": { "mean": 0.25, "std": 0.0, "n": 3 }, "gap_recovered": { "mean": 0.22222222222222213, "std": 0.0, "n": 3 }, "seeds": [ 0, 1, 2 ] }, "procrustes": { "accuracy": { "mean": 0.25, "std": 0.0, "n": 3 }, "gap_recovered": { "mean": 0.22222222222222213, "std": 0.0, "n": 3 }, "seeds": [ 0, 1, 2 ] }, "pertensor_pca": { "accuracy": { "mean": 0.24333333333333332, "std": 0.005773502691896263, "n": 3 }, "gap_recovered": { "mean": 0.148148148148148, "std": 0.06415002990995848, "n": 3 }, "seeds": [ 0, 1, 2 ] } }, "best_R6_N16": { "method": "global_ridge", "source": "R6", "accuracy_mean": 0.25666666666666665, "accuracy_std": 0.005773502691896262, "gap_recovered_mean": 0.2962962962962963, "gap_recovered_std": 0.06415002990995847, "n": 3 }, "best_R8_new_N16": { "method": "pertensor_ridge", "source": "R8", "accuracy_mean": 0.25, "accuracy_std": 0.0, "gap_recovered_mean": 0.22222222222222213, "gap_recovered_std": 0.0, "n": 3 }, "best_learned_N16": { "method": "global_ridge", "source": "R6", "accuracy_mean": 0.25666666666666665, "accuracy_std": 0.005773502691896262, "gap_recovered_mean": 0.2962962962962963, "gap_recovered_std": 0.06415002990995847, "n": 3 } }, "mbpp_plus": { "base_Y": 0.21666666666666667, "oracle": 0.45, "r6_N16": { "mean": { "accuracy": { "mean": 0.21222222222222223, "std": 0.0019245008972987648, "n": 3 }, "gap_recovered": { "mean": -0.019047619047619053, "std": 0.008247860988423278, "n": 3 }, "seeds": [ 0, 1, 2 ] }, "global_ridge": { "accuracy": { "mean": 0.27, "std": 0.003333333333333327, "n": 3 }, "gap_recovered": { "mean": 0.22857142857142854, "std": 0.014285714285714249, "n": 3 }, "seeds": [ 0, 1, 2 ] }, "topk8_global_ridge": { "accuracy": { "mean": 0.2688888888888889, "std": 0.0038490017945974975, "n": 3 }, "gap_recovered": { "mean": 0.22380952380952376, "std": 0.016495721976846407, "n": 3 }, "seeds": [ 0, 1, 2 ] } }, "r8_N16": { "pertensor_ridge": { "accuracy": { "mean": 0.26555555555555554, "std": 0.0019245008972987648, "n": 3 }, "gap_recovered": { "mean": 0.20952380952380945, "std": 0.008247860988423292, "n": 3 }, "seeds": [ 0, 1, 2 ] }, "procrustes": { "accuracy": { "mean": 0.23444444444444446, "std": 0.0050917507721731465, "n": 3 }, "gap_recovered": { "mean": 0.07619047619047616, "std": 0.021821789023599193, "n": 3 }, "seeds": [ 0, 1, 2 ] }, "pertensor_pca": { "accuracy": { "mean": 0.21444444444444447, "std": 0.0019245008972987488, "n": 3 }, "gap_recovered": { "mean": -0.009523809523809506, "std": 0.008247860988423209, "n": 3 }, "seeds": [ 0, 1, 2 ] } }, "best_R6_N16": { "method": "global_ridge", "source": "R6", "accuracy_mean": 0.27, "accuracy_std": 0.003333333333333327, "gap_recovered_mean": 0.22857142857142854, "gap_recovered_std": 0.014285714285714249, "n": 3 }, "best_R8_new_N16": { "method": "pertensor_ridge", "source": "R8", "accuracy_mean": 0.26555555555555554, "accuracy_std": 0.0019245008972987648, "gap_recovered_mean": 0.20952380952380945, "gap_recovered_std": 0.008247860988423292, "n": 3 }, "best_learned_N16": { "method": "global_ridge", "source": "R6", "accuracy_mean": 0.27, "accuracy_std": 0.003333333333333327, "gap_recovered_mean": 0.22857142857142854, "gap_recovered_std": 0.014285714285714249, "n": 3 } }, "openbookqa_test": { "base_Y": 0.71, "oracle": 0.9833333333333333, "r6_N16": { "mean": { "accuracy": { "mean": 0.7544444444444444, "std": 0.001924500897298797, "n": 3 }, "gap_recovered": { "mean": 0.16260162601626027, "std": 0.007040856941337066, "n": 3 }, "seeds": [ 0, 1, 2 ] }, "global_ridge": { "accuracy": { "mean": 0.7444444444444445, "std": 0.015030832509409663, "n": 3 }, "gap_recovered": { "mean": 0.1260162601626017, "std": 0.054990850644181695, "n": 3 }, "seeds": [ 0, 1, 2 ] }, "topk8_global_ridge": { "accuracy": { "mean": 0.7433333333333333, "std": 0.01527525231651948, "n": 3 }, "gap_recovered": { "mean": 0.12195121951219523, "std": 0.05588506945068102, "n": 3 }, "seeds": [ 0, 1, 2 ] } }, "r8_N16": { "pertensor_ridge": { "accuracy": { "mean": 0.7444444444444445, "std": 0.016442942874387488, "n": 3 }, "gap_recovered": { "mean": 0.1260162601626017, "std": 0.060157108077027406, "n": 3 }, "seeds": [ 0, 1, 2 ] }, "procrustes": { "accuracy": { "mean": 0.75, "std": 0.008819171036881984, "n": 3 }, "gap_recovered": { "mean": 0.14634146341463428, "std": 0.03226525989103164, "n": 3 }, "seeds": [ 0, 1, 2 ] }, "pertensor_pca": { "accuracy": { "mean": 0.7555555555555555, "std": 0.019245008972987587, "n": 3 }, "gap_recovered": { "mean": 0.16666666666666685, "std": 0.07040856941336922, "n": 3 }, "seeds": [ 0, 1, 2 ] } }, "best_R6_N16": { "method": "mean", "source": "R6", "accuracy_mean": 0.7544444444444444, "accuracy_std": 0.001924500897298797, "gap_recovered_mean": 0.16260162601626027, "gap_recovered_std": 0.007040856941337066, "n": 3 }, "best_R8_new_N16": { "method": "pertensor_pca", "source": "R8", "accuracy_mean": 0.7555555555555555, "accuracy_std": 0.019245008972987587, "gap_recovered_mean": 0.16666666666666685, "gap_recovered_std": 0.07040856941336922, "n": 3 }, "best_learned_N16": { "method": "pertensor_pca", "source": "R8", "accuracy_mean": 0.7555555555555555, "accuracy_std": 0.019245008972987587, "gap_recovered_mean": 0.16666666666666685, "gap_recovered_std": 0.07040856941336922, "n": 3 } } } }, "derived_from_r6_records_source": { "results_round6_json_pulled_from_hub": true, "r6_record_count": 195 } }