{ "config": { "hub_repo": "CK0607/cross-model-lora-prediction-3b", "model_Y": "meta-llama/Llama-3.2-3B-Instruct", "no_surrogate": true, "generation": { "do_sample": false, "num_beams": 1, "max_new_tokens_code": 96, "max_new_tokens_other": 24 }, "heldouts": [ "gsm_hard", "gsm8k_test_500", "mbpp_test_held", "mbpp_plus", "openbookqa_test" ], "pool_anchor_names": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ] }, "adapter_verification": { "listing": { "round4/X": [ "aqua_rat", "arc_challenge", "arc_easy", "gsm8k", "gsm8k_test_500", "gsm_hard", "humaneval", "math_algebra_easy", "math_counting_easy", "mbpp", "mbpp_plus", "mbpp_sanitized", "mbpp_test_held", "medmcqa_easy", "mmlu_elementary_math", "mmlu_high_school_biology", "mmlu_high_school_physics", "multiarith", "openbookqa", "openbookqa_test", "sciq", "svamp" ], "round4/Y": [ "aqua_rat", "arc_challenge", "arc_easy", "gsm8k", "gsm8k_test_500", "gsm_hard", "humaneval", "math_algebra_easy", "math_counting_easy", "mbpp", "mbpp_plus", "mbpp_sanitized", "mbpp_test_held", "medmcqa_easy", "mmlu_elementary_math", "mmlu_high_school_biology", "mmlu_high_school_physics", "multiarith", "openbookqa", "openbookqa_test", "sciq", "svamp" ], "round5/X": [ "aqua_rat_numeric", "conala_curated", "humaneval", "math_counting_easy", "mawps", "mbpp_sanitized", "medmcqa_easy", "pubmedqa_pqal" ], "round5/Y": [ "aqua_rat_numeric", "conala_curated", "humaneval", "math_counting_easy", "mawps", "mbpp_sanitized", "medmcqa_easy", "pubmedqa_pqal" ], "round6/Y_pred": [ "gsm8k_test_500_global_ridge_N12_seed0", "gsm8k_test_500_global_ridge_N12_seed1", "gsm8k_test_500_global_ridge_N12_seed2", "gsm8k_test_500_global_ridge_N16_seed0", "gsm8k_test_500_global_ridge_N16_seed1", "gsm8k_test_500_global_ridge_N16_seed2", "gsm8k_test_500_global_ridge_N24_full", "gsm8k_test_500_global_ridge_N4_seed0", "gsm8k_test_500_global_ridge_N4_seed1", "gsm8k_test_500_global_ridge_N4_seed2", "gsm8k_test_500_global_ridge_N8_seed0", "gsm8k_test_500_global_ridge_N8_seed1", "gsm8k_test_500_global_ridge_N8_seed2", "gsm8k_test_500_mean_N12_seed0", "gsm8k_test_500_mean_N12_seed1", "gsm8k_test_500_mean_N12_seed2", "gsm8k_test_500_mean_N16_seed0", "gsm8k_test_500_mean_N16_seed1", "gsm8k_test_500_mean_N16_seed2", "gsm8k_test_500_mean_N24_full", "gsm8k_test_500_mean_N4_seed0", "gsm8k_test_500_mean_N4_seed1", "gsm8k_test_500_mean_N4_seed2", "gsm8k_test_500_mean_N8_seed0", "gsm8k_test_500_mean_N8_seed1", "gsm8k_test_500_mean_N8_seed2", "gsm8k_test_500_topk8_global_ridge_N12_seed0", "gsm8k_test_500_topk8_global_ridge_N12_seed1", "gsm8k_test_500_topk8_global_ridge_N12_seed2", "gsm8k_test_500_topk8_global_ridge_N16_seed0", "gsm8k_test_500_topk8_global_ridge_N16_seed1", "gsm8k_test_500_topk8_global_ridge_N16_seed2", "gsm8k_test_500_topk8_global_ridge_N24_full", "gsm8k_test_500_topk8_global_ridge_N4_seed0", "gsm8k_test_500_topk8_global_ridge_N4_seed1", "gsm8k_test_500_topk8_global_ridge_N4_seed2", "gsm8k_test_500_topk8_global_ridge_N8_seed0", "gsm8k_test_500_topk8_global_ridge_N8_seed1", "gsm8k_test_500_topk8_global_ridge_N8_seed2", "gsm_hard_global_ridge_N12_seed0", "gsm_hard_global_ridge_N12_seed1", "gsm_hard_global_ridge_N12_seed2", "gsm_hard_global_ridge_N16_seed0", "gsm_hard_global_ridge_N16_seed1", "gsm_hard_global_ridge_N16_seed2", "gsm_hard_global_ridge_N24_full", "gsm_hard_global_ridge_N4_seed0", "gsm_hard_global_ridge_N4_seed1", "gsm_hard_global_ridge_N4_seed2", "gsm_hard_global_ridge_N8_seed0", "gsm_hard_global_ridge_N8_seed1", "gsm_hard_global_ridge_N8_seed2", "gsm_hard_mean_N12_seed0", "gsm_hard_mean_N12_seed1", "gsm_hard_mean_N12_seed2", "gsm_hard_mean_N16_seed0", "gsm_hard_mean_N16_seed1", "gsm_hard_mean_N16_seed2", "gsm_hard_mean_N24_full", "gsm_hard_mean_N4_seed0", "gsm_hard_mean_N4_seed1", "gsm_hard_mean_N4_seed2", "gsm_hard_mean_N8_seed0", "gsm_hard_mean_N8_seed1", "gsm_hard_mean_N8_seed2", "gsm_hard_topk8_global_ridge_N12_seed0", "gsm_hard_topk8_global_ridge_N12_seed1", "gsm_hard_topk8_global_ridge_N12_seed2", "gsm_hard_topk8_global_ridge_N16_seed0", "gsm_hard_topk8_global_ridge_N16_seed1", "gsm_hard_topk8_global_ridge_N16_seed2", "gsm_hard_topk8_global_ridge_N24_full", "gsm_hard_topk8_global_ridge_N4_seed0", "gsm_hard_topk8_global_ridge_N4_seed1", "gsm_hard_topk8_global_ridge_N4_seed2", "gsm_hard_topk8_global_ridge_N8_seed0", "gsm_hard_topk8_global_ridge_N8_seed1", "gsm_hard_topk8_global_ridge_N8_seed2", "mbpp_plus_global_ridge_N12_seed0", "mbpp_plus_global_ridge_N12_seed1", "mbpp_plus_global_ridge_N12_seed2", "mbpp_plus_global_ridge_N16_seed0", "mbpp_plus_global_ridge_N16_seed1", "mbpp_plus_global_ridge_N16_seed2", "mbpp_plus_global_ridge_N24_full", "mbpp_plus_global_ridge_N4_seed0", "mbpp_plus_global_ridge_N4_seed1", "mbpp_plus_global_ridge_N4_seed2", "mbpp_plus_global_ridge_N8_seed0", "mbpp_plus_global_ridge_N8_seed1", "mbpp_plus_global_ridge_N8_seed2", "mbpp_plus_mean_N12_seed0", "mbpp_plus_mean_N12_seed1", "mbpp_plus_mean_N12_seed2", "mbpp_plus_mean_N16_seed0", "mbpp_plus_mean_N16_seed1", "mbpp_plus_mean_N16_seed2", "mbpp_plus_mean_N24_full", "mbpp_plus_mean_N4_seed0", "mbpp_plus_mean_N4_seed1", "mbpp_plus_mean_N4_seed2", "mbpp_plus_mean_N8_seed0", "mbpp_plus_mean_N8_seed1", "mbpp_plus_mean_N8_seed2", "mbpp_plus_topk8_global_ridge_N12_seed0", "mbpp_plus_topk8_global_ridge_N12_seed1", "mbpp_plus_topk8_global_ridge_N12_seed2", "mbpp_plus_topk8_global_ridge_N16_seed0", "mbpp_plus_topk8_global_ridge_N16_seed1", "mbpp_plus_topk8_global_ridge_N16_seed2", "mbpp_plus_topk8_global_ridge_N24_full", "mbpp_plus_topk8_global_ridge_N4_seed0", "mbpp_plus_topk8_global_ridge_N4_seed1", "mbpp_plus_topk8_global_ridge_N4_seed2", "mbpp_plus_topk8_global_ridge_N8_seed0", "mbpp_plus_topk8_global_ridge_N8_seed1", "mbpp_plus_topk8_global_ridge_N8_seed2", "mbpp_test_held_global_ridge_N12_seed0", "mbpp_test_held_global_ridge_N12_seed1", "mbpp_test_held_global_ridge_N12_seed2", "mbpp_test_held_global_ridge_N16_seed0", "mbpp_test_held_global_ridge_N16_seed1", "mbpp_test_held_global_ridge_N16_seed2", "mbpp_test_held_global_ridge_N24_full", "mbpp_test_held_global_ridge_N4_seed0", "mbpp_test_held_global_ridge_N4_seed1", "mbpp_test_held_global_ridge_N4_seed2", "mbpp_test_held_global_ridge_N8_seed0", "mbpp_test_held_global_ridge_N8_seed1", "mbpp_test_held_global_ridge_N8_seed2", "mbpp_test_held_mean_N12_seed0", "mbpp_test_held_mean_N12_seed1", "mbpp_test_held_mean_N12_seed2", "mbpp_test_held_mean_N16_seed0", "mbpp_test_held_mean_N16_seed1", "mbpp_test_held_mean_N16_seed2", "mbpp_test_held_mean_N24_full", "mbpp_test_held_mean_N4_seed0", "mbpp_test_held_mean_N4_seed1", "mbpp_test_held_mean_N4_seed2", "mbpp_test_held_mean_N8_seed0", "mbpp_test_held_mean_N8_seed1", "mbpp_test_held_mean_N8_seed2", "mbpp_test_held_topk8_global_ridge_N12_seed0", "mbpp_test_held_topk8_global_ridge_N12_seed1", "mbpp_test_held_topk8_global_ridge_N12_seed2", "mbpp_test_held_topk8_global_ridge_N16_seed0", "mbpp_test_held_topk8_global_ridge_N16_seed1", "mbpp_test_held_topk8_global_ridge_N16_seed2", "mbpp_test_held_topk8_global_ridge_N24_full", "mbpp_test_held_topk8_global_ridge_N4_seed0", "mbpp_test_held_topk8_global_ridge_N4_seed1", "mbpp_test_held_topk8_global_ridge_N4_seed2", "mbpp_test_held_topk8_global_ridge_N8_seed0", "mbpp_test_held_topk8_global_ridge_N8_seed1", "mbpp_test_held_topk8_global_ridge_N8_seed2", "openbookqa_test_global_ridge_N12_seed0", "openbookqa_test_global_ridge_N12_seed1", "openbookqa_test_global_ridge_N12_seed2", "openbookqa_test_global_ridge_N16_seed0", "openbookqa_test_global_ridge_N16_seed1", "openbookqa_test_global_ridge_N16_seed2", "openbookqa_test_global_ridge_N24_full", "openbookqa_test_global_ridge_N4_seed0", "openbookqa_test_global_ridge_N4_seed1", "openbookqa_test_global_ridge_N4_seed2", "openbookqa_test_global_ridge_N8_seed0", "openbookqa_test_global_ridge_N8_seed1", "openbookqa_test_global_ridge_N8_seed2", "openbookqa_test_mean_N12_seed0", "openbookqa_test_mean_N12_seed1", "openbookqa_test_mean_N12_seed2", "openbookqa_test_mean_N16_seed0", "openbookqa_test_mean_N16_seed1", "openbookqa_test_mean_N16_seed2", "openbookqa_test_mean_N24_full", "openbookqa_test_mean_N4_seed0", "openbookqa_test_mean_N4_seed1", "openbookqa_test_mean_N4_seed2", "openbookqa_test_mean_N8_seed0", "openbookqa_test_mean_N8_seed1", "openbookqa_test_mean_N8_seed2", "openbookqa_test_topk8_global_ridge_N12_seed0", "openbookqa_test_topk8_global_ridge_N12_seed1", "openbookqa_test_topk8_global_ridge_N12_seed2", "openbookqa_test_topk8_global_ridge_N16_seed0", "openbookqa_test_topk8_global_ridge_N16_seed1", "openbookqa_test_topk8_global_ridge_N16_seed2", "openbookqa_test_topk8_global_ridge_N24_full", "openbookqa_test_topk8_global_ridge_N4_seed0", "openbookqa_test_topk8_global_ridge_N4_seed1", "openbookqa_test_topk8_global_ridge_N4_seed2", "openbookqa_test_topk8_global_ridge_N8_seed0", "openbookqa_test_topk8_global_ridge_N8_seed1", "openbookqa_test_topk8_global_ridge_N8_seed2" ], "round8/Y_pred": [ "gsm8k_test_500_pertensor_pca_N12_seed0", "gsm8k_test_500_pertensor_pca_N12_seed1", "gsm8k_test_500_pertensor_pca_N12_seed2", "gsm8k_test_500_pertensor_pca_N16_seed0", "gsm8k_test_500_pertensor_pca_N16_seed1", "gsm8k_test_500_pertensor_pca_N16_seed2", "gsm8k_test_500_pertensor_pca_N24_full", "gsm8k_test_500_pertensor_ridge_N12_seed0", "gsm8k_test_500_pertensor_ridge_N12_seed1", "gsm8k_test_500_pertensor_ridge_N12_seed2", "gsm8k_test_500_pertensor_ridge_N16_seed0", "gsm8k_test_500_pertensor_ridge_N16_seed1", "gsm8k_test_500_pertensor_ridge_N16_seed2", "gsm8k_test_500_pertensor_ridge_N24_full", "gsm8k_test_500_procrustes_N12_seed0", "gsm8k_test_500_procrustes_N12_seed1", "gsm8k_test_500_procrustes_N12_seed2", "gsm8k_test_500_procrustes_N16_seed0", "gsm8k_test_500_procrustes_N16_seed1", "gsm8k_test_500_procrustes_N16_seed2", "gsm8k_test_500_procrustes_N24_full", "gsm8k_test_500_topk12_global_ridge_N24_full", "gsm8k_test_500_topk16_global_ridge_N24_full", "gsm8k_test_500_topk20_global_ridge_N24_full", "gsm8k_test_500_topk24_global_ridge_N24_full", "gsm8k_test_500_topk2_global_ridge_N24_full", "gsm8k_test_500_topk4_global_ridge_N24_full", "gsm8k_test_500_topk6_global_ridge_N24_full", "gsm8k_test_500_topk8_global_ridge_N24_full", "gsm_hard_pertensor_pca_N12_seed0", "gsm_hard_pertensor_pca_N12_seed1", "gsm_hard_pertensor_pca_N12_seed2", "gsm_hard_pertensor_pca_N16_seed0", "gsm_hard_pertensor_pca_N16_seed1", "gsm_hard_pertensor_pca_N16_seed2", "gsm_hard_pertensor_pca_N24_full", "gsm_hard_pertensor_ridge_N12_seed0", "gsm_hard_pertensor_ridge_N12_seed1", "gsm_hard_pertensor_ridge_N12_seed2", "gsm_hard_pertensor_ridge_N16_seed0", "gsm_hard_pertensor_ridge_N16_seed1", "gsm_hard_pertensor_ridge_N16_seed2", "gsm_hard_pertensor_ridge_N24_full", "gsm_hard_procrustes_N12_seed0", "gsm_hard_procrustes_N12_seed1", "gsm_hard_procrustes_N12_seed2", "gsm_hard_procrustes_N16_seed0", "gsm_hard_procrustes_N16_seed1", "gsm_hard_procrustes_N16_seed2", "gsm_hard_procrustes_N24_full", "gsm_hard_topk12_global_ridge_N24_full", "gsm_hard_topk16_global_ridge_N24_full", "gsm_hard_topk20_global_ridge_N24_full", "gsm_hard_topk24_global_ridge_N24_full", "gsm_hard_topk2_global_ridge_N24_full", "gsm_hard_topk4_global_ridge_N24_full", "gsm_hard_topk6_global_ridge_N24_full", "gsm_hard_topk8_global_ridge_N24_full", "mbpp_plus_pertensor_pca_N12_seed0", "mbpp_plus_pertensor_pca_N12_seed1", "mbpp_plus_pertensor_pca_N12_seed2", "mbpp_plus_pertensor_pca_N16_seed0", "mbpp_plus_pertensor_pca_N16_seed1", "mbpp_plus_pertensor_pca_N16_seed2", "mbpp_plus_pertensor_pca_N24_full", "mbpp_plus_pertensor_ridge_N12_seed0", "mbpp_plus_pertensor_ridge_N12_seed1", "mbpp_plus_pertensor_ridge_N12_seed2", "mbpp_plus_pertensor_ridge_N16_seed0", "mbpp_plus_pertensor_ridge_N16_seed1", "mbpp_plus_pertensor_ridge_N16_seed2", "mbpp_plus_pertensor_ridge_N24_full", "mbpp_plus_procrustes_N12_seed0", "mbpp_plus_procrustes_N12_seed1", "mbpp_plus_procrustes_N12_seed2", "mbpp_plus_procrustes_N16_seed0", "mbpp_plus_procrustes_N16_seed1", "mbpp_plus_procrustes_N16_seed2", "mbpp_plus_procrustes_N24_full", "mbpp_plus_topk12_global_ridge_N24_full", "mbpp_plus_topk16_global_ridge_N24_full", "mbpp_plus_topk20_global_ridge_N24_full", "mbpp_plus_topk24_global_ridge_N24_full", "mbpp_plus_topk2_global_ridge_N24_full", "mbpp_plus_topk4_global_ridge_N24_full", "mbpp_plus_topk6_global_ridge_N24_full", "mbpp_plus_topk8_global_ridge_N24_full", "mbpp_test_held_pertensor_pca_N12_seed0", "mbpp_test_held_pertensor_pca_N12_seed1", "mbpp_test_held_pertensor_pca_N12_seed2", "mbpp_test_held_pertensor_pca_N16_seed0", "mbpp_test_held_pertensor_pca_N16_seed1", "mbpp_test_held_pertensor_pca_N16_seed2", "mbpp_test_held_pertensor_pca_N24_full", "mbpp_test_held_pertensor_ridge_N12_seed0", "mbpp_test_held_pertensor_ridge_N12_seed1", "mbpp_test_held_pertensor_ridge_N12_seed2", "mbpp_test_held_pertensor_ridge_N16_seed0", "mbpp_test_held_pertensor_ridge_N16_seed1", "mbpp_test_held_pertensor_ridge_N16_seed2", "mbpp_test_held_pertensor_ridge_N24_full", "mbpp_test_held_procrustes_N12_seed0", "mbpp_test_held_procrustes_N12_seed1", "mbpp_test_held_procrustes_N12_seed2", "mbpp_test_held_procrustes_N16_seed0", "mbpp_test_held_procrustes_N16_seed1", "mbpp_test_held_procrustes_N16_seed2", "mbpp_test_held_procrustes_N24_full", "mbpp_test_held_topk12_global_ridge_N24_full", "mbpp_test_held_topk16_global_ridge_N24_full", "mbpp_test_held_topk20_global_ridge_N24_full", "mbpp_test_held_topk24_global_ridge_N24_full", "mbpp_test_held_topk2_global_ridge_N24_full", "mbpp_test_held_topk4_global_ridge_N24_full", "mbpp_test_held_topk6_global_ridge_N24_full", "mbpp_test_held_topk8_global_ridge_N24_full", "openbookqa_test_pertensor_pca_N12_seed0", "openbookqa_test_pertensor_pca_N12_seed1", "openbookqa_test_pertensor_pca_N12_seed2", "openbookqa_test_pertensor_pca_N16_seed0", "openbookqa_test_pertensor_pca_N16_seed1", "openbookqa_test_pertensor_pca_N16_seed2", "openbookqa_test_pertensor_pca_N24_full", "openbookqa_test_pertensor_ridge_N12_seed0", "openbookqa_test_pertensor_ridge_N12_seed1", "openbookqa_test_pertensor_ridge_N12_seed2", "openbookqa_test_pertensor_ridge_N16_seed0", "openbookqa_test_pertensor_ridge_N16_seed1", "openbookqa_test_pertensor_ridge_N16_seed2", "openbookqa_test_pertensor_ridge_N24_full", "openbookqa_test_procrustes_N12_seed0", "openbookqa_test_procrustes_N12_seed1", "openbookqa_test_procrustes_N12_seed2", "openbookqa_test_procrustes_N16_seed0", "openbookqa_test_procrustes_N16_seed1", "openbookqa_test_procrustes_N16_seed2", "openbookqa_test_procrustes_N24_full", "openbookqa_test_topk12_global_ridge_N24_full", "openbookqa_test_topk16_global_ridge_N24_full", "openbookqa_test_topk20_global_ridge_N24_full", "openbookqa_test_topk24_global_ridge_N24_full", "openbookqa_test_topk2_global_ridge_N24_full", "openbookqa_test_topk4_global_ridge_N24_full", "openbookqa_test_topk6_global_ridge_N24_full", "openbookqa_test_topk8_global_ridge_N24_full" ] }, "missing": [], "count_warnings": [] }, "baselines": { "gsm_hard": { "base_Y": 0.06333333333333334, "oracle": 0.15 }, "gsm8k_test_500": { "base_Y": 0.08, "oracle": 0.29333333333333333 }, "mbpp_test_held": { "base_Y": 0.23, "oracle": 0.32 }, "mbpp_plus": { "base_Y": 0.21666666666666667, "oracle": 0.45 }, "openbookqa_test": { "base_Y": 0.71, "oracle": 0.9833333333333333 } }, "summary": { "overall_spearman": 0.1299305286501594, "per_heldout": { "gsm_hard": 0.28540339681524574, "gsm8k_test_500": 0.04367789323240817, "mbpp_test_held": -0.2896337305973781, "mbpp_plus": -0.10085771557215327, "openbookqa_test": -0.2975768832863932 }, "per_target_domain": { "code": 0.017636096528107978, "math": 0.2826406573020395, "science": -0.2975768832863932 }, "per_anchor_domain": { "code": 0.14899075217681867, "math": 0.07629665365697172, "science": 0.2376382913201063 }, "decision": "weak locality, ridge subsumes it" }, "records": [ { "cell_id": "A::gsm8k_test_500::r4:humaneval", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r4:humaneval", "anchor_name": "humaneval", "anchor_round": "r4", "anchor_domain": "code", "cos_X": 0.9497017860412598, "adapter_dir": "/workspace/round3_out/round4/Y/humaneval", "accuracy": 0.17333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 28.044, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": 0.43750000000000006 }, { "cell_id": "A::gsm8k_test_500::r4:mbpp", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r4:mbpp", "anchor_name": "mbpp", "anchor_round": "r4", "anchor_domain": "code", "cos_X": -0.00027140171732753515, "adapter_dir": "/workspace/round3_out/round4/Y/mbpp", "accuracy": 0.06, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 24.609, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": -0.09375000000000003 }, { "cell_id": "A::gsm8k_test_500::r4:mbpp_sanitized", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r4:mbpp_sanitized", "anchor_name": "mbpp_sanitized", "anchor_round": "r4", "anchor_domain": "code", "cos_X": 0.9527238011360168, "adapter_dir": "/workspace/round3_out/round4/Y/mbpp_sanitized", "accuracy": 0.06666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 23.479, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": -0.06250000000000001 }, { "cell_id": "A::gsm8k_test_500::r5:conala_curated", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r5:conala_curated", "anchor_name": "conala_curated", "anchor_round": "r5", "anchor_domain": "code", "cos_X": 0.8599404692649841, "adapter_dir": "/workspace/round3_out/round5/Y/conala_curated", "accuracy": 0.18333333333333332, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 28.998, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": 0.48437499999999994 }, { "cell_id": "A::gsm8k_test_500::r5:humaneval", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r5:humaneval", "anchor_name": "humaneval", "anchor_round": "r5", "anchor_domain": "code", "cos_X": 0.9497017860412598, "adapter_dir": "/workspace/round3_out/round5/Y/humaneval", "accuracy": 0.17333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 29.242, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": 0.43750000000000006 }, { "cell_id": "A::gsm8k_test_500::r5:mbpp_sanitized", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r5:mbpp_sanitized", "anchor_name": "mbpp_sanitized", "anchor_round": "r5", "anchor_domain": "code", "cos_X": -0.00027140171732753515, "adapter_dir": "/workspace/round3_out/round5/Y/mbpp_sanitized", "accuracy": 0.06, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 25.368, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": -0.09375000000000003 }, { "cell_id": "A::gsm8k_test_500::r4:aqua_rat", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r4:aqua_rat", "anchor_name": "aqua_rat", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.8731036186218262, "adapter_dir": "/workspace/round3_out/round4/Y/aqua_rat", "accuracy": 0.056666666666666664, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 28.153, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": -0.10937500000000003 }, { "cell_id": "A::gsm8k_test_500::r4:gsm8k", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r4:gsm8k", "anchor_name": "gsm8k", "anchor_round": "r4", "anchor_domain": "math", "cos_X": -0.0006812263745814562, "adapter_dir": "/workspace/round3_out/round4/Y/gsm8k", "accuracy": 0.14, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 6.608, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": 0.28125000000000006 }, { "cell_id": "A::gsm8k_test_500::r4:math_algebra_easy", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r4:math_algebra_easy", "anchor_name": "math_algebra_easy", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.9428298473358154, "adapter_dir": "/workspace/round3_out/round4/Y/math_algebra_easy", "accuracy": 0.08333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 6.667, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": 0.015624999999999972 }, { "cell_id": "A::gsm8k_test_500::r4:math_counting_easy", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r4:math_counting_easy", "anchor_name": "math_counting_easy", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.9606146216392517, "adapter_dir": "/workspace/round3_out/round4/Y/math_counting_easy", "accuracy": 0.07333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 10.451, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": -0.03125000000000001 }, { "cell_id": "A::gsm8k_test_500::r4:multiarith", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r4:multiarith", "anchor_name": "multiarith", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.9475974440574646, "adapter_dir": "/workspace/round3_out/round4/Y/multiarith", "accuracy": 0.07666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 15.16, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": -0.015625000000000038 }, { "cell_id": "A::gsm8k_test_500::r4:svamp", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r4:svamp", "anchor_name": "svamp", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.9288908839225769, "adapter_dir": "/workspace/round3_out/round4/Y/svamp", "accuracy": 0.07, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 5.968, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": -0.04687499999999998 }, { "cell_id": "A::gsm8k_test_500::r5:aqua_rat_numeric", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r5:aqua_rat_numeric", "anchor_name": "aqua_rat_numeric", "anchor_round": "r5", "anchor_domain": "math", "cos_X": -0.0004875913728028536, "adapter_dir": "/workspace/round3_out/round5/Y/aqua_rat_numeric", "accuracy": 0.08, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 29.239, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": 0.0 }, { "cell_id": "A::gsm8k_test_500::r5:math_counting_easy", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r5:math_counting_easy", "anchor_name": "math_counting_easy", "anchor_round": "r5", "anchor_domain": "math", "cos_X": -0.0004786302160937339, "adapter_dir": "/workspace/round3_out/round5/Y/math_counting_easy", "accuracy": 0.07333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 8.228, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": -0.03125000000000001 }, { "cell_id": "A::gsm8k_test_500::r5:mawps", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r5:mawps", "anchor_name": "mawps", "anchor_round": "r5", "anchor_domain": "math", "cos_X": -0.0008274300489574671, "adapter_dir": "/workspace/round3_out/round5/Y/mawps", "accuracy": 0.07666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 10.047, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": -0.015625000000000038 }, { "cell_id": "A::gsm8k_test_500::r4:arc_easy", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r4:arc_easy", "anchor_name": "arc_easy", "anchor_round": "r4", "anchor_domain": "science", "cos_X": -0.0004913151497021317, "adapter_dir": "/workspace/round3_out/round4/Y/arc_easy", "accuracy": 0.07333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 28.256, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": -0.03125000000000001 }, { "cell_id": "A::gsm8k_test_500::r4:medmcqa_easy", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r4:medmcqa_easy", "anchor_name": "medmcqa_easy", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.8598978519439697, "adapter_dir": "/workspace/round3_out/round4/Y/medmcqa_easy", "accuracy": 0.09333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 28.938, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": 0.06250000000000001 }, { "cell_id": "A::gsm8k_test_500::r4:mmlu_elementary_math", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r4:mmlu_elementary_math", "anchor_name": "mmlu_elementary_math", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.9377825260162354, "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_elementary_math", "accuracy": 0.08, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 26.479, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": 0.0 }, { "cell_id": "A::gsm8k_test_500::r4:mmlu_high_school_biology", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r4:mmlu_high_school_biology", "anchor_name": "mmlu_high_school_biology", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.9370604753494263, "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_biology", "accuracy": 0.07666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 28.829, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": -0.015625000000000038 }, { "cell_id": "A::gsm8k_test_500::r4:mmlu_high_school_physics", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r4:mmlu_high_school_physics", "anchor_name": "mmlu_high_school_physics", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.9513278603553772, "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_physics", "accuracy": 0.09333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 27.159, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": 0.06250000000000001 }, { "cell_id": "A::gsm8k_test_500::r4:openbookqa", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r4:openbookqa", "anchor_name": "openbookqa", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.8619711995124817, "adapter_dir": "/workspace/round3_out/round4/Y/openbookqa", "accuracy": 0.09666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 29.32, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": 0.07812499999999999 }, { "cell_id": "A::gsm8k_test_500::r4:sciq", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r4:sciq", "anchor_name": "sciq", "anchor_round": "r4", "anchor_domain": "science", "cos_X": -0.00034972114372067153, "adapter_dir": "/workspace/round3_out/round4/Y/sciq", "accuracy": 0.1, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 28.327, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": 0.09375000000000003 }, { "cell_id": "A::gsm8k_test_500::r5:medmcqa_easy", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r5:medmcqa_easy", "anchor_name": "medmcqa_easy", "anchor_round": "r5", "anchor_domain": "science", "cos_X": 0.8598978519439697, "adapter_dir": "/workspace/round3_out/round5/Y/medmcqa_easy", "accuracy": 0.09333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 28.144, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": 0.06250000000000001 }, { "cell_id": "A::gsm8k_test_500::r5:pubmedqa_pqal", "stage": "locality_single_anchor", "task": "gsm8k_test_500", "target_domain": "math", "anchor_ref": "r5:pubmedqa_pqal", "anchor_name": "pubmedqa_pqal", "anchor_round": "r5", "anchor_domain": "science", "cos_X": 0.8853808641433716, "adapter_dir": "/workspace/round3_out/round5/Y/pubmedqa_pqal", "accuracy": 0.08, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 18.209, "base_Y": 0.08, "oracle": 0.29333333333333333, "single_anchor_gap": 0.0 }, { "cell_id": "A::gsm_hard::r4:humaneval", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r4:humaneval", "anchor_name": "humaneval", "anchor_round": "r4", "anchor_domain": "code", "cos_X": 0.8956640958786011, "adapter_dir": "/workspace/round3_out/round4/Y/humaneval", "accuracy": 0.07666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 28.25, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": 0.15384615384615374 }, { "cell_id": "A::gsm_hard::r4:mbpp", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r4:mbpp", "anchor_name": "mbpp", "anchor_round": "r4", "anchor_domain": "code", "cos_X": -0.000505593023262918, "adapter_dir": "/workspace/round3_out/round4/Y/mbpp", "accuracy": 0.056666666666666664, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 27.549, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": -0.07692307692307702 }, { "cell_id": "A::gsm_hard::r4:mbpp_sanitized", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r4:mbpp_sanitized", "anchor_name": "mbpp_sanitized", "anchor_round": "r4", "anchor_domain": "code", "cos_X": 0.8983818888664246, "adapter_dir": "/workspace/round3_out/round4/Y/mbpp_sanitized", "accuracy": 0.05333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 27.652, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": -0.11538461538461542 }, { "cell_id": "A::gsm_hard::r5:conala_curated", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r5:conala_curated", "anchor_name": "conala_curated", "anchor_round": "r5", "anchor_domain": "code", "cos_X": 0.8125380277633667, "adapter_dir": "/workspace/round3_out/round5/Y/conala_curated", "accuracy": 0.07333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 28.894, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": 0.11538461538461534 }, { "cell_id": "A::gsm_hard::r5:humaneval", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r5:humaneval", "anchor_name": "humaneval", "anchor_round": "r5", "anchor_domain": "code", "cos_X": 0.8956640958786011, "adapter_dir": "/workspace/round3_out/round5/Y/humaneval", "accuracy": 0.07666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 29.186, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": 0.15384615384615374 }, { "cell_id": "A::gsm_hard::r5:mbpp_sanitized", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r5:mbpp_sanitized", "anchor_name": "mbpp_sanitized", "anchor_round": "r5", "anchor_domain": "code", "cos_X": -0.000505593023262918, "adapter_dir": "/workspace/round3_out/round5/Y/mbpp_sanitized", "accuracy": 0.056666666666666664, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 28.051, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": -0.07692307692307702 }, { "cell_id": "A::gsm_hard::r4:aqua_rat", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r4:aqua_rat", "anchor_name": "aqua_rat", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.8249850273132324, "adapter_dir": "/workspace/round3_out/round4/Y/aqua_rat", "accuracy": 0.05333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 28.068, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": -0.11538461538461542 }, { "cell_id": "A::gsm_hard::r4:gsm8k", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r4:gsm8k", "anchor_name": "gsm8k", "anchor_round": "r4", "anchor_domain": "math", "cos_X": -0.0006781710544601083, "adapter_dir": "/workspace/round3_out/round4/Y/gsm8k", "accuracy": 0.06666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 14.301, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": 0.038461538461538394 }, { "cell_id": "A::gsm_hard::r4:math_algebra_easy", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r4:math_algebra_easy", "anchor_name": "math_algebra_easy", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.8887502551078796, "adapter_dir": "/workspace/round3_out/round4/Y/math_algebra_easy", "accuracy": 0.05333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 17.285, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": -0.11538461538461542 }, { "cell_id": "A::gsm_hard::r4:math_counting_easy", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r4:math_counting_easy", "anchor_name": "math_counting_easy", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.9051075577735901, "adapter_dir": "/workspace/round3_out/round4/Y/math_counting_easy", "accuracy": 0.03666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 20.983, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": -0.30769230769230776 }, { "cell_id": "A::gsm_hard::r4:multiarith", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r4:multiarith", "anchor_name": "multiarith", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.8932132124900818, "adapter_dir": "/workspace/round3_out/round4/Y/multiarith", "accuracy": 0.04666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 26.632, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": -0.19230769230769237 }, { "cell_id": "A::gsm_hard::r4:svamp", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r4:svamp", "anchor_name": "svamp", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.8764325380325317, "adapter_dir": "/workspace/round3_out/round4/Y/svamp", "accuracy": 0.06666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 23.451, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": 0.038461538461538394 }, { "cell_id": "A::gsm_hard::r5:aqua_rat_numeric", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r5:aqua_rat_numeric", "anchor_name": "aqua_rat_numeric", "anchor_round": "r5", "anchor_domain": "math", "cos_X": -0.0008573997183702886, "adapter_dir": "/workspace/round3_out/round5/Y/aqua_rat_numeric", "accuracy": 0.03666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 29.81, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": -0.30769230769230776 }, { "cell_id": "A::gsm_hard::r5:math_counting_easy", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r5:math_counting_easy", "anchor_name": "math_counting_easy", "anchor_round": "r5", "anchor_domain": "math", "cos_X": -0.0006265510455705225, "adapter_dir": "/workspace/round3_out/round5/Y/math_counting_easy", "accuracy": 0.04, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 19.003, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": -0.26923076923076933 }, { "cell_id": "A::gsm_hard::r5:mawps", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r5:mawps", "anchor_name": "mawps", "anchor_round": "r5", "anchor_domain": "math", "cos_X": -0.0011123694712296128, "adapter_dir": "/workspace/round3_out/round5/Y/mawps", "accuracy": 0.02, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 20.956, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": -0.5000000000000001 }, { "cell_id": "A::gsm_hard::r4:arc_easy", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r4:arc_easy", "anchor_name": "arc_easy", "anchor_round": "r4", "anchor_domain": "science", "cos_X": -0.0009216612670570612, "adapter_dir": "/workspace/round3_out/round4/Y/arc_easy", "accuracy": 0.05, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 29.551, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": -0.15384615384615388 }, { "cell_id": "A::gsm_hard::r4:medmcqa_easy", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r4:medmcqa_easy", "anchor_name": "medmcqa_easy", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.8125821948051453, "adapter_dir": "/workspace/round3_out/round4/Y/medmcqa_easy", "accuracy": 0.03333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 28.713, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": -0.34615384615384626 }, { "cell_id": "A::gsm_hard::r4:mmlu_elementary_math", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r4:mmlu_elementary_math", "anchor_name": "mmlu_elementary_math", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.8851031064987183, "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_elementary_math", "accuracy": 0.03, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 29.335, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": -0.38461538461538475 }, { "cell_id": "A::gsm_hard::r4:mmlu_high_school_biology", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r4:mmlu_high_school_biology", "anchor_name": "mmlu_high_school_biology", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.8839024305343628, "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_biology", "accuracy": 0.04, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 29.358, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": -0.26923076923076933 }, { "cell_id": "A::gsm_hard::r4:mmlu_high_school_physics", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r4:mmlu_high_school_physics", "anchor_name": "mmlu_high_school_physics", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.8970074653625488, "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_physics", "accuracy": 0.06, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 28.333, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": -0.038461538461538554 }, { "cell_id": "A::gsm_hard::r4:openbookqa", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r4:openbookqa", "anchor_name": "openbookqa", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.8150046467781067, "adapter_dir": "/workspace/round3_out/round4/Y/openbookqa", "accuracy": 0.056666666666666664, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 29.83, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": -0.07692307692307702 }, { "cell_id": "A::gsm_hard::r4:sciq", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r4:sciq", "anchor_name": "sciq", "anchor_round": "r4", "anchor_domain": "science", "cos_X": -0.0006476823473349214, "adapter_dir": "/workspace/round3_out/round4/Y/sciq", "accuracy": 0.023333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 28.818, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": -0.4615384615384617 }, { "cell_id": "A::gsm_hard::r5:medmcqa_easy", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r5:medmcqa_easy", "anchor_name": "medmcqa_easy", "anchor_round": "r5", "anchor_domain": "science", "cos_X": 0.8125821948051453, "adapter_dir": "/workspace/round3_out/round5/Y/medmcqa_easy", "accuracy": 0.03333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 28.137, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": -0.34615384615384626 }, { "cell_id": "A::gsm_hard::r5:pubmedqa_pqal", "stage": "locality_single_anchor", "task": "gsm_hard", "target_domain": "math", "anchor_ref": "r5:pubmedqa_pqal", "anchor_name": "pubmedqa_pqal", "anchor_round": "r5", "anchor_domain": "science", "cos_X": 0.8367233276367188, "adapter_dir": "/workspace/round3_out/round5/Y/pubmedqa_pqal", "accuracy": 0.05, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 28.028, "base_Y": 0.06333333333333334, "oracle": 0.15, "single_anchor_gap": -0.15384615384615388 }, { "cell_id": "A::mbpp_plus::r4:humaneval", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r4:humaneval", "anchor_name": "humaneval", "anchor_round": "r4", "anchor_domain": "code", "cos_X": 0.9624950885772705, "adapter_dir": "/workspace/round3_out/round4/Y/humaneval", "accuracy": 0.2, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 150.314, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": -0.07142857142857141 }, { "cell_id": "A::mbpp_plus::r4:mbpp", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r4:mbpp", "anchor_name": "mbpp", "anchor_round": "r4", "anchor_domain": "code", "cos_X": -0.0003052547399420291, "adapter_dir": "/workspace/round3_out/round4/Y/mbpp", "accuracy": 0.2833333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 146.154, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": 0.28571428571428564 }, { "cell_id": "A::mbpp_plus::r4:mbpp_sanitized", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r4:mbpp_sanitized", "anchor_name": "mbpp_sanitized", "anchor_round": "r4", "anchor_domain": "code", "cos_X": 0.9884072542190552, "adapter_dir": "/workspace/round3_out/round4/Y/mbpp_sanitized", "accuracy": 0.2633333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 149.756, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": 0.19999999999999984 }, { "cell_id": "A::mbpp_plus::r5:conala_curated", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r5:conala_curated", "anchor_name": "conala_curated", "anchor_round": "r5", "anchor_domain": "code", "cos_X": 0.8741890788078308, "adapter_dir": "/workspace/round3_out/round5/Y/conala_curated", "accuracy": 0.18666666666666668, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 217.294, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": -0.12857142857142856 }, { "cell_id": "A::mbpp_plus::r5:humaneval", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r5:humaneval", "anchor_name": "humaneval", "anchor_round": "r5", "anchor_domain": "code", "cos_X": 0.9624950885772705, "adapter_dir": "/workspace/round3_out/round5/Y/humaneval", "accuracy": 0.2, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 157.393, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": -0.07142857142857141 }, { "cell_id": "A::mbpp_plus::r5:mbpp_sanitized", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r5:mbpp_sanitized", "anchor_name": "mbpp_sanitized", "anchor_round": "r5", "anchor_domain": "code", "cos_X": -0.0003052547399420291, "adapter_dir": "/workspace/round3_out/round5/Y/mbpp_sanitized", "accuracy": 0.2833333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 149.779, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": 0.28571428571428564 }, { "cell_id": "A::mbpp_plus::r4:aqua_rat", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r4:aqua_rat", "anchor_name": "aqua_rat", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.8814506530761719, "adapter_dir": "/workspace/round3_out/round4/Y/aqua_rat", "accuracy": 0.21666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 165.548, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": 0.0 }, { "cell_id": "A::mbpp_plus::r4:gsm8k", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r4:gsm8k", "anchor_name": "gsm8k", "anchor_round": "r4", "anchor_domain": "math", "cos_X": -0.000335412856657058, "adapter_dir": "/workspace/round3_out/round4/Y/gsm8k", "accuracy": 0.20666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 161.016, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": -0.04285714285714289 }, { "cell_id": "A::mbpp_plus::r4:math_algebra_easy", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r4:math_algebra_easy", "anchor_name": "math_algebra_easy", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.9431692361831665, "adapter_dir": "/workspace/round3_out/round4/Y/math_algebra_easy", "accuracy": 0.21333333333333335, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 152.059, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": -0.01428571428571426 }, { "cell_id": "A::mbpp_plus::r4:math_counting_easy", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r4:math_counting_easy", "anchor_name": "math_counting_easy", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.9616791009902954, "adapter_dir": "/workspace/round3_out/round4/Y/math_counting_easy", "accuracy": 0.22333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 161.601, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": 0.02857142857142852 }, { "cell_id": "A::mbpp_plus::r4:multiarith", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r4:multiarith", "anchor_name": "multiarith", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.9476068615913391, "adapter_dir": "/workspace/round3_out/round4/Y/multiarith", "accuracy": 0.18333333333333332, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 157.375, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": -0.14285714285714293 }, { "cell_id": "A::mbpp_plus::r4:svamp", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r4:svamp", "anchor_name": "svamp", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.9265090823173523, "adapter_dir": "/workspace/round3_out/round4/Y/svamp", "accuracy": 0.2, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 156.805, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": -0.07142857142857141 }, { "cell_id": "A::mbpp_plus::r5:aqua_rat_numeric", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r5:aqua_rat_numeric", "anchor_name": "aqua_rat_numeric", "anchor_round": "r5", "anchor_domain": "math", "cos_X": -0.00045860654790885746, "adapter_dir": "/workspace/round3_out/round5/Y/aqua_rat_numeric", "accuracy": 0.21333333333333335, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 181.409, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": -0.01428571428571426 }, { "cell_id": "A::mbpp_plus::r5:math_counting_easy", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r5:math_counting_easy", "anchor_name": "math_counting_easy", "anchor_round": "r5", "anchor_domain": "math", "cos_X": -0.0002999037387780845, "adapter_dir": "/workspace/round3_out/round5/Y/math_counting_easy", "accuracy": 0.22, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 153.077, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": 0.01428571428571426 }, { "cell_id": "A::mbpp_plus::r5:mawps", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r5:mawps", "anchor_name": "mawps", "anchor_round": "r5", "anchor_domain": "math", "cos_X": -0.0004362465988378972, "adapter_dir": "/workspace/round3_out/round5/Y/mawps", "accuracy": 0.19666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 160.786, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": -0.08571428571428578 }, { "cell_id": "A::mbpp_plus::r4:arc_easy", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r4:arc_easy", "anchor_name": "arc_easy", "anchor_round": "r4", "anchor_domain": "science", "cos_X": -0.000321700208587572, "adapter_dir": "/workspace/round3_out/round4/Y/arc_easy", "accuracy": 0.21, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 158.264, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": -0.028571428571428636 }, { "cell_id": "A::mbpp_plus::r4:medmcqa_easy", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r4:medmcqa_easy", "anchor_name": "medmcqa_easy", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.8672773241996765, "adapter_dir": "/workspace/round3_out/round4/Y/medmcqa_easy", "accuracy": 0.21333333333333335, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 171.456, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": -0.01428571428571426 }, { "cell_id": "A::mbpp_plus::r4:mmlu_elementary_math", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r4:mmlu_elementary_math", "anchor_name": "mmlu_elementary_math", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.9442014694213867, "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_elementary_math", "accuracy": 0.20666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 149.307, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": -0.04285714285714289 }, { "cell_id": "A::mbpp_plus::r4:mmlu_high_school_biology", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r4:mmlu_high_school_biology", "anchor_name": "mmlu_high_school_biology", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.9451485872268677, "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_biology", "accuracy": 0.21333333333333335, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 161.958, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": -0.01428571428571426 }, { "cell_id": "A::mbpp_plus::r4:mmlu_high_school_physics", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r4:mmlu_high_school_physics", "anchor_name": "mmlu_high_school_physics", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.9600575566291809, "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_physics", "accuracy": 0.21, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 160.754, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": -0.028571428571428636 }, { "cell_id": "A::mbpp_plus::r4:openbookqa", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r4:openbookqa", "anchor_name": "openbookqa", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.8676939606666565, "adapter_dir": "/workspace/round3_out/round4/Y/openbookqa", "accuracy": 0.2, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 160.035, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": -0.07142857142857141 }, { "cell_id": "A::mbpp_plus::r4:sciq", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r4:sciq", "anchor_name": "sciq", "anchor_round": "r4", "anchor_domain": "science", "cos_X": -0.0003326318983454257, "adapter_dir": "/workspace/round3_out/round4/Y/sciq", "accuracy": 0.21, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 154.652, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": -0.028571428571428636 }, { "cell_id": "A::mbpp_plus::r5:medmcqa_easy", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r5:medmcqa_easy", "anchor_name": "medmcqa_easy", "anchor_round": "r5", "anchor_domain": "science", "cos_X": 0.8672773241996765, "adapter_dir": "/workspace/round3_out/round5/Y/medmcqa_easy", "accuracy": 0.21333333333333335, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 166.789, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": -0.01428571428571426 }, { "cell_id": "A::mbpp_plus::r5:pubmedqa_pqal", "stage": "locality_single_anchor", "task": "mbpp_plus", "target_domain": "code", "anchor_ref": "r5:pubmedqa_pqal", "anchor_name": "pubmedqa_pqal", "anchor_round": "r5", "anchor_domain": "science", "cos_X": 0.8933367133140564, "adapter_dir": "/workspace/round3_out/round5/Y/pubmedqa_pqal", "accuracy": 0.20333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 157.746, "base_Y": 0.21666666666666667, "oracle": 0.45, "single_anchor_gap": -0.057142857142857155 }, { "cell_id": "A::mbpp_test_held::r4:humaneval", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r4:humaneval", "anchor_name": "humaneval", "anchor_round": "r4", "anchor_domain": "code", "cos_X": 0.9843209981918335, "adapter_dir": "/workspace/round3_out/round4/Y/humaneval", "accuracy": 0.23, "real_generation_eval": true, "eval_examples": 100, "gpu": 1, "eval_seconds": 52.845, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.0 }, { "cell_id": "A::mbpp_test_held::r4:mbpp", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r4:mbpp", "anchor_name": "mbpp", "anchor_round": "r4", "anchor_domain": "code", "cos_X": -0.00017454303451813757, "adapter_dir": "/workspace/round3_out/round4/Y/mbpp", "accuracy": 0.3, "real_generation_eval": true, "eval_examples": 100, "gpu": 1, "eval_seconds": 49.504, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.7777777777777776 }, { "cell_id": "A::mbpp_test_held::r4:mbpp_sanitized", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r4:mbpp_sanitized", "anchor_name": "mbpp_sanitized", "anchor_round": "r4", "anchor_domain": "code", "cos_X": 1.0012516975402832, "adapter_dir": "/workspace/round3_out/round4/Y/mbpp_sanitized", "accuracy": 0.29, "real_generation_eval": true, "eval_examples": 100, "gpu": 3, "eval_seconds": 51.035, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.6666666666666664 }, { "cell_id": "A::mbpp_test_held::r5:conala_curated", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r5:conala_curated", "anchor_name": "conala_curated", "anchor_round": "r5", "anchor_domain": "code", "cos_X": 0.891869068145752, "adapter_dir": "/workspace/round3_out/round5/Y/conala_curated", "accuracy": 0.23, "real_generation_eval": true, "eval_examples": 100, "gpu": 5, "eval_seconds": 74.517, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.0 }, { "cell_id": "A::mbpp_test_held::r5:humaneval", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r5:humaneval", "anchor_name": "humaneval", "anchor_round": "r5", "anchor_domain": "code", "cos_X": 0.9843209981918335, "adapter_dir": "/workspace/round3_out/round5/Y/humaneval", "accuracy": 0.23, "real_generation_eval": true, "eval_examples": 100, "gpu": 4, "eval_seconds": 55.158, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.0 }, { "cell_id": "A::mbpp_test_held::r5:mbpp_sanitized", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r5:mbpp_sanitized", "anchor_name": "mbpp_sanitized", "anchor_round": "r5", "anchor_domain": "code", "cos_X": -0.00017454303451813757, "adapter_dir": "/workspace/round3_out/round5/Y/mbpp_sanitized", "accuracy": 0.3, "real_generation_eval": true, "eval_examples": 100, "gpu": 3, "eval_seconds": 50.742, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.7777777777777776 }, { "cell_id": "A::mbpp_test_held::r4:aqua_rat", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r4:aqua_rat", "anchor_name": "aqua_rat", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.9019709825515747, "adapter_dir": "/workspace/round3_out/round4/Y/aqua_rat", "accuracy": 0.24, "real_generation_eval": true, "eval_examples": 100, "gpu": 6, "eval_seconds": 52.984, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.11111111111111091 }, { "cell_id": "A::mbpp_test_held::r4:gsm8k", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r4:gsm8k", "anchor_name": "gsm8k", "anchor_round": "r4", "anchor_domain": "math", "cos_X": -0.00032459679641760886, "adapter_dir": "/workspace/round3_out/round4/Y/gsm8k", "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 0, "eval_seconds": 46.407, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.22222222222222213 }, { "cell_id": "A::mbpp_test_held::r4:math_algebra_easy", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r4:math_algebra_easy", "anchor_name": "math_algebra_easy", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.9648966789245605, "adapter_dir": "/workspace/round3_out/round4/Y/math_algebra_easy", "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 5, "eval_seconds": 48.28, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.22222222222222213 }, { "cell_id": "A::mbpp_test_held::r4:math_counting_easy", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r4:math_counting_easy", "anchor_name": "math_counting_easy", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.9844874739646912, "adapter_dir": "/workspace/round3_out/round4/Y/math_counting_easy", "accuracy": 0.23, "real_generation_eval": true, "eval_examples": 100, "gpu": 0, "eval_seconds": 51.705, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.0 }, { "cell_id": "A::mbpp_test_held::r4:multiarith", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r4:multiarith", "anchor_name": "multiarith", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.969685435295105, "adapter_dir": "/workspace/round3_out/round4/Y/multiarith", "accuracy": 0.23, "real_generation_eval": true, "eval_examples": 100, "gpu": 6, "eval_seconds": 50.83, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.0 }, { "cell_id": "A::mbpp_test_held::r4:svamp", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r4:svamp", "anchor_name": "svamp", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.9480026960372925, "adapter_dir": "/workspace/round3_out/round4/Y/svamp", "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 5, "eval_seconds": 49.549, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.22222222222222213 }, { "cell_id": "A::mbpp_test_held::r5:aqua_rat_numeric", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r5:aqua_rat_numeric", "anchor_name": "aqua_rat_numeric", "anchor_round": "r5", "anchor_domain": "math", "cos_X": -0.00034561159554868937, "adapter_dir": "/workspace/round3_out/round5/Y/aqua_rat_numeric", "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 0, "eval_seconds": 58.851, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.22222222222222213 }, { "cell_id": "A::mbpp_test_held::r5:math_counting_easy", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r5:math_counting_easy", "anchor_name": "math_counting_easy", "anchor_round": "r5", "anchor_domain": "math", "cos_X": -0.00020121937268413603, "adapter_dir": "/workspace/round3_out/round5/Y/math_counting_easy", "accuracy": 0.24, "real_generation_eval": true, "eval_examples": 100, "gpu": 1, "eval_seconds": 46.892, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.11111111111111091 }, { "cell_id": "A::mbpp_test_held::r5:mawps", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r5:mawps", "anchor_name": "mawps", "anchor_round": "r5", "anchor_domain": "math", "cos_X": -0.0002617448626551777, "adapter_dir": "/workspace/round3_out/round5/Y/mawps", "accuracy": 0.24, "real_generation_eval": true, "eval_examples": 100, "gpu": 2, "eval_seconds": 47.997, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.11111111111111091 }, { "cell_id": "A::mbpp_test_held::r4:arc_easy", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r4:arc_easy", "anchor_name": "arc_easy", "anchor_round": "r4", "anchor_domain": "science", "cos_X": -0.00020039879018440843, "adapter_dir": "/workspace/round3_out/round4/Y/arc_easy", "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 3, "eval_seconds": 50.507, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.22222222222222213 }, { "cell_id": "A::mbpp_test_held::r4:medmcqa_easy", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r4:medmcqa_easy", "anchor_name": "medmcqa_easy", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.8870396614074707, "adapter_dir": "/workspace/round3_out/round4/Y/medmcqa_easy", "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 7, "eval_seconds": 52.987, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.22222222222222213 }, { "cell_id": "A::mbpp_test_held::r4:mmlu_elementary_math", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r4:mmlu_elementary_math", "anchor_name": "mmlu_elementary_math", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.9657072424888611, "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_elementary_math", "accuracy": 0.26, "real_generation_eval": true, "eval_examples": 100, "gpu": 4, "eval_seconds": 43.06, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.3333333333333333 }, { "cell_id": "A::mbpp_test_held::r4:mmlu_high_school_biology", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r4:mmlu_high_school_biology", "anchor_name": "mmlu_high_school_biology", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.9674594402313232, "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_biology", "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 7, "eval_seconds": 48.926, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.22222222222222213 }, { "cell_id": "A::mbpp_test_held::r4:mmlu_high_school_physics", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r4:mmlu_high_school_physics", "anchor_name": "mmlu_high_school_physics", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.983260452747345, "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_physics", "accuracy": 0.24, "real_generation_eval": true, "eval_examples": 100, "gpu": 2, "eval_seconds": 53.664, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.11111111111111091 }, { "cell_id": "A::mbpp_test_held::r4:openbookqa", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r4:openbookqa", "anchor_name": "openbookqa", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.88755863904953, "adapter_dir": "/workspace/round3_out/round4/Y/openbookqa", "accuracy": 0.24, "real_generation_eval": true, "eval_examples": 100, "gpu": 4, "eval_seconds": 50.62, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.11111111111111091 }, { "cell_id": "A::mbpp_test_held::r4:sciq", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r4:sciq", "anchor_name": "sciq", "anchor_round": "r4", "anchor_domain": "science", "cos_X": -0.00023939934908412397, "adapter_dir": "/workspace/round3_out/round4/Y/sciq", "accuracy": 0.24, "real_generation_eval": true, "eval_examples": 100, "gpu": 2, "eval_seconds": 54.751, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.11111111111111091 }, { "cell_id": "A::mbpp_test_held::r5:medmcqa_easy", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r5:medmcqa_easy", "anchor_name": "medmcqa_easy", "anchor_round": "r5", "anchor_domain": "science", "cos_X": 0.8870396614074707, "adapter_dir": "/workspace/round3_out/round5/Y/medmcqa_easy", "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 6, "eval_seconds": 51.649, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.22222222222222213 }, { "cell_id": "A::mbpp_test_held::r5:pubmedqa_pqal", "stage": "locality_single_anchor", "task": "mbpp_test_held", "target_domain": "code", "anchor_ref": "r5:pubmedqa_pqal", "anchor_name": "pubmedqa_pqal", "anchor_round": "r5", "anchor_domain": "science", "cos_X": 0.9136675000190735, "adapter_dir": "/workspace/round3_out/round5/Y/pubmedqa_pqal", "accuracy": 0.24, "real_generation_eval": true, "eval_examples": 100, "gpu": 7, "eval_seconds": 50.918, "base_Y": 0.23, "oracle": 0.32, "single_anchor_gap": 0.11111111111111091 }, { "cell_id": "A::openbookqa_test::r4:humaneval", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r4:humaneval", "anchor_name": "humaneval", "anchor_round": "r4", "anchor_domain": "code", "cos_X": 0.9508311748504639, "adapter_dir": "/workspace/round3_out/round4/Y/humaneval", "accuracy": 0.7166666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 4.404, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": 0.02439024390243918 }, { "cell_id": "A::openbookqa_test::r4:mbpp", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r4:mbpp", "anchor_name": "mbpp", "anchor_round": "r4", "anchor_domain": "code", "cos_X": -0.00021616967569570988, "adapter_dir": "/workspace/round3_out/round4/Y/mbpp", "accuracy": 0.6833333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 4.538, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": -0.09756097560975592 }, { "cell_id": "A::openbookqa_test::r4:mbpp_sanitized", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r4:mbpp_sanitized", "anchor_name": "mbpp_sanitized", "anchor_round": "r4", "anchor_domain": "code", "cos_X": 0.9530814290046692, "adapter_dir": "/workspace/round3_out/round4/Y/mbpp_sanitized", "accuracy": 0.6933333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 4.846, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": -0.060975609756097345 }, { "cell_id": "A::openbookqa_test::r5:conala_curated", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r5:conala_curated", "anchor_name": "conala_curated", "anchor_round": "r5", "anchor_domain": "code", "cos_X": 0.8603157997131348, "adapter_dir": "/workspace/round3_out/round5/Y/conala_curated", "accuracy": 0.7233333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 28.747, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": 0.04878048780487836 }, { "cell_id": "A::openbookqa_test::r5:humaneval", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r5:humaneval", "anchor_name": "humaneval", "anchor_round": "r5", "anchor_domain": "code", "cos_X": 0.9508311748504639, "adapter_dir": "/workspace/round3_out/round5/Y/humaneval", "accuracy": 0.7166666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 4.672, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": 0.02439024390243918 }, { "cell_id": "A::openbookqa_test::r5:mbpp_sanitized", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r5:mbpp_sanitized", "anchor_name": "mbpp_sanitized", "anchor_round": "r5", "anchor_domain": "code", "cos_X": -0.00021616967569570988, "adapter_dir": "/workspace/round3_out/round5/Y/mbpp_sanitized", "accuracy": 0.6833333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 4.696, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": -0.09756097560975592 }, { "cell_id": "A::openbookqa_test::r4:aqua_rat", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r4:aqua_rat", "anchor_name": "aqua_rat", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.8845954537391663, "adapter_dir": "/workspace/round3_out/round4/Y/aqua_rat", "accuracy": 0.7, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 27.648, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": -0.03658536585365857 }, { "cell_id": "A::openbookqa_test::r4:gsm8k", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r4:gsm8k", "anchor_name": "gsm8k", "anchor_round": "r4", "anchor_domain": "math", "cos_X": -0.0003817097167484462, "adapter_dir": "/workspace/round3_out/round4/Y/gsm8k", "accuracy": 0.7333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 5.373, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": 0.08536585365853654 }, { "cell_id": "A::openbookqa_test::r4:math_algebra_easy", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r4:math_algebra_easy", "anchor_name": "math_algebra_easy", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.9346193075180054, "adapter_dir": "/workspace/round3_out/round4/Y/math_algebra_easy", "accuracy": 0.7233333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 8.361, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": 0.04878048780487836 }, { "cell_id": "A::openbookqa_test::r4:math_counting_easy", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r4:math_counting_easy", "anchor_name": "math_counting_easy", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.9526723623275757, "adapter_dir": "/workspace/round3_out/round4/Y/math_counting_easy", "accuracy": 0.7233333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 3.554, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": 0.04878048780487836 }, { "cell_id": "A::openbookqa_test::r4:multiarith", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r4:multiarith", "anchor_name": "multiarith", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.9391674399375916, "adapter_dir": "/workspace/round3_out/round4/Y/multiarith", "accuracy": 0.7266666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 4.66, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": 0.060975609756097754 }, { "cell_id": "A::openbookqa_test::r4:svamp", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r4:svamp", "anchor_name": "svamp", "anchor_round": "r4", "anchor_domain": "math", "cos_X": 0.9191423058509827, "adapter_dir": "/workspace/round3_out/round4/Y/svamp", "accuracy": 0.69, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 3.473, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": -0.07317073170731714 }, { "cell_id": "A::openbookqa_test::r5:aqua_rat_numeric", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r5:aqua_rat_numeric", "anchor_name": "aqua_rat_numeric", "anchor_round": "r5", "anchor_domain": "math", "cos_X": -0.0004580095992423594, "adapter_dir": "/workspace/round3_out/round5/Y/aqua_rat_numeric", "accuracy": 0.7566666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 26.635, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": 0.17073170731707346 }, { "cell_id": "A::openbookqa_test::r5:math_counting_easy", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r5:math_counting_easy", "anchor_name": "math_counting_easy", "anchor_round": "r5", "anchor_domain": "math", "cos_X": -0.00032379472395405173, "adapter_dir": "/workspace/round3_out/round5/Y/math_counting_easy", "accuracy": 0.7166666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 3.361, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": 0.02439024390243918 }, { "cell_id": "A::openbookqa_test::r5:mawps", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r5:mawps", "anchor_name": "mawps", "anchor_round": "r5", "anchor_domain": "math", "cos_X": -0.00039862250559963286, "adapter_dir": "/workspace/round3_out/round5/Y/mawps", "accuracy": 0.73, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 6.23, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": 0.07317073170731714 }, { "cell_id": "A::openbookqa_test::r4:arc_easy", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r4:arc_easy", "anchor_name": "arc_easy", "anchor_round": "r4", "anchor_domain": "science", "cos_X": -0.0005088147590868175, "adapter_dir": "/workspace/round3_out/round4/Y/arc_easy", "accuracy": 0.7166666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 16.606, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": 0.02439024390243918 }, { "cell_id": "A::openbookqa_test::r4:medmcqa_easy", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r4:medmcqa_easy", "anchor_name": "medmcqa_easy", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.8757078647613525, "adapter_dir": "/workspace/round3_out/round4/Y/medmcqa_easy", "accuracy": 0.7133333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 28.411, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": 0.012195121951219795 }, { "cell_id": "A::openbookqa_test::r4:mmlu_elementary_math", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r4:mmlu_elementary_math", "anchor_name": "mmlu_elementary_math", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.9520111680030823, "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_elementary_math", "accuracy": 0.7133333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 29.065, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": 0.012195121951219795 }, { "cell_id": "A::openbookqa_test::r4:mmlu_high_school_biology", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r4:mmlu_high_school_biology", "anchor_name": "mmlu_high_school_biology", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.9565833806991577, "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_biology", "accuracy": 0.6733333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 28.46, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": -0.1341463414634145 }, { "cell_id": "A::openbookqa_test::r4:mmlu_high_school_physics", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r4:mmlu_high_school_physics", "anchor_name": "mmlu_high_school_physics", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.9671817421913147, "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_physics", "accuracy": 0.6966666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 28.106, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": -0.04878048780487796 }, { "cell_id": "A::openbookqa_test::r4:openbookqa", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r4:openbookqa", "anchor_name": "openbookqa", "anchor_round": "r4", "anchor_domain": "science", "cos_X": 0.89091557264328, "adapter_dir": "/workspace/round3_out/round4/Y/openbookqa", "accuracy": 0.81, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 19.183, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": 0.3658536585365857 }, { "cell_id": "A::openbookqa_test::r4:sciq", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r4:sciq", "anchor_name": "sciq", "anchor_round": "r4", "anchor_domain": "science", "cos_X": -0.00015819823602214456, "adapter_dir": "/workspace/round3_out/round4/Y/sciq", "accuracy": 0.7033333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 28.125, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": -0.024390243902438775 }, { "cell_id": "A::openbookqa_test::r5:medmcqa_easy", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r5:medmcqa_easy", "anchor_name": "medmcqa_easy", "anchor_round": "r5", "anchor_domain": "science", "cos_X": 0.8757078647613525, "adapter_dir": "/workspace/round3_out/round5/Y/medmcqa_easy", "accuracy": 0.7133333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 27.62, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": 0.012195121951219795 }, { "cell_id": "A::openbookqa_test::r5:pubmedqa_pqal", "stage": "locality_single_anchor", "task": "openbookqa_test", "target_domain": "science", "anchor_ref": "r5:pubmedqa_pqal", "anchor_name": "pubmedqa_pqal", "anchor_round": "r5", "anchor_domain": "science", "cos_X": 0.8888986110687256, "adapter_dir": "/workspace/round3_out/round5/Y/pubmedqa_pqal", "accuracy": 0.6966666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 22.009, "base_Y": 0.71, "oracle": 0.9833333333333333, "single_anchor_gap": -0.04878048780487796 } ] }