| { |
| "config": { |
| "hub_repo": "CK0607/cross-model-lora-prediction-3b", |
| "model_Y": "meta-llama/Llama-3.2-3B-Instruct", |
| "no_surrogate": true, |
| "generation": { |
| "do_sample": false, |
| "num_beams": 1, |
| "max_new_tokens_code": 96, |
| "max_new_tokens_other": 24 |
| }, |
| "heldouts": [ |
| "gsm_hard", |
| "gsm8k_test_500", |
| "mbpp_test_held", |
| "mbpp_plus", |
| "openbookqa_test" |
| ], |
| "pool_anchor_names": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ] |
| }, |
| "adapter_verification": { |
| "listing": { |
| "round4/X": [ |
| "aqua_rat", |
| "arc_challenge", |
| "arc_easy", |
| "gsm8k", |
| "gsm8k_test_500", |
| "gsm_hard", |
| "humaneval", |
| "math_algebra_easy", |
| "math_counting_easy", |
| "mbpp", |
| "mbpp_plus", |
| "mbpp_sanitized", |
| "mbpp_test_held", |
| "medmcqa_easy", |
| "mmlu_elementary_math", |
| "mmlu_high_school_biology", |
| "mmlu_high_school_physics", |
| "multiarith", |
| "openbookqa", |
| "openbookqa_test", |
| "sciq", |
| "svamp" |
| ], |
| "round4/Y": [ |
| "aqua_rat", |
| "arc_challenge", |
| "arc_easy", |
| "gsm8k", |
| "gsm8k_test_500", |
| "gsm_hard", |
| "humaneval", |
| "math_algebra_easy", |
| "math_counting_easy", |
| "mbpp", |
| "mbpp_plus", |
| "mbpp_sanitized", |
| "mbpp_test_held", |
| "medmcqa_easy", |
| "mmlu_elementary_math", |
| "mmlu_high_school_biology", |
| "mmlu_high_school_physics", |
| "multiarith", |
| "openbookqa", |
| "openbookqa_test", |
| "sciq", |
| "svamp" |
| ], |
| "round5/X": [ |
| "aqua_rat_numeric", |
| "conala_curated", |
| "humaneval", |
| "math_counting_easy", |
| "mawps", |
| "mbpp_sanitized", |
| "medmcqa_easy", |
| "pubmedqa_pqal" |
| ], |
| "round5/Y": [ |
| "aqua_rat_numeric", |
| "conala_curated", |
| "humaneval", |
| "math_counting_easy", |
| "mawps", |
| "mbpp_sanitized", |
| "medmcqa_easy", |
| "pubmedqa_pqal" |
| ], |
| "round6/Y_pred": [ |
| "gsm8k_test_500_global_ridge_N12_seed0", |
| "gsm8k_test_500_global_ridge_N12_seed1", |
| "gsm8k_test_500_global_ridge_N12_seed2", |
| "gsm8k_test_500_global_ridge_N16_seed0", |
| "gsm8k_test_500_global_ridge_N16_seed1", |
| "gsm8k_test_500_global_ridge_N16_seed2", |
| "gsm8k_test_500_global_ridge_N24_full", |
| "gsm8k_test_500_global_ridge_N4_seed0", |
| "gsm8k_test_500_global_ridge_N4_seed1", |
| "gsm8k_test_500_global_ridge_N4_seed2", |
| "gsm8k_test_500_global_ridge_N8_seed0", |
| "gsm8k_test_500_global_ridge_N8_seed1", |
| "gsm8k_test_500_global_ridge_N8_seed2", |
| "gsm8k_test_500_mean_N12_seed0", |
| "gsm8k_test_500_mean_N12_seed1", |
| "gsm8k_test_500_mean_N12_seed2", |
| "gsm8k_test_500_mean_N16_seed0", |
| "gsm8k_test_500_mean_N16_seed1", |
| "gsm8k_test_500_mean_N16_seed2", |
| "gsm8k_test_500_mean_N24_full", |
| "gsm8k_test_500_mean_N4_seed0", |
| "gsm8k_test_500_mean_N4_seed1", |
| "gsm8k_test_500_mean_N4_seed2", |
| "gsm8k_test_500_mean_N8_seed0", |
| "gsm8k_test_500_mean_N8_seed1", |
| "gsm8k_test_500_mean_N8_seed2", |
| "gsm8k_test_500_topk8_global_ridge_N12_seed0", |
| "gsm8k_test_500_topk8_global_ridge_N12_seed1", |
| "gsm8k_test_500_topk8_global_ridge_N12_seed2", |
| "gsm8k_test_500_topk8_global_ridge_N16_seed0", |
| "gsm8k_test_500_topk8_global_ridge_N16_seed1", |
| "gsm8k_test_500_topk8_global_ridge_N16_seed2", |
| "gsm8k_test_500_topk8_global_ridge_N24_full", |
| "gsm8k_test_500_topk8_global_ridge_N4_seed0", |
| "gsm8k_test_500_topk8_global_ridge_N4_seed1", |
| "gsm8k_test_500_topk8_global_ridge_N4_seed2", |
| "gsm8k_test_500_topk8_global_ridge_N8_seed0", |
| "gsm8k_test_500_topk8_global_ridge_N8_seed1", |
| "gsm8k_test_500_topk8_global_ridge_N8_seed2", |
| "gsm_hard_global_ridge_N12_seed0", |
| "gsm_hard_global_ridge_N12_seed1", |
| "gsm_hard_global_ridge_N12_seed2", |
| "gsm_hard_global_ridge_N16_seed0", |
| "gsm_hard_global_ridge_N16_seed1", |
| "gsm_hard_global_ridge_N16_seed2", |
| "gsm_hard_global_ridge_N24_full", |
| "gsm_hard_global_ridge_N4_seed0", |
| "gsm_hard_global_ridge_N4_seed1", |
| "gsm_hard_global_ridge_N4_seed2", |
| "gsm_hard_global_ridge_N8_seed0", |
| "gsm_hard_global_ridge_N8_seed1", |
| "gsm_hard_global_ridge_N8_seed2", |
| "gsm_hard_mean_N12_seed0", |
| "gsm_hard_mean_N12_seed1", |
| "gsm_hard_mean_N12_seed2", |
| "gsm_hard_mean_N16_seed0", |
| "gsm_hard_mean_N16_seed1", |
| "gsm_hard_mean_N16_seed2", |
| "gsm_hard_mean_N24_full", |
| "gsm_hard_mean_N4_seed0", |
| "gsm_hard_mean_N4_seed1", |
| "gsm_hard_mean_N4_seed2", |
| "gsm_hard_mean_N8_seed0", |
| "gsm_hard_mean_N8_seed1", |
| "gsm_hard_mean_N8_seed2", |
| "gsm_hard_topk8_global_ridge_N12_seed0", |
| "gsm_hard_topk8_global_ridge_N12_seed1", |
| "gsm_hard_topk8_global_ridge_N12_seed2", |
| "gsm_hard_topk8_global_ridge_N16_seed0", |
| "gsm_hard_topk8_global_ridge_N16_seed1", |
| "gsm_hard_topk8_global_ridge_N16_seed2", |
| "gsm_hard_topk8_global_ridge_N24_full", |
| "gsm_hard_topk8_global_ridge_N4_seed0", |
| "gsm_hard_topk8_global_ridge_N4_seed1", |
| "gsm_hard_topk8_global_ridge_N4_seed2", |
| "gsm_hard_topk8_global_ridge_N8_seed0", |
| "gsm_hard_topk8_global_ridge_N8_seed1", |
| "gsm_hard_topk8_global_ridge_N8_seed2", |
| "mbpp_plus_global_ridge_N12_seed0", |
| "mbpp_plus_global_ridge_N12_seed1", |
| "mbpp_plus_global_ridge_N12_seed2", |
| "mbpp_plus_global_ridge_N16_seed0", |
| "mbpp_plus_global_ridge_N16_seed1", |
| "mbpp_plus_global_ridge_N16_seed2", |
| "mbpp_plus_global_ridge_N24_full", |
| "mbpp_plus_global_ridge_N4_seed0", |
| "mbpp_plus_global_ridge_N4_seed1", |
| "mbpp_plus_global_ridge_N4_seed2", |
| "mbpp_plus_global_ridge_N8_seed0", |
| "mbpp_plus_global_ridge_N8_seed1", |
| "mbpp_plus_global_ridge_N8_seed2", |
| "mbpp_plus_mean_N12_seed0", |
| "mbpp_plus_mean_N12_seed1", |
| "mbpp_plus_mean_N12_seed2", |
| "mbpp_plus_mean_N16_seed0", |
| "mbpp_plus_mean_N16_seed1", |
| "mbpp_plus_mean_N16_seed2", |
| "mbpp_plus_mean_N24_full", |
| "mbpp_plus_mean_N4_seed0", |
| "mbpp_plus_mean_N4_seed1", |
| "mbpp_plus_mean_N4_seed2", |
| "mbpp_plus_mean_N8_seed0", |
| "mbpp_plus_mean_N8_seed1", |
| "mbpp_plus_mean_N8_seed2", |
| "mbpp_plus_topk8_global_ridge_N12_seed0", |
| "mbpp_plus_topk8_global_ridge_N12_seed1", |
| "mbpp_plus_topk8_global_ridge_N12_seed2", |
| "mbpp_plus_topk8_global_ridge_N16_seed0", |
| "mbpp_plus_topk8_global_ridge_N16_seed1", |
| "mbpp_plus_topk8_global_ridge_N16_seed2", |
| "mbpp_plus_topk8_global_ridge_N24_full", |
| "mbpp_plus_topk8_global_ridge_N4_seed0", |
| "mbpp_plus_topk8_global_ridge_N4_seed1", |
| "mbpp_plus_topk8_global_ridge_N4_seed2", |
| "mbpp_plus_topk8_global_ridge_N8_seed0", |
| "mbpp_plus_topk8_global_ridge_N8_seed1", |
| "mbpp_plus_topk8_global_ridge_N8_seed2", |
| "mbpp_test_held_global_ridge_N12_seed0", |
| "mbpp_test_held_global_ridge_N12_seed1", |
| "mbpp_test_held_global_ridge_N12_seed2", |
| "mbpp_test_held_global_ridge_N16_seed0", |
| "mbpp_test_held_global_ridge_N16_seed1", |
| "mbpp_test_held_global_ridge_N16_seed2", |
| "mbpp_test_held_global_ridge_N24_full", |
| "mbpp_test_held_global_ridge_N4_seed0", |
| "mbpp_test_held_global_ridge_N4_seed1", |
| "mbpp_test_held_global_ridge_N4_seed2", |
| "mbpp_test_held_global_ridge_N8_seed0", |
| "mbpp_test_held_global_ridge_N8_seed1", |
| "mbpp_test_held_global_ridge_N8_seed2", |
| "mbpp_test_held_mean_N12_seed0", |
| "mbpp_test_held_mean_N12_seed1", |
| "mbpp_test_held_mean_N12_seed2", |
| "mbpp_test_held_mean_N16_seed0", |
| "mbpp_test_held_mean_N16_seed1", |
| "mbpp_test_held_mean_N16_seed2", |
| "mbpp_test_held_mean_N24_full", |
| "mbpp_test_held_mean_N4_seed0", |
| "mbpp_test_held_mean_N4_seed1", |
| "mbpp_test_held_mean_N4_seed2", |
| "mbpp_test_held_mean_N8_seed0", |
| "mbpp_test_held_mean_N8_seed1", |
| "mbpp_test_held_mean_N8_seed2", |
| "mbpp_test_held_topk8_global_ridge_N12_seed0", |
| "mbpp_test_held_topk8_global_ridge_N12_seed1", |
| "mbpp_test_held_topk8_global_ridge_N12_seed2", |
| "mbpp_test_held_topk8_global_ridge_N16_seed0", |
| "mbpp_test_held_topk8_global_ridge_N16_seed1", |
| "mbpp_test_held_topk8_global_ridge_N16_seed2", |
| "mbpp_test_held_topk8_global_ridge_N24_full", |
| "mbpp_test_held_topk8_global_ridge_N4_seed0", |
| "mbpp_test_held_topk8_global_ridge_N4_seed1", |
| "mbpp_test_held_topk8_global_ridge_N4_seed2", |
| "mbpp_test_held_topk8_global_ridge_N8_seed0", |
| "mbpp_test_held_topk8_global_ridge_N8_seed1", |
| "mbpp_test_held_topk8_global_ridge_N8_seed2", |
| "openbookqa_test_global_ridge_N12_seed0", |
| "openbookqa_test_global_ridge_N12_seed1", |
| "openbookqa_test_global_ridge_N12_seed2", |
| "openbookqa_test_global_ridge_N16_seed0", |
| "openbookqa_test_global_ridge_N16_seed1", |
| "openbookqa_test_global_ridge_N16_seed2", |
| "openbookqa_test_global_ridge_N24_full", |
| "openbookqa_test_global_ridge_N4_seed0", |
| "openbookqa_test_global_ridge_N4_seed1", |
| "openbookqa_test_global_ridge_N4_seed2", |
| "openbookqa_test_global_ridge_N8_seed0", |
| "openbookqa_test_global_ridge_N8_seed1", |
| "openbookqa_test_global_ridge_N8_seed2", |
| "openbookqa_test_mean_N12_seed0", |
| "openbookqa_test_mean_N12_seed1", |
| "openbookqa_test_mean_N12_seed2", |
| "openbookqa_test_mean_N16_seed0", |
| "openbookqa_test_mean_N16_seed1", |
| "openbookqa_test_mean_N16_seed2", |
| "openbookqa_test_mean_N24_full", |
| "openbookqa_test_mean_N4_seed0", |
| "openbookqa_test_mean_N4_seed1", |
| "openbookqa_test_mean_N4_seed2", |
| "openbookqa_test_mean_N8_seed0", |
| "openbookqa_test_mean_N8_seed1", |
| "openbookqa_test_mean_N8_seed2", |
| "openbookqa_test_topk8_global_ridge_N12_seed0", |
| "openbookqa_test_topk8_global_ridge_N12_seed1", |
| "openbookqa_test_topk8_global_ridge_N12_seed2", |
| "openbookqa_test_topk8_global_ridge_N16_seed0", |
| "openbookqa_test_topk8_global_ridge_N16_seed1", |
| "openbookqa_test_topk8_global_ridge_N16_seed2", |
| "openbookqa_test_topk8_global_ridge_N24_full", |
| "openbookqa_test_topk8_global_ridge_N4_seed0", |
| "openbookqa_test_topk8_global_ridge_N4_seed1", |
| "openbookqa_test_topk8_global_ridge_N4_seed2", |
| "openbookqa_test_topk8_global_ridge_N8_seed0", |
| "openbookqa_test_topk8_global_ridge_N8_seed1", |
| "openbookqa_test_topk8_global_ridge_N8_seed2" |
| ], |
| "round8/Y_pred": [ |
| "gsm8k_test_500_pertensor_pca_N12_seed0", |
| "gsm8k_test_500_pertensor_pca_N12_seed1", |
| "gsm8k_test_500_pertensor_pca_N12_seed2", |
| "gsm8k_test_500_pertensor_pca_N16_seed0", |
| "gsm8k_test_500_pertensor_pca_N16_seed1", |
| "gsm8k_test_500_pertensor_pca_N16_seed2", |
| "gsm8k_test_500_pertensor_pca_N24_full", |
| "gsm8k_test_500_pertensor_ridge_N12_seed0", |
| "gsm8k_test_500_pertensor_ridge_N12_seed1", |
| "gsm8k_test_500_pertensor_ridge_N12_seed2", |
| "gsm8k_test_500_pertensor_ridge_N16_seed0", |
| "gsm8k_test_500_pertensor_ridge_N16_seed1", |
| "gsm8k_test_500_pertensor_ridge_N16_seed2", |
| "gsm8k_test_500_pertensor_ridge_N24_full", |
| "gsm8k_test_500_procrustes_N12_seed0", |
| "gsm8k_test_500_procrustes_N12_seed1", |
| "gsm8k_test_500_procrustes_N12_seed2", |
| "gsm8k_test_500_procrustes_N16_seed0", |
| "gsm8k_test_500_procrustes_N16_seed1", |
| "gsm8k_test_500_procrustes_N16_seed2", |
| "gsm8k_test_500_procrustes_N24_full", |
| "gsm8k_test_500_topk12_global_ridge_N24_full", |
| "gsm8k_test_500_topk16_global_ridge_N24_full", |
| "gsm8k_test_500_topk20_global_ridge_N24_full", |
| "gsm8k_test_500_topk24_global_ridge_N24_full", |
| "gsm8k_test_500_topk2_global_ridge_N24_full", |
| "gsm8k_test_500_topk4_global_ridge_N24_full", |
| "gsm8k_test_500_topk6_global_ridge_N24_full", |
| "gsm8k_test_500_topk8_global_ridge_N24_full", |
| "gsm_hard_pertensor_pca_N12_seed0", |
| "gsm_hard_pertensor_pca_N12_seed1", |
| "gsm_hard_pertensor_pca_N12_seed2", |
| "gsm_hard_pertensor_pca_N16_seed0", |
| "gsm_hard_pertensor_pca_N16_seed1", |
| "gsm_hard_pertensor_pca_N16_seed2", |
| "gsm_hard_pertensor_pca_N24_full", |
| "gsm_hard_pertensor_ridge_N12_seed0", |
| "gsm_hard_pertensor_ridge_N12_seed1", |
| "gsm_hard_pertensor_ridge_N12_seed2", |
| "gsm_hard_pertensor_ridge_N16_seed0", |
| "gsm_hard_pertensor_ridge_N16_seed1", |
| "gsm_hard_pertensor_ridge_N16_seed2", |
| "gsm_hard_pertensor_ridge_N24_full", |
| "gsm_hard_procrustes_N12_seed0", |
| "gsm_hard_procrustes_N12_seed1", |
| "gsm_hard_procrustes_N12_seed2", |
| "gsm_hard_procrustes_N16_seed0", |
| "gsm_hard_procrustes_N16_seed1", |
| "gsm_hard_procrustes_N16_seed2", |
| "gsm_hard_procrustes_N24_full", |
| "gsm_hard_topk12_global_ridge_N24_full", |
| "gsm_hard_topk16_global_ridge_N24_full", |
| "gsm_hard_topk20_global_ridge_N24_full", |
| "gsm_hard_topk24_global_ridge_N24_full", |
| "gsm_hard_topk2_global_ridge_N24_full", |
| "gsm_hard_topk4_global_ridge_N24_full", |
| "gsm_hard_topk6_global_ridge_N24_full", |
| "gsm_hard_topk8_global_ridge_N24_full", |
| "mbpp_plus_pertensor_pca_N12_seed0", |
| "mbpp_plus_pertensor_pca_N12_seed1", |
| "mbpp_plus_pertensor_pca_N12_seed2", |
| "mbpp_plus_pertensor_pca_N16_seed0", |
| "mbpp_plus_pertensor_pca_N16_seed1", |
| "mbpp_plus_pertensor_pca_N16_seed2", |
| "mbpp_plus_pertensor_pca_N24_full", |
| "mbpp_plus_pertensor_ridge_N12_seed0", |
| "mbpp_plus_pertensor_ridge_N12_seed1", |
| "mbpp_plus_pertensor_ridge_N12_seed2", |
| "mbpp_plus_pertensor_ridge_N16_seed0", |
| "mbpp_plus_pertensor_ridge_N16_seed1", |
| "mbpp_plus_pertensor_ridge_N16_seed2", |
| "mbpp_plus_pertensor_ridge_N24_full", |
| "mbpp_plus_procrustes_N12_seed0", |
| "mbpp_plus_procrustes_N12_seed1", |
| "mbpp_plus_procrustes_N12_seed2", |
| "mbpp_plus_procrustes_N16_seed0", |
| "mbpp_plus_procrustes_N16_seed1", |
| "mbpp_plus_procrustes_N16_seed2", |
| "mbpp_plus_procrustes_N24_full", |
| "mbpp_plus_topk12_global_ridge_N24_full", |
| "mbpp_plus_topk16_global_ridge_N24_full", |
| "mbpp_plus_topk20_global_ridge_N24_full", |
| "mbpp_plus_topk24_global_ridge_N24_full", |
| "mbpp_plus_topk2_global_ridge_N24_full", |
| "mbpp_plus_topk4_global_ridge_N24_full", |
| "mbpp_plus_topk6_global_ridge_N24_full", |
| "mbpp_plus_topk8_global_ridge_N24_full", |
| "mbpp_test_held_pertensor_pca_N12_seed0", |
| "mbpp_test_held_pertensor_pca_N12_seed1", |
| "mbpp_test_held_pertensor_pca_N12_seed2", |
| "mbpp_test_held_pertensor_pca_N16_seed0", |
| "mbpp_test_held_pertensor_pca_N16_seed1", |
| "mbpp_test_held_pertensor_pca_N16_seed2", |
| "mbpp_test_held_pertensor_pca_N24_full", |
| "mbpp_test_held_pertensor_ridge_N12_seed0", |
| "mbpp_test_held_pertensor_ridge_N12_seed1", |
| "mbpp_test_held_pertensor_ridge_N12_seed2", |
| "mbpp_test_held_pertensor_ridge_N16_seed0", |
| "mbpp_test_held_pertensor_ridge_N16_seed1", |
| "mbpp_test_held_pertensor_ridge_N16_seed2", |
| "mbpp_test_held_pertensor_ridge_N24_full", |
| "mbpp_test_held_procrustes_N12_seed0", |
| "mbpp_test_held_procrustes_N12_seed1", |
| "mbpp_test_held_procrustes_N12_seed2", |
| "mbpp_test_held_procrustes_N16_seed0", |
| "mbpp_test_held_procrustes_N16_seed1", |
| "mbpp_test_held_procrustes_N16_seed2", |
| "mbpp_test_held_procrustes_N24_full", |
| "mbpp_test_held_topk12_global_ridge_N24_full", |
| "mbpp_test_held_topk16_global_ridge_N24_full", |
| "mbpp_test_held_topk20_global_ridge_N24_full", |
| "mbpp_test_held_topk24_global_ridge_N24_full", |
| "mbpp_test_held_topk2_global_ridge_N24_full", |
| "mbpp_test_held_topk4_global_ridge_N24_full", |
| "mbpp_test_held_topk6_global_ridge_N24_full", |
| "mbpp_test_held_topk8_global_ridge_N24_full", |
| "openbookqa_test_pertensor_pca_N12_seed0", |
| "openbookqa_test_pertensor_pca_N12_seed1", |
| "openbookqa_test_pertensor_pca_N12_seed2", |
| "openbookqa_test_pertensor_pca_N16_seed0", |
| "openbookqa_test_pertensor_pca_N16_seed1", |
| "openbookqa_test_pertensor_pca_N16_seed2", |
| "openbookqa_test_pertensor_pca_N24_full", |
| "openbookqa_test_pertensor_ridge_N12_seed0", |
| "openbookqa_test_pertensor_ridge_N12_seed1", |
| "openbookqa_test_pertensor_ridge_N12_seed2", |
| "openbookqa_test_pertensor_ridge_N16_seed0", |
| "openbookqa_test_pertensor_ridge_N16_seed1", |
| "openbookqa_test_pertensor_ridge_N16_seed2", |
| "openbookqa_test_pertensor_ridge_N24_full", |
| "openbookqa_test_procrustes_N12_seed0", |
| "openbookqa_test_procrustes_N12_seed1", |
| "openbookqa_test_procrustes_N12_seed2", |
| "openbookqa_test_procrustes_N16_seed0", |
| "openbookqa_test_procrustes_N16_seed1", |
| "openbookqa_test_procrustes_N16_seed2", |
| "openbookqa_test_procrustes_N24_full", |
| "openbookqa_test_topk12_global_ridge_N24_full", |
| "openbookqa_test_topk16_global_ridge_N24_full", |
| "openbookqa_test_topk20_global_ridge_N24_full", |
| "openbookqa_test_topk24_global_ridge_N24_full", |
| "openbookqa_test_topk2_global_ridge_N24_full", |
| "openbookqa_test_topk4_global_ridge_N24_full", |
| "openbookqa_test_topk6_global_ridge_N24_full", |
| "openbookqa_test_topk8_global_ridge_N24_full" |
| ] |
| }, |
| "missing": [], |
| "count_warnings": [] |
| }, |
| "baselines": { |
| "gsm_hard": { |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15 |
| }, |
| "gsm8k_test_500": { |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333 |
| }, |
| "mbpp_test_held": { |
| "base_Y": 0.23, |
| "oracle": 0.32 |
| }, |
| "mbpp_plus": { |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45 |
| }, |
| "openbookqa_test": { |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333 |
| } |
| }, |
| "summary": { |
| "overall_spearman": 0.1299305286501594, |
| "per_heldout": { |
| "gsm_hard": 0.28540339681524574, |
| "gsm8k_test_500": 0.04367789323240817, |
| "mbpp_test_held": -0.2896337305973781, |
| "mbpp_plus": -0.10085771557215327, |
| "openbookqa_test": -0.2975768832863932 |
| }, |
| "per_target_domain": { |
| "code": 0.017636096528107978, |
| "math": 0.2826406573020395, |
| "science": -0.2975768832863932 |
| }, |
| "per_anchor_domain": { |
| "code": 0.14899075217681867, |
| "math": 0.07629665365697172, |
| "science": 0.2376382913201063 |
| }, |
| "decision": "weak locality, ridge subsumes it" |
| }, |
| "records": [ |
| { |
| "cell_id": "A::gsm8k_test_500::r4:humaneval", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r4:humaneval", |
| "anchor_name": "humaneval", |
| "anchor_round": "r4", |
| "anchor_domain": "code", |
| "cos_X": 0.9497017860412598, |
| "adapter_dir": "/workspace/round3_out/round4/Y/humaneval", |
| "accuracy": 0.17333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 28.044, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": 0.43750000000000006 |
| }, |
| { |
| "cell_id": "A::gsm8k_test_500::r4:mbpp", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r4:mbpp", |
| "anchor_name": "mbpp", |
| "anchor_round": "r4", |
| "anchor_domain": "code", |
| "cos_X": -0.00027140171732753515, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mbpp", |
| "accuracy": 0.06, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 24.609, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": -0.09375000000000003 |
| }, |
| { |
| "cell_id": "A::gsm8k_test_500::r4:mbpp_sanitized", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r4:mbpp_sanitized", |
| "anchor_name": "mbpp_sanitized", |
| "anchor_round": "r4", |
| "anchor_domain": "code", |
| "cos_X": 0.9527238011360168, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mbpp_sanitized", |
| "accuracy": 0.06666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 23.479, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": -0.06250000000000001 |
| }, |
| { |
| "cell_id": "A::gsm8k_test_500::r5:conala_curated", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r5:conala_curated", |
| "anchor_name": "conala_curated", |
| "anchor_round": "r5", |
| "anchor_domain": "code", |
| "cos_X": 0.8599404692649841, |
| "adapter_dir": "/workspace/round3_out/round5/Y/conala_curated", |
| "accuracy": 0.18333333333333332, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 28.998, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": 0.48437499999999994 |
| }, |
| { |
| "cell_id": "A::gsm8k_test_500::r5:humaneval", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r5:humaneval", |
| "anchor_name": "humaneval", |
| "anchor_round": "r5", |
| "anchor_domain": "code", |
| "cos_X": 0.9497017860412598, |
| "adapter_dir": "/workspace/round3_out/round5/Y/humaneval", |
| "accuracy": 0.17333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 29.242, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": 0.43750000000000006 |
| }, |
| { |
| "cell_id": "A::gsm8k_test_500::r5:mbpp_sanitized", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r5:mbpp_sanitized", |
| "anchor_name": "mbpp_sanitized", |
| "anchor_round": "r5", |
| "anchor_domain": "code", |
| "cos_X": -0.00027140171732753515, |
| "adapter_dir": "/workspace/round3_out/round5/Y/mbpp_sanitized", |
| "accuracy": 0.06, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 25.368, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": -0.09375000000000003 |
| }, |
| { |
| "cell_id": "A::gsm8k_test_500::r4:aqua_rat", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r4:aqua_rat", |
| "anchor_name": "aqua_rat", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.8731036186218262, |
| "adapter_dir": "/workspace/round3_out/round4/Y/aqua_rat", |
| "accuracy": 0.056666666666666664, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 28.153, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": -0.10937500000000003 |
| }, |
| { |
| "cell_id": "A::gsm8k_test_500::r4:gsm8k", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r4:gsm8k", |
| "anchor_name": "gsm8k", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": -0.0006812263745814562, |
| "adapter_dir": "/workspace/round3_out/round4/Y/gsm8k", |
| "accuracy": 0.14, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 6.608, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": 0.28125000000000006 |
| }, |
| { |
| "cell_id": "A::gsm8k_test_500::r4:math_algebra_easy", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r4:math_algebra_easy", |
| "anchor_name": "math_algebra_easy", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.9428298473358154, |
| "adapter_dir": "/workspace/round3_out/round4/Y/math_algebra_easy", |
| "accuracy": 0.08333333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 6.667, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": 0.015624999999999972 |
| }, |
| { |
| "cell_id": "A::gsm8k_test_500::r4:math_counting_easy", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r4:math_counting_easy", |
| "anchor_name": "math_counting_easy", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.9606146216392517, |
| "adapter_dir": "/workspace/round3_out/round4/Y/math_counting_easy", |
| "accuracy": 0.07333333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 10.451, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": -0.03125000000000001 |
| }, |
| { |
| "cell_id": "A::gsm8k_test_500::r4:multiarith", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r4:multiarith", |
| "anchor_name": "multiarith", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.9475974440574646, |
| "adapter_dir": "/workspace/round3_out/round4/Y/multiarith", |
| "accuracy": 0.07666666666666666, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 15.16, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": -0.015625000000000038 |
| }, |
| { |
| "cell_id": "A::gsm8k_test_500::r4:svamp", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r4:svamp", |
| "anchor_name": "svamp", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.9288908839225769, |
| "adapter_dir": "/workspace/round3_out/round4/Y/svamp", |
| "accuracy": 0.07, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 5.968, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": -0.04687499999999998 |
| }, |
| { |
| "cell_id": "A::gsm8k_test_500::r5:aqua_rat_numeric", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r5:aqua_rat_numeric", |
| "anchor_name": "aqua_rat_numeric", |
| "anchor_round": "r5", |
| "anchor_domain": "math", |
| "cos_X": -0.0004875913728028536, |
| "adapter_dir": "/workspace/round3_out/round5/Y/aqua_rat_numeric", |
| "accuracy": 0.08, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 29.239, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": 0.0 |
| }, |
| { |
| "cell_id": "A::gsm8k_test_500::r5:math_counting_easy", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r5:math_counting_easy", |
| "anchor_name": "math_counting_easy", |
| "anchor_round": "r5", |
| "anchor_domain": "math", |
| "cos_X": -0.0004786302160937339, |
| "adapter_dir": "/workspace/round3_out/round5/Y/math_counting_easy", |
| "accuracy": 0.07333333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 8.228, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": -0.03125000000000001 |
| }, |
| { |
| "cell_id": "A::gsm8k_test_500::r5:mawps", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r5:mawps", |
| "anchor_name": "mawps", |
| "anchor_round": "r5", |
| "anchor_domain": "math", |
| "cos_X": -0.0008274300489574671, |
| "adapter_dir": "/workspace/round3_out/round5/Y/mawps", |
| "accuracy": 0.07666666666666666, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 10.047, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": -0.015625000000000038 |
| }, |
| { |
| "cell_id": "A::gsm8k_test_500::r4:arc_easy", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r4:arc_easy", |
| "anchor_name": "arc_easy", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": -0.0004913151497021317, |
| "adapter_dir": "/workspace/round3_out/round4/Y/arc_easy", |
| "accuracy": 0.07333333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 28.256, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": -0.03125000000000001 |
| }, |
| { |
| "cell_id": "A::gsm8k_test_500::r4:medmcqa_easy", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r4:medmcqa_easy", |
| "anchor_name": "medmcqa_easy", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.8598978519439697, |
| "adapter_dir": "/workspace/round3_out/round4/Y/medmcqa_easy", |
| "accuracy": 0.09333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 28.938, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": 0.06250000000000001 |
| }, |
| { |
| "cell_id": "A::gsm8k_test_500::r4:mmlu_elementary_math", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r4:mmlu_elementary_math", |
| "anchor_name": "mmlu_elementary_math", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.9377825260162354, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_elementary_math", |
| "accuracy": 0.08, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 26.479, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": 0.0 |
| }, |
| { |
| "cell_id": "A::gsm8k_test_500::r4:mmlu_high_school_biology", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r4:mmlu_high_school_biology", |
| "anchor_name": "mmlu_high_school_biology", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.9370604753494263, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_biology", |
| "accuracy": 0.07666666666666666, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 28.829, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": -0.015625000000000038 |
| }, |
| { |
| "cell_id": "A::gsm8k_test_500::r4:mmlu_high_school_physics", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r4:mmlu_high_school_physics", |
| "anchor_name": "mmlu_high_school_physics", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.9513278603553772, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_physics", |
| "accuracy": 0.09333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 27.159, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": 0.06250000000000001 |
| }, |
| { |
| "cell_id": "A::gsm8k_test_500::r4:openbookqa", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r4:openbookqa", |
| "anchor_name": "openbookqa", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.8619711995124817, |
| "adapter_dir": "/workspace/round3_out/round4/Y/openbookqa", |
| "accuracy": 0.09666666666666666, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 29.32, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": 0.07812499999999999 |
| }, |
| { |
| "cell_id": "A::gsm8k_test_500::r4:sciq", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r4:sciq", |
| "anchor_name": "sciq", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": -0.00034972114372067153, |
| "adapter_dir": "/workspace/round3_out/round4/Y/sciq", |
| "accuracy": 0.1, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 28.327, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": 0.09375000000000003 |
| }, |
| { |
| "cell_id": "A::gsm8k_test_500::r5:medmcqa_easy", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r5:medmcqa_easy", |
| "anchor_name": "medmcqa_easy", |
| "anchor_round": "r5", |
| "anchor_domain": "science", |
| "cos_X": 0.8598978519439697, |
| "adapter_dir": "/workspace/round3_out/round5/Y/medmcqa_easy", |
| "accuracy": 0.09333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 28.144, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": 0.06250000000000001 |
| }, |
| { |
| "cell_id": "A::gsm8k_test_500::r5:pubmedqa_pqal", |
| "stage": "locality_single_anchor", |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "anchor_ref": "r5:pubmedqa_pqal", |
| "anchor_name": "pubmedqa_pqal", |
| "anchor_round": "r5", |
| "anchor_domain": "science", |
| "cos_X": 0.8853808641433716, |
| "adapter_dir": "/workspace/round3_out/round5/Y/pubmedqa_pqal", |
| "accuracy": 0.08, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 18.209, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "single_anchor_gap": 0.0 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r4:humaneval", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r4:humaneval", |
| "anchor_name": "humaneval", |
| "anchor_round": "r4", |
| "anchor_domain": "code", |
| "cos_X": 0.8956640958786011, |
| "adapter_dir": "/workspace/round3_out/round4/Y/humaneval", |
| "accuracy": 0.07666666666666666, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 28.25, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": 0.15384615384615374 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r4:mbpp", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r4:mbpp", |
| "anchor_name": "mbpp", |
| "anchor_round": "r4", |
| "anchor_domain": "code", |
| "cos_X": -0.000505593023262918, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mbpp", |
| "accuracy": 0.056666666666666664, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 27.549, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": -0.07692307692307702 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r4:mbpp_sanitized", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r4:mbpp_sanitized", |
| "anchor_name": "mbpp_sanitized", |
| "anchor_round": "r4", |
| "anchor_domain": "code", |
| "cos_X": 0.8983818888664246, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mbpp_sanitized", |
| "accuracy": 0.05333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 27.652, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": -0.11538461538461542 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r5:conala_curated", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r5:conala_curated", |
| "anchor_name": "conala_curated", |
| "anchor_round": "r5", |
| "anchor_domain": "code", |
| "cos_X": 0.8125380277633667, |
| "adapter_dir": "/workspace/round3_out/round5/Y/conala_curated", |
| "accuracy": 0.07333333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 28.894, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": 0.11538461538461534 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r5:humaneval", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r5:humaneval", |
| "anchor_name": "humaneval", |
| "anchor_round": "r5", |
| "anchor_domain": "code", |
| "cos_X": 0.8956640958786011, |
| "adapter_dir": "/workspace/round3_out/round5/Y/humaneval", |
| "accuracy": 0.07666666666666666, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 29.186, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": 0.15384615384615374 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r5:mbpp_sanitized", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r5:mbpp_sanitized", |
| "anchor_name": "mbpp_sanitized", |
| "anchor_round": "r5", |
| "anchor_domain": "code", |
| "cos_X": -0.000505593023262918, |
| "adapter_dir": "/workspace/round3_out/round5/Y/mbpp_sanitized", |
| "accuracy": 0.056666666666666664, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 28.051, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": -0.07692307692307702 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r4:aqua_rat", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r4:aqua_rat", |
| "anchor_name": "aqua_rat", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.8249850273132324, |
| "adapter_dir": "/workspace/round3_out/round4/Y/aqua_rat", |
| "accuracy": 0.05333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 28.068, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": -0.11538461538461542 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r4:gsm8k", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r4:gsm8k", |
| "anchor_name": "gsm8k", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": -0.0006781710544601083, |
| "adapter_dir": "/workspace/round3_out/round4/Y/gsm8k", |
| "accuracy": 0.06666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 14.301, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": 0.038461538461538394 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r4:math_algebra_easy", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r4:math_algebra_easy", |
| "anchor_name": "math_algebra_easy", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.8887502551078796, |
| "adapter_dir": "/workspace/round3_out/round4/Y/math_algebra_easy", |
| "accuracy": 0.05333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 17.285, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": -0.11538461538461542 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r4:math_counting_easy", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r4:math_counting_easy", |
| "anchor_name": "math_counting_easy", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.9051075577735901, |
| "adapter_dir": "/workspace/round3_out/round4/Y/math_counting_easy", |
| "accuracy": 0.03666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 20.983, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": -0.30769230769230776 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r4:multiarith", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r4:multiarith", |
| "anchor_name": "multiarith", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.8932132124900818, |
| "adapter_dir": "/workspace/round3_out/round4/Y/multiarith", |
| "accuracy": 0.04666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 26.632, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": -0.19230769230769237 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r4:svamp", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r4:svamp", |
| "anchor_name": "svamp", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.8764325380325317, |
| "adapter_dir": "/workspace/round3_out/round4/Y/svamp", |
| "accuracy": 0.06666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 23.451, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": 0.038461538461538394 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r5:aqua_rat_numeric", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r5:aqua_rat_numeric", |
| "anchor_name": "aqua_rat_numeric", |
| "anchor_round": "r5", |
| "anchor_domain": "math", |
| "cos_X": -0.0008573997183702886, |
| "adapter_dir": "/workspace/round3_out/round5/Y/aqua_rat_numeric", |
| "accuracy": 0.03666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 29.81, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": -0.30769230769230776 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r5:math_counting_easy", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r5:math_counting_easy", |
| "anchor_name": "math_counting_easy", |
| "anchor_round": "r5", |
| "anchor_domain": "math", |
| "cos_X": -0.0006265510455705225, |
| "adapter_dir": "/workspace/round3_out/round5/Y/math_counting_easy", |
| "accuracy": 0.04, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 19.003, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": -0.26923076923076933 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r5:mawps", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r5:mawps", |
| "anchor_name": "mawps", |
| "anchor_round": "r5", |
| "anchor_domain": "math", |
| "cos_X": -0.0011123694712296128, |
| "adapter_dir": "/workspace/round3_out/round5/Y/mawps", |
| "accuracy": 0.02, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 20.956, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": -0.5000000000000001 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r4:arc_easy", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r4:arc_easy", |
| "anchor_name": "arc_easy", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": -0.0009216612670570612, |
| "adapter_dir": "/workspace/round3_out/round4/Y/arc_easy", |
| "accuracy": 0.05, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 29.551, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": -0.15384615384615388 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r4:medmcqa_easy", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r4:medmcqa_easy", |
| "anchor_name": "medmcqa_easy", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.8125821948051453, |
| "adapter_dir": "/workspace/round3_out/round4/Y/medmcqa_easy", |
| "accuracy": 0.03333333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 28.713, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": -0.34615384615384626 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r4:mmlu_elementary_math", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r4:mmlu_elementary_math", |
| "anchor_name": "mmlu_elementary_math", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.8851031064987183, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_elementary_math", |
| "accuracy": 0.03, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 29.335, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": -0.38461538461538475 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r4:mmlu_high_school_biology", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r4:mmlu_high_school_biology", |
| "anchor_name": "mmlu_high_school_biology", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.8839024305343628, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_biology", |
| "accuracy": 0.04, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 29.358, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": -0.26923076923076933 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r4:mmlu_high_school_physics", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r4:mmlu_high_school_physics", |
| "anchor_name": "mmlu_high_school_physics", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.8970074653625488, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_physics", |
| "accuracy": 0.06, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 28.333, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": -0.038461538461538554 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r4:openbookqa", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r4:openbookqa", |
| "anchor_name": "openbookqa", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.8150046467781067, |
| "adapter_dir": "/workspace/round3_out/round4/Y/openbookqa", |
| "accuracy": 0.056666666666666664, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 29.83, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": -0.07692307692307702 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r4:sciq", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r4:sciq", |
| "anchor_name": "sciq", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": -0.0006476823473349214, |
| "adapter_dir": "/workspace/round3_out/round4/Y/sciq", |
| "accuracy": 0.023333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 28.818, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": -0.4615384615384617 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r5:medmcqa_easy", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r5:medmcqa_easy", |
| "anchor_name": "medmcqa_easy", |
| "anchor_round": "r5", |
| "anchor_domain": "science", |
| "cos_X": 0.8125821948051453, |
| "adapter_dir": "/workspace/round3_out/round5/Y/medmcqa_easy", |
| "accuracy": 0.03333333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 28.137, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": -0.34615384615384626 |
| }, |
| { |
| "cell_id": "A::gsm_hard::r5:pubmedqa_pqal", |
| "stage": "locality_single_anchor", |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "anchor_ref": "r5:pubmedqa_pqal", |
| "anchor_name": "pubmedqa_pqal", |
| "anchor_round": "r5", |
| "anchor_domain": "science", |
| "cos_X": 0.8367233276367188, |
| "adapter_dir": "/workspace/round3_out/round5/Y/pubmedqa_pqal", |
| "accuracy": 0.05, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 28.028, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "single_anchor_gap": -0.15384615384615388 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r4:humaneval", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r4:humaneval", |
| "anchor_name": "humaneval", |
| "anchor_round": "r4", |
| "anchor_domain": "code", |
| "cos_X": 0.9624950885772705, |
| "adapter_dir": "/workspace/round3_out/round4/Y/humaneval", |
| "accuracy": 0.2, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 150.314, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": -0.07142857142857141 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r4:mbpp", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r4:mbpp", |
| "anchor_name": "mbpp", |
| "anchor_round": "r4", |
| "anchor_domain": "code", |
| "cos_X": -0.0003052547399420291, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mbpp", |
| "accuracy": 0.2833333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 146.154, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": 0.28571428571428564 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r4:mbpp_sanitized", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r4:mbpp_sanitized", |
| "anchor_name": "mbpp_sanitized", |
| "anchor_round": "r4", |
| "anchor_domain": "code", |
| "cos_X": 0.9884072542190552, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mbpp_sanitized", |
| "accuracy": 0.2633333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 149.756, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": 0.19999999999999984 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r5:conala_curated", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r5:conala_curated", |
| "anchor_name": "conala_curated", |
| "anchor_round": "r5", |
| "anchor_domain": "code", |
| "cos_X": 0.8741890788078308, |
| "adapter_dir": "/workspace/round3_out/round5/Y/conala_curated", |
| "accuracy": 0.18666666666666668, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 217.294, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": -0.12857142857142856 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r5:humaneval", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r5:humaneval", |
| "anchor_name": "humaneval", |
| "anchor_round": "r5", |
| "anchor_domain": "code", |
| "cos_X": 0.9624950885772705, |
| "adapter_dir": "/workspace/round3_out/round5/Y/humaneval", |
| "accuracy": 0.2, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 157.393, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": -0.07142857142857141 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r5:mbpp_sanitized", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r5:mbpp_sanitized", |
| "anchor_name": "mbpp_sanitized", |
| "anchor_round": "r5", |
| "anchor_domain": "code", |
| "cos_X": -0.0003052547399420291, |
| "adapter_dir": "/workspace/round3_out/round5/Y/mbpp_sanitized", |
| "accuracy": 0.2833333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 149.779, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": 0.28571428571428564 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r4:aqua_rat", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r4:aqua_rat", |
| "anchor_name": "aqua_rat", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.8814506530761719, |
| "adapter_dir": "/workspace/round3_out/round4/Y/aqua_rat", |
| "accuracy": 0.21666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 165.548, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": 0.0 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r4:gsm8k", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r4:gsm8k", |
| "anchor_name": "gsm8k", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": -0.000335412856657058, |
| "adapter_dir": "/workspace/round3_out/round4/Y/gsm8k", |
| "accuracy": 0.20666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 161.016, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": -0.04285714285714289 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r4:math_algebra_easy", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r4:math_algebra_easy", |
| "anchor_name": "math_algebra_easy", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.9431692361831665, |
| "adapter_dir": "/workspace/round3_out/round4/Y/math_algebra_easy", |
| "accuracy": 0.21333333333333335, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 152.059, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": -0.01428571428571426 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r4:math_counting_easy", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r4:math_counting_easy", |
| "anchor_name": "math_counting_easy", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.9616791009902954, |
| "adapter_dir": "/workspace/round3_out/round4/Y/math_counting_easy", |
| "accuracy": 0.22333333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 161.601, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": 0.02857142857142852 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r4:multiarith", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r4:multiarith", |
| "anchor_name": "multiarith", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.9476068615913391, |
| "adapter_dir": "/workspace/round3_out/round4/Y/multiarith", |
| "accuracy": 0.18333333333333332, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 157.375, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": -0.14285714285714293 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r4:svamp", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r4:svamp", |
| "anchor_name": "svamp", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.9265090823173523, |
| "adapter_dir": "/workspace/round3_out/round4/Y/svamp", |
| "accuracy": 0.2, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 156.805, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": -0.07142857142857141 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r5:aqua_rat_numeric", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r5:aqua_rat_numeric", |
| "anchor_name": "aqua_rat_numeric", |
| "anchor_round": "r5", |
| "anchor_domain": "math", |
| "cos_X": -0.00045860654790885746, |
| "adapter_dir": "/workspace/round3_out/round5/Y/aqua_rat_numeric", |
| "accuracy": 0.21333333333333335, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 181.409, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": -0.01428571428571426 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r5:math_counting_easy", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r5:math_counting_easy", |
| "anchor_name": "math_counting_easy", |
| "anchor_round": "r5", |
| "anchor_domain": "math", |
| "cos_X": -0.0002999037387780845, |
| "adapter_dir": "/workspace/round3_out/round5/Y/math_counting_easy", |
| "accuracy": 0.22, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 153.077, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": 0.01428571428571426 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r5:mawps", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r5:mawps", |
| "anchor_name": "mawps", |
| "anchor_round": "r5", |
| "anchor_domain": "math", |
| "cos_X": -0.0004362465988378972, |
| "adapter_dir": "/workspace/round3_out/round5/Y/mawps", |
| "accuracy": 0.19666666666666666, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 160.786, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": -0.08571428571428578 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r4:arc_easy", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r4:arc_easy", |
| "anchor_name": "arc_easy", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": -0.000321700208587572, |
| "adapter_dir": "/workspace/round3_out/round4/Y/arc_easy", |
| "accuracy": 0.21, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 158.264, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": -0.028571428571428636 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r4:medmcqa_easy", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r4:medmcqa_easy", |
| "anchor_name": "medmcqa_easy", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.8672773241996765, |
| "adapter_dir": "/workspace/round3_out/round4/Y/medmcqa_easy", |
| "accuracy": 0.21333333333333335, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 171.456, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": -0.01428571428571426 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r4:mmlu_elementary_math", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r4:mmlu_elementary_math", |
| "anchor_name": "mmlu_elementary_math", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.9442014694213867, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_elementary_math", |
| "accuracy": 0.20666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 149.307, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": -0.04285714285714289 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r4:mmlu_high_school_biology", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r4:mmlu_high_school_biology", |
| "anchor_name": "mmlu_high_school_biology", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.9451485872268677, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_biology", |
| "accuracy": 0.21333333333333335, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 161.958, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": -0.01428571428571426 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r4:mmlu_high_school_physics", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r4:mmlu_high_school_physics", |
| "anchor_name": "mmlu_high_school_physics", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.9600575566291809, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_physics", |
| "accuracy": 0.21, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 160.754, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": -0.028571428571428636 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r4:openbookqa", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r4:openbookqa", |
| "anchor_name": "openbookqa", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.8676939606666565, |
| "adapter_dir": "/workspace/round3_out/round4/Y/openbookqa", |
| "accuracy": 0.2, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 160.035, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": -0.07142857142857141 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r4:sciq", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r4:sciq", |
| "anchor_name": "sciq", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": -0.0003326318983454257, |
| "adapter_dir": "/workspace/round3_out/round4/Y/sciq", |
| "accuracy": 0.21, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 154.652, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": -0.028571428571428636 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r5:medmcqa_easy", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r5:medmcqa_easy", |
| "anchor_name": "medmcqa_easy", |
| "anchor_round": "r5", |
| "anchor_domain": "science", |
| "cos_X": 0.8672773241996765, |
| "adapter_dir": "/workspace/round3_out/round5/Y/medmcqa_easy", |
| "accuracy": 0.21333333333333335, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 166.789, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": -0.01428571428571426 |
| }, |
| { |
| "cell_id": "A::mbpp_plus::r5:pubmedqa_pqal", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "anchor_ref": "r5:pubmedqa_pqal", |
| "anchor_name": "pubmedqa_pqal", |
| "anchor_round": "r5", |
| "anchor_domain": "science", |
| "cos_X": 0.8933367133140564, |
| "adapter_dir": "/workspace/round3_out/round5/Y/pubmedqa_pqal", |
| "accuracy": 0.20333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 157.746, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "single_anchor_gap": -0.057142857142857155 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r4:humaneval", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r4:humaneval", |
| "anchor_name": "humaneval", |
| "anchor_round": "r4", |
| "anchor_domain": "code", |
| "cos_X": 0.9843209981918335, |
| "adapter_dir": "/workspace/round3_out/round4/Y/humaneval", |
| "accuracy": 0.23, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 1, |
| "eval_seconds": 52.845, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.0 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r4:mbpp", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r4:mbpp", |
| "anchor_name": "mbpp", |
| "anchor_round": "r4", |
| "anchor_domain": "code", |
| "cos_X": -0.00017454303451813757, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mbpp", |
| "accuracy": 0.3, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 1, |
| "eval_seconds": 49.504, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.7777777777777776 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r4:mbpp_sanitized", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r4:mbpp_sanitized", |
| "anchor_name": "mbpp_sanitized", |
| "anchor_round": "r4", |
| "anchor_domain": "code", |
| "cos_X": 1.0012516975402832, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mbpp_sanitized", |
| "accuracy": 0.29, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 3, |
| "eval_seconds": 51.035, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.6666666666666664 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r5:conala_curated", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r5:conala_curated", |
| "anchor_name": "conala_curated", |
| "anchor_round": "r5", |
| "anchor_domain": "code", |
| "cos_X": 0.891869068145752, |
| "adapter_dir": "/workspace/round3_out/round5/Y/conala_curated", |
| "accuracy": 0.23, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 5, |
| "eval_seconds": 74.517, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.0 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r5:humaneval", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r5:humaneval", |
| "anchor_name": "humaneval", |
| "anchor_round": "r5", |
| "anchor_domain": "code", |
| "cos_X": 0.9843209981918335, |
| "adapter_dir": "/workspace/round3_out/round5/Y/humaneval", |
| "accuracy": 0.23, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 4, |
| "eval_seconds": 55.158, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.0 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r5:mbpp_sanitized", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r5:mbpp_sanitized", |
| "anchor_name": "mbpp_sanitized", |
| "anchor_round": "r5", |
| "anchor_domain": "code", |
| "cos_X": -0.00017454303451813757, |
| "adapter_dir": "/workspace/round3_out/round5/Y/mbpp_sanitized", |
| "accuracy": 0.3, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 3, |
| "eval_seconds": 50.742, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.7777777777777776 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r4:aqua_rat", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r4:aqua_rat", |
| "anchor_name": "aqua_rat", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.9019709825515747, |
| "adapter_dir": "/workspace/round3_out/round4/Y/aqua_rat", |
| "accuracy": 0.24, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 6, |
| "eval_seconds": 52.984, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.11111111111111091 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r4:gsm8k", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r4:gsm8k", |
| "anchor_name": "gsm8k", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": -0.00032459679641760886, |
| "adapter_dir": "/workspace/round3_out/round4/Y/gsm8k", |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 0, |
| "eval_seconds": 46.407, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.22222222222222213 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r4:math_algebra_easy", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r4:math_algebra_easy", |
| "anchor_name": "math_algebra_easy", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.9648966789245605, |
| "adapter_dir": "/workspace/round3_out/round4/Y/math_algebra_easy", |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 5, |
| "eval_seconds": 48.28, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.22222222222222213 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r4:math_counting_easy", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r4:math_counting_easy", |
| "anchor_name": "math_counting_easy", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.9844874739646912, |
| "adapter_dir": "/workspace/round3_out/round4/Y/math_counting_easy", |
| "accuracy": 0.23, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 0, |
| "eval_seconds": 51.705, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.0 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r4:multiarith", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r4:multiarith", |
| "anchor_name": "multiarith", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.969685435295105, |
| "adapter_dir": "/workspace/round3_out/round4/Y/multiarith", |
| "accuracy": 0.23, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 6, |
| "eval_seconds": 50.83, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.0 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r4:svamp", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r4:svamp", |
| "anchor_name": "svamp", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.9480026960372925, |
| "adapter_dir": "/workspace/round3_out/round4/Y/svamp", |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 5, |
| "eval_seconds": 49.549, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.22222222222222213 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r5:aqua_rat_numeric", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r5:aqua_rat_numeric", |
| "anchor_name": "aqua_rat_numeric", |
| "anchor_round": "r5", |
| "anchor_domain": "math", |
| "cos_X": -0.00034561159554868937, |
| "adapter_dir": "/workspace/round3_out/round5/Y/aqua_rat_numeric", |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 0, |
| "eval_seconds": 58.851, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.22222222222222213 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r5:math_counting_easy", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r5:math_counting_easy", |
| "anchor_name": "math_counting_easy", |
| "anchor_round": "r5", |
| "anchor_domain": "math", |
| "cos_X": -0.00020121937268413603, |
| "adapter_dir": "/workspace/round3_out/round5/Y/math_counting_easy", |
| "accuracy": 0.24, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 1, |
| "eval_seconds": 46.892, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.11111111111111091 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r5:mawps", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r5:mawps", |
| "anchor_name": "mawps", |
| "anchor_round": "r5", |
| "anchor_domain": "math", |
| "cos_X": -0.0002617448626551777, |
| "adapter_dir": "/workspace/round3_out/round5/Y/mawps", |
| "accuracy": 0.24, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 2, |
| "eval_seconds": 47.997, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.11111111111111091 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r4:arc_easy", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r4:arc_easy", |
| "anchor_name": "arc_easy", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": -0.00020039879018440843, |
| "adapter_dir": "/workspace/round3_out/round4/Y/arc_easy", |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 3, |
| "eval_seconds": 50.507, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.22222222222222213 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r4:medmcqa_easy", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r4:medmcqa_easy", |
| "anchor_name": "medmcqa_easy", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.8870396614074707, |
| "adapter_dir": "/workspace/round3_out/round4/Y/medmcqa_easy", |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 7, |
| "eval_seconds": 52.987, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.22222222222222213 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r4:mmlu_elementary_math", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r4:mmlu_elementary_math", |
| "anchor_name": "mmlu_elementary_math", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.9657072424888611, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_elementary_math", |
| "accuracy": 0.26, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 4, |
| "eval_seconds": 43.06, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.3333333333333333 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r4:mmlu_high_school_biology", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r4:mmlu_high_school_biology", |
| "anchor_name": "mmlu_high_school_biology", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.9674594402313232, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_biology", |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 7, |
| "eval_seconds": 48.926, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.22222222222222213 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r4:mmlu_high_school_physics", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r4:mmlu_high_school_physics", |
| "anchor_name": "mmlu_high_school_physics", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.983260452747345, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_physics", |
| "accuracy": 0.24, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 2, |
| "eval_seconds": 53.664, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.11111111111111091 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r4:openbookqa", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r4:openbookqa", |
| "anchor_name": "openbookqa", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.88755863904953, |
| "adapter_dir": "/workspace/round3_out/round4/Y/openbookqa", |
| "accuracy": 0.24, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 4, |
| "eval_seconds": 50.62, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.11111111111111091 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r4:sciq", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r4:sciq", |
| "anchor_name": "sciq", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": -0.00023939934908412397, |
| "adapter_dir": "/workspace/round3_out/round4/Y/sciq", |
| "accuracy": 0.24, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 2, |
| "eval_seconds": 54.751, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.11111111111111091 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r5:medmcqa_easy", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r5:medmcqa_easy", |
| "anchor_name": "medmcqa_easy", |
| "anchor_round": "r5", |
| "anchor_domain": "science", |
| "cos_X": 0.8870396614074707, |
| "adapter_dir": "/workspace/round3_out/round5/Y/medmcqa_easy", |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 6, |
| "eval_seconds": 51.649, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.22222222222222213 |
| }, |
| { |
| "cell_id": "A::mbpp_test_held::r5:pubmedqa_pqal", |
| "stage": "locality_single_anchor", |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "anchor_ref": "r5:pubmedqa_pqal", |
| "anchor_name": "pubmedqa_pqal", |
| "anchor_round": "r5", |
| "anchor_domain": "science", |
| "cos_X": 0.9136675000190735, |
| "adapter_dir": "/workspace/round3_out/round5/Y/pubmedqa_pqal", |
| "accuracy": 0.24, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 7, |
| "eval_seconds": 50.918, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "single_anchor_gap": 0.11111111111111091 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r4:humaneval", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r4:humaneval", |
| "anchor_name": "humaneval", |
| "anchor_round": "r4", |
| "anchor_domain": "code", |
| "cos_X": 0.9508311748504639, |
| "adapter_dir": "/workspace/round3_out/round4/Y/humaneval", |
| "accuracy": 0.7166666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 4.404, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": 0.02439024390243918 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r4:mbpp", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r4:mbpp", |
| "anchor_name": "mbpp", |
| "anchor_round": "r4", |
| "anchor_domain": "code", |
| "cos_X": -0.00021616967569570988, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mbpp", |
| "accuracy": 0.6833333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 4.538, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": -0.09756097560975592 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r4:mbpp_sanitized", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r4:mbpp_sanitized", |
| "anchor_name": "mbpp_sanitized", |
| "anchor_round": "r4", |
| "anchor_domain": "code", |
| "cos_X": 0.9530814290046692, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mbpp_sanitized", |
| "accuracy": 0.6933333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 4.846, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": -0.060975609756097345 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r5:conala_curated", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r5:conala_curated", |
| "anchor_name": "conala_curated", |
| "anchor_round": "r5", |
| "anchor_domain": "code", |
| "cos_X": 0.8603157997131348, |
| "adapter_dir": "/workspace/round3_out/round5/Y/conala_curated", |
| "accuracy": 0.7233333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 28.747, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": 0.04878048780487836 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r5:humaneval", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r5:humaneval", |
| "anchor_name": "humaneval", |
| "anchor_round": "r5", |
| "anchor_domain": "code", |
| "cos_X": 0.9508311748504639, |
| "adapter_dir": "/workspace/round3_out/round5/Y/humaneval", |
| "accuracy": 0.7166666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 4.672, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": 0.02439024390243918 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r5:mbpp_sanitized", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r5:mbpp_sanitized", |
| "anchor_name": "mbpp_sanitized", |
| "anchor_round": "r5", |
| "anchor_domain": "code", |
| "cos_X": -0.00021616967569570988, |
| "adapter_dir": "/workspace/round3_out/round5/Y/mbpp_sanitized", |
| "accuracy": 0.6833333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 4.696, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": -0.09756097560975592 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r4:aqua_rat", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r4:aqua_rat", |
| "anchor_name": "aqua_rat", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.8845954537391663, |
| "adapter_dir": "/workspace/round3_out/round4/Y/aqua_rat", |
| "accuracy": 0.7, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 27.648, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": -0.03658536585365857 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r4:gsm8k", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r4:gsm8k", |
| "anchor_name": "gsm8k", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": -0.0003817097167484462, |
| "adapter_dir": "/workspace/round3_out/round4/Y/gsm8k", |
| "accuracy": 0.7333333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 5.373, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": 0.08536585365853654 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r4:math_algebra_easy", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r4:math_algebra_easy", |
| "anchor_name": "math_algebra_easy", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.9346193075180054, |
| "adapter_dir": "/workspace/round3_out/round4/Y/math_algebra_easy", |
| "accuracy": 0.7233333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 8.361, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": 0.04878048780487836 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r4:math_counting_easy", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r4:math_counting_easy", |
| "anchor_name": "math_counting_easy", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.9526723623275757, |
| "adapter_dir": "/workspace/round3_out/round4/Y/math_counting_easy", |
| "accuracy": 0.7233333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 3.554, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": 0.04878048780487836 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r4:multiarith", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r4:multiarith", |
| "anchor_name": "multiarith", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.9391674399375916, |
| "adapter_dir": "/workspace/round3_out/round4/Y/multiarith", |
| "accuracy": 0.7266666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 4.66, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": 0.060975609756097754 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r4:svamp", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r4:svamp", |
| "anchor_name": "svamp", |
| "anchor_round": "r4", |
| "anchor_domain": "math", |
| "cos_X": 0.9191423058509827, |
| "adapter_dir": "/workspace/round3_out/round4/Y/svamp", |
| "accuracy": 0.69, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 3.473, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": -0.07317073170731714 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r5:aqua_rat_numeric", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r5:aqua_rat_numeric", |
| "anchor_name": "aqua_rat_numeric", |
| "anchor_round": "r5", |
| "anchor_domain": "math", |
| "cos_X": -0.0004580095992423594, |
| "adapter_dir": "/workspace/round3_out/round5/Y/aqua_rat_numeric", |
| "accuracy": 0.7566666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 26.635, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": 0.17073170731707346 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r5:math_counting_easy", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r5:math_counting_easy", |
| "anchor_name": "math_counting_easy", |
| "anchor_round": "r5", |
| "anchor_domain": "math", |
| "cos_X": -0.00032379472395405173, |
| "adapter_dir": "/workspace/round3_out/round5/Y/math_counting_easy", |
| "accuracy": 0.7166666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 3.361, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": 0.02439024390243918 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r5:mawps", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r5:mawps", |
| "anchor_name": "mawps", |
| "anchor_round": "r5", |
| "anchor_domain": "math", |
| "cos_X": -0.00039862250559963286, |
| "adapter_dir": "/workspace/round3_out/round5/Y/mawps", |
| "accuracy": 0.73, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 6.23, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": 0.07317073170731714 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r4:arc_easy", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r4:arc_easy", |
| "anchor_name": "arc_easy", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": -0.0005088147590868175, |
| "adapter_dir": "/workspace/round3_out/round4/Y/arc_easy", |
| "accuracy": 0.7166666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 16.606, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": 0.02439024390243918 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r4:medmcqa_easy", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r4:medmcqa_easy", |
| "anchor_name": "medmcqa_easy", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.8757078647613525, |
| "adapter_dir": "/workspace/round3_out/round4/Y/medmcqa_easy", |
| "accuracy": 0.7133333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 28.411, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": 0.012195121951219795 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r4:mmlu_elementary_math", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r4:mmlu_elementary_math", |
| "anchor_name": "mmlu_elementary_math", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.9520111680030823, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_elementary_math", |
| "accuracy": 0.7133333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 29.065, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": 0.012195121951219795 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r4:mmlu_high_school_biology", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r4:mmlu_high_school_biology", |
| "anchor_name": "mmlu_high_school_biology", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.9565833806991577, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_biology", |
| "accuracy": 0.6733333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 28.46, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": -0.1341463414634145 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r4:mmlu_high_school_physics", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r4:mmlu_high_school_physics", |
| "anchor_name": "mmlu_high_school_physics", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.9671817421913147, |
| "adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_physics", |
| "accuracy": 0.6966666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 28.106, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": -0.04878048780487796 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r4:openbookqa", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r4:openbookqa", |
| "anchor_name": "openbookqa", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": 0.89091557264328, |
| "adapter_dir": "/workspace/round3_out/round4/Y/openbookqa", |
| "accuracy": 0.81, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 19.183, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": 0.3658536585365857 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r4:sciq", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r4:sciq", |
| "anchor_name": "sciq", |
| "anchor_round": "r4", |
| "anchor_domain": "science", |
| "cos_X": -0.00015819823602214456, |
| "adapter_dir": "/workspace/round3_out/round4/Y/sciq", |
| "accuracy": 0.7033333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 28.125, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": -0.024390243902438775 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r5:medmcqa_easy", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r5:medmcqa_easy", |
| "anchor_name": "medmcqa_easy", |
| "anchor_round": "r5", |
| "anchor_domain": "science", |
| "cos_X": 0.8757078647613525, |
| "adapter_dir": "/workspace/round3_out/round5/Y/medmcqa_easy", |
| "accuracy": 0.7133333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 27.62, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": 0.012195121951219795 |
| }, |
| { |
| "cell_id": "A::openbookqa_test::r5:pubmedqa_pqal", |
| "stage": "locality_single_anchor", |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "anchor_ref": "r5:pubmedqa_pqal", |
| "anchor_name": "pubmedqa_pqal", |
| "anchor_round": "r5", |
| "anchor_domain": "science", |
| "cos_X": 0.8888986110687256, |
| "adapter_dir": "/workspace/round3_out/round5/Y/pubmedqa_pqal", |
| "accuracy": 0.6966666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 22.009, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "single_anchor_gap": -0.04878048780487796 |
| } |
| ] |
| } |