cross-model-lora-prediction-3b / results_locality.json
CK0607's picture
Final workshop round: results_locality.json
b17aed7 verified
raw
history blame
98.4 kB
{
"config": {
"hub_repo": "CK0607/cross-model-lora-prediction-3b",
"model_Y": "meta-llama/Llama-3.2-3B-Instruct",
"no_surrogate": true,
"generation": {
"do_sample": false,
"num_beams": 1,
"max_new_tokens_code": 96,
"max_new_tokens_other": 24
},
"heldouts": [
"gsm_hard",
"gsm8k_test_500",
"mbpp_test_held",
"mbpp_plus",
"openbookqa_test"
],
"pool_anchor_names": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
]
},
"adapter_verification": {
"listing": {
"round4/X": [
"aqua_rat",
"arc_challenge",
"arc_easy",
"gsm8k",
"gsm8k_test_500",
"gsm_hard",
"humaneval",
"math_algebra_easy",
"math_counting_easy",
"mbpp",
"mbpp_plus",
"mbpp_sanitized",
"mbpp_test_held",
"medmcqa_easy",
"mmlu_elementary_math",
"mmlu_high_school_biology",
"mmlu_high_school_physics",
"multiarith",
"openbookqa",
"openbookqa_test",
"sciq",
"svamp"
],
"round4/Y": [
"aqua_rat",
"arc_challenge",
"arc_easy",
"gsm8k",
"gsm8k_test_500",
"gsm_hard",
"humaneval",
"math_algebra_easy",
"math_counting_easy",
"mbpp",
"mbpp_plus",
"mbpp_sanitized",
"mbpp_test_held",
"medmcqa_easy",
"mmlu_elementary_math",
"mmlu_high_school_biology",
"mmlu_high_school_physics",
"multiarith",
"openbookqa",
"openbookqa_test",
"sciq",
"svamp"
],
"round5/X": [
"aqua_rat_numeric",
"conala_curated",
"humaneval",
"math_counting_easy",
"mawps",
"mbpp_sanitized",
"medmcqa_easy",
"pubmedqa_pqal"
],
"round5/Y": [
"aqua_rat_numeric",
"conala_curated",
"humaneval",
"math_counting_easy",
"mawps",
"mbpp_sanitized",
"medmcqa_easy",
"pubmedqa_pqal"
],
"round6/Y_pred": [
"gsm8k_test_500_global_ridge_N12_seed0",
"gsm8k_test_500_global_ridge_N12_seed1",
"gsm8k_test_500_global_ridge_N12_seed2",
"gsm8k_test_500_global_ridge_N16_seed0",
"gsm8k_test_500_global_ridge_N16_seed1",
"gsm8k_test_500_global_ridge_N16_seed2",
"gsm8k_test_500_global_ridge_N24_full",
"gsm8k_test_500_global_ridge_N4_seed0",
"gsm8k_test_500_global_ridge_N4_seed1",
"gsm8k_test_500_global_ridge_N4_seed2",
"gsm8k_test_500_global_ridge_N8_seed0",
"gsm8k_test_500_global_ridge_N8_seed1",
"gsm8k_test_500_global_ridge_N8_seed2",
"gsm8k_test_500_mean_N12_seed0",
"gsm8k_test_500_mean_N12_seed1",
"gsm8k_test_500_mean_N12_seed2",
"gsm8k_test_500_mean_N16_seed0",
"gsm8k_test_500_mean_N16_seed1",
"gsm8k_test_500_mean_N16_seed2",
"gsm8k_test_500_mean_N24_full",
"gsm8k_test_500_mean_N4_seed0",
"gsm8k_test_500_mean_N4_seed1",
"gsm8k_test_500_mean_N4_seed2",
"gsm8k_test_500_mean_N8_seed0",
"gsm8k_test_500_mean_N8_seed1",
"gsm8k_test_500_mean_N8_seed2",
"gsm8k_test_500_topk8_global_ridge_N12_seed0",
"gsm8k_test_500_topk8_global_ridge_N12_seed1",
"gsm8k_test_500_topk8_global_ridge_N12_seed2",
"gsm8k_test_500_topk8_global_ridge_N16_seed0",
"gsm8k_test_500_topk8_global_ridge_N16_seed1",
"gsm8k_test_500_topk8_global_ridge_N16_seed2",
"gsm8k_test_500_topk8_global_ridge_N24_full",
"gsm8k_test_500_topk8_global_ridge_N4_seed0",
"gsm8k_test_500_topk8_global_ridge_N4_seed1",
"gsm8k_test_500_topk8_global_ridge_N4_seed2",
"gsm8k_test_500_topk8_global_ridge_N8_seed0",
"gsm8k_test_500_topk8_global_ridge_N8_seed1",
"gsm8k_test_500_topk8_global_ridge_N8_seed2",
"gsm_hard_global_ridge_N12_seed0",
"gsm_hard_global_ridge_N12_seed1",
"gsm_hard_global_ridge_N12_seed2",
"gsm_hard_global_ridge_N16_seed0",
"gsm_hard_global_ridge_N16_seed1",
"gsm_hard_global_ridge_N16_seed2",
"gsm_hard_global_ridge_N24_full",
"gsm_hard_global_ridge_N4_seed0",
"gsm_hard_global_ridge_N4_seed1",
"gsm_hard_global_ridge_N4_seed2",
"gsm_hard_global_ridge_N8_seed0",
"gsm_hard_global_ridge_N8_seed1",
"gsm_hard_global_ridge_N8_seed2",
"gsm_hard_mean_N12_seed0",
"gsm_hard_mean_N12_seed1",
"gsm_hard_mean_N12_seed2",
"gsm_hard_mean_N16_seed0",
"gsm_hard_mean_N16_seed1",
"gsm_hard_mean_N16_seed2",
"gsm_hard_mean_N24_full",
"gsm_hard_mean_N4_seed0",
"gsm_hard_mean_N4_seed1",
"gsm_hard_mean_N4_seed2",
"gsm_hard_mean_N8_seed0",
"gsm_hard_mean_N8_seed1",
"gsm_hard_mean_N8_seed2",
"gsm_hard_topk8_global_ridge_N12_seed0",
"gsm_hard_topk8_global_ridge_N12_seed1",
"gsm_hard_topk8_global_ridge_N12_seed2",
"gsm_hard_topk8_global_ridge_N16_seed0",
"gsm_hard_topk8_global_ridge_N16_seed1",
"gsm_hard_topk8_global_ridge_N16_seed2",
"gsm_hard_topk8_global_ridge_N24_full",
"gsm_hard_topk8_global_ridge_N4_seed0",
"gsm_hard_topk8_global_ridge_N4_seed1",
"gsm_hard_topk8_global_ridge_N4_seed2",
"gsm_hard_topk8_global_ridge_N8_seed0",
"gsm_hard_topk8_global_ridge_N8_seed1",
"gsm_hard_topk8_global_ridge_N8_seed2",
"mbpp_plus_global_ridge_N12_seed0",
"mbpp_plus_global_ridge_N12_seed1",
"mbpp_plus_global_ridge_N12_seed2",
"mbpp_plus_global_ridge_N16_seed0",
"mbpp_plus_global_ridge_N16_seed1",
"mbpp_plus_global_ridge_N16_seed2",
"mbpp_plus_global_ridge_N24_full",
"mbpp_plus_global_ridge_N4_seed0",
"mbpp_plus_global_ridge_N4_seed1",
"mbpp_plus_global_ridge_N4_seed2",
"mbpp_plus_global_ridge_N8_seed0",
"mbpp_plus_global_ridge_N8_seed1",
"mbpp_plus_global_ridge_N8_seed2",
"mbpp_plus_mean_N12_seed0",
"mbpp_plus_mean_N12_seed1",
"mbpp_plus_mean_N12_seed2",
"mbpp_plus_mean_N16_seed0",
"mbpp_plus_mean_N16_seed1",
"mbpp_plus_mean_N16_seed2",
"mbpp_plus_mean_N24_full",
"mbpp_plus_mean_N4_seed0",
"mbpp_plus_mean_N4_seed1",
"mbpp_plus_mean_N4_seed2",
"mbpp_plus_mean_N8_seed0",
"mbpp_plus_mean_N8_seed1",
"mbpp_plus_mean_N8_seed2",
"mbpp_plus_topk8_global_ridge_N12_seed0",
"mbpp_plus_topk8_global_ridge_N12_seed1",
"mbpp_plus_topk8_global_ridge_N12_seed2",
"mbpp_plus_topk8_global_ridge_N16_seed0",
"mbpp_plus_topk8_global_ridge_N16_seed1",
"mbpp_plus_topk8_global_ridge_N16_seed2",
"mbpp_plus_topk8_global_ridge_N24_full",
"mbpp_plus_topk8_global_ridge_N4_seed0",
"mbpp_plus_topk8_global_ridge_N4_seed1",
"mbpp_plus_topk8_global_ridge_N4_seed2",
"mbpp_plus_topk8_global_ridge_N8_seed0",
"mbpp_plus_topk8_global_ridge_N8_seed1",
"mbpp_plus_topk8_global_ridge_N8_seed2",
"mbpp_test_held_global_ridge_N12_seed0",
"mbpp_test_held_global_ridge_N12_seed1",
"mbpp_test_held_global_ridge_N12_seed2",
"mbpp_test_held_global_ridge_N16_seed0",
"mbpp_test_held_global_ridge_N16_seed1",
"mbpp_test_held_global_ridge_N16_seed2",
"mbpp_test_held_global_ridge_N24_full",
"mbpp_test_held_global_ridge_N4_seed0",
"mbpp_test_held_global_ridge_N4_seed1",
"mbpp_test_held_global_ridge_N4_seed2",
"mbpp_test_held_global_ridge_N8_seed0",
"mbpp_test_held_global_ridge_N8_seed1",
"mbpp_test_held_global_ridge_N8_seed2",
"mbpp_test_held_mean_N12_seed0",
"mbpp_test_held_mean_N12_seed1",
"mbpp_test_held_mean_N12_seed2",
"mbpp_test_held_mean_N16_seed0",
"mbpp_test_held_mean_N16_seed1",
"mbpp_test_held_mean_N16_seed2",
"mbpp_test_held_mean_N24_full",
"mbpp_test_held_mean_N4_seed0",
"mbpp_test_held_mean_N4_seed1",
"mbpp_test_held_mean_N4_seed2",
"mbpp_test_held_mean_N8_seed0",
"mbpp_test_held_mean_N8_seed1",
"mbpp_test_held_mean_N8_seed2",
"mbpp_test_held_topk8_global_ridge_N12_seed0",
"mbpp_test_held_topk8_global_ridge_N12_seed1",
"mbpp_test_held_topk8_global_ridge_N12_seed2",
"mbpp_test_held_topk8_global_ridge_N16_seed0",
"mbpp_test_held_topk8_global_ridge_N16_seed1",
"mbpp_test_held_topk8_global_ridge_N16_seed2",
"mbpp_test_held_topk8_global_ridge_N24_full",
"mbpp_test_held_topk8_global_ridge_N4_seed0",
"mbpp_test_held_topk8_global_ridge_N4_seed1",
"mbpp_test_held_topk8_global_ridge_N4_seed2",
"mbpp_test_held_topk8_global_ridge_N8_seed0",
"mbpp_test_held_topk8_global_ridge_N8_seed1",
"mbpp_test_held_topk8_global_ridge_N8_seed2",
"openbookqa_test_global_ridge_N12_seed0",
"openbookqa_test_global_ridge_N12_seed1",
"openbookqa_test_global_ridge_N12_seed2",
"openbookqa_test_global_ridge_N16_seed0",
"openbookqa_test_global_ridge_N16_seed1",
"openbookqa_test_global_ridge_N16_seed2",
"openbookqa_test_global_ridge_N24_full",
"openbookqa_test_global_ridge_N4_seed0",
"openbookqa_test_global_ridge_N4_seed1",
"openbookqa_test_global_ridge_N4_seed2",
"openbookqa_test_global_ridge_N8_seed0",
"openbookqa_test_global_ridge_N8_seed1",
"openbookqa_test_global_ridge_N8_seed2",
"openbookqa_test_mean_N12_seed0",
"openbookqa_test_mean_N12_seed1",
"openbookqa_test_mean_N12_seed2",
"openbookqa_test_mean_N16_seed0",
"openbookqa_test_mean_N16_seed1",
"openbookqa_test_mean_N16_seed2",
"openbookqa_test_mean_N24_full",
"openbookqa_test_mean_N4_seed0",
"openbookqa_test_mean_N4_seed1",
"openbookqa_test_mean_N4_seed2",
"openbookqa_test_mean_N8_seed0",
"openbookqa_test_mean_N8_seed1",
"openbookqa_test_mean_N8_seed2",
"openbookqa_test_topk8_global_ridge_N12_seed0",
"openbookqa_test_topk8_global_ridge_N12_seed1",
"openbookqa_test_topk8_global_ridge_N12_seed2",
"openbookqa_test_topk8_global_ridge_N16_seed0",
"openbookqa_test_topk8_global_ridge_N16_seed1",
"openbookqa_test_topk8_global_ridge_N16_seed2",
"openbookqa_test_topk8_global_ridge_N24_full",
"openbookqa_test_topk8_global_ridge_N4_seed0",
"openbookqa_test_topk8_global_ridge_N4_seed1",
"openbookqa_test_topk8_global_ridge_N4_seed2",
"openbookqa_test_topk8_global_ridge_N8_seed0",
"openbookqa_test_topk8_global_ridge_N8_seed1",
"openbookqa_test_topk8_global_ridge_N8_seed2"
],
"round8/Y_pred": [
"gsm8k_test_500_pertensor_pca_N12_seed0",
"gsm8k_test_500_pertensor_pca_N12_seed1",
"gsm8k_test_500_pertensor_pca_N12_seed2",
"gsm8k_test_500_pertensor_pca_N16_seed0",
"gsm8k_test_500_pertensor_pca_N16_seed1",
"gsm8k_test_500_pertensor_pca_N16_seed2",
"gsm8k_test_500_pertensor_pca_N24_full",
"gsm8k_test_500_pertensor_ridge_N12_seed0",
"gsm8k_test_500_pertensor_ridge_N12_seed1",
"gsm8k_test_500_pertensor_ridge_N12_seed2",
"gsm8k_test_500_pertensor_ridge_N16_seed0",
"gsm8k_test_500_pertensor_ridge_N16_seed1",
"gsm8k_test_500_pertensor_ridge_N16_seed2",
"gsm8k_test_500_pertensor_ridge_N24_full",
"gsm8k_test_500_procrustes_N12_seed0",
"gsm8k_test_500_procrustes_N12_seed1",
"gsm8k_test_500_procrustes_N12_seed2",
"gsm8k_test_500_procrustes_N16_seed0",
"gsm8k_test_500_procrustes_N16_seed1",
"gsm8k_test_500_procrustes_N16_seed2",
"gsm8k_test_500_procrustes_N24_full",
"gsm8k_test_500_topk12_global_ridge_N24_full",
"gsm8k_test_500_topk16_global_ridge_N24_full",
"gsm8k_test_500_topk20_global_ridge_N24_full",
"gsm8k_test_500_topk24_global_ridge_N24_full",
"gsm8k_test_500_topk2_global_ridge_N24_full",
"gsm8k_test_500_topk4_global_ridge_N24_full",
"gsm8k_test_500_topk6_global_ridge_N24_full",
"gsm8k_test_500_topk8_global_ridge_N24_full",
"gsm_hard_pertensor_pca_N12_seed0",
"gsm_hard_pertensor_pca_N12_seed1",
"gsm_hard_pertensor_pca_N12_seed2",
"gsm_hard_pertensor_pca_N16_seed0",
"gsm_hard_pertensor_pca_N16_seed1",
"gsm_hard_pertensor_pca_N16_seed2",
"gsm_hard_pertensor_pca_N24_full",
"gsm_hard_pertensor_ridge_N12_seed0",
"gsm_hard_pertensor_ridge_N12_seed1",
"gsm_hard_pertensor_ridge_N12_seed2",
"gsm_hard_pertensor_ridge_N16_seed0",
"gsm_hard_pertensor_ridge_N16_seed1",
"gsm_hard_pertensor_ridge_N16_seed2",
"gsm_hard_pertensor_ridge_N24_full",
"gsm_hard_procrustes_N12_seed0",
"gsm_hard_procrustes_N12_seed1",
"gsm_hard_procrustes_N12_seed2",
"gsm_hard_procrustes_N16_seed0",
"gsm_hard_procrustes_N16_seed1",
"gsm_hard_procrustes_N16_seed2",
"gsm_hard_procrustes_N24_full",
"gsm_hard_topk12_global_ridge_N24_full",
"gsm_hard_topk16_global_ridge_N24_full",
"gsm_hard_topk20_global_ridge_N24_full",
"gsm_hard_topk24_global_ridge_N24_full",
"gsm_hard_topk2_global_ridge_N24_full",
"gsm_hard_topk4_global_ridge_N24_full",
"gsm_hard_topk6_global_ridge_N24_full",
"gsm_hard_topk8_global_ridge_N24_full",
"mbpp_plus_pertensor_pca_N12_seed0",
"mbpp_plus_pertensor_pca_N12_seed1",
"mbpp_plus_pertensor_pca_N12_seed2",
"mbpp_plus_pertensor_pca_N16_seed0",
"mbpp_plus_pertensor_pca_N16_seed1",
"mbpp_plus_pertensor_pca_N16_seed2",
"mbpp_plus_pertensor_pca_N24_full",
"mbpp_plus_pertensor_ridge_N12_seed0",
"mbpp_plus_pertensor_ridge_N12_seed1",
"mbpp_plus_pertensor_ridge_N12_seed2",
"mbpp_plus_pertensor_ridge_N16_seed0",
"mbpp_plus_pertensor_ridge_N16_seed1",
"mbpp_plus_pertensor_ridge_N16_seed2",
"mbpp_plus_pertensor_ridge_N24_full",
"mbpp_plus_procrustes_N12_seed0",
"mbpp_plus_procrustes_N12_seed1",
"mbpp_plus_procrustes_N12_seed2",
"mbpp_plus_procrustes_N16_seed0",
"mbpp_plus_procrustes_N16_seed1",
"mbpp_plus_procrustes_N16_seed2",
"mbpp_plus_procrustes_N24_full",
"mbpp_plus_topk12_global_ridge_N24_full",
"mbpp_plus_topk16_global_ridge_N24_full",
"mbpp_plus_topk20_global_ridge_N24_full",
"mbpp_plus_topk24_global_ridge_N24_full",
"mbpp_plus_topk2_global_ridge_N24_full",
"mbpp_plus_topk4_global_ridge_N24_full",
"mbpp_plus_topk6_global_ridge_N24_full",
"mbpp_plus_topk8_global_ridge_N24_full",
"mbpp_test_held_pertensor_pca_N12_seed0",
"mbpp_test_held_pertensor_pca_N12_seed1",
"mbpp_test_held_pertensor_pca_N12_seed2",
"mbpp_test_held_pertensor_pca_N16_seed0",
"mbpp_test_held_pertensor_pca_N16_seed1",
"mbpp_test_held_pertensor_pca_N16_seed2",
"mbpp_test_held_pertensor_pca_N24_full",
"mbpp_test_held_pertensor_ridge_N12_seed0",
"mbpp_test_held_pertensor_ridge_N12_seed1",
"mbpp_test_held_pertensor_ridge_N12_seed2",
"mbpp_test_held_pertensor_ridge_N16_seed0",
"mbpp_test_held_pertensor_ridge_N16_seed1",
"mbpp_test_held_pertensor_ridge_N16_seed2",
"mbpp_test_held_pertensor_ridge_N24_full",
"mbpp_test_held_procrustes_N12_seed0",
"mbpp_test_held_procrustes_N12_seed1",
"mbpp_test_held_procrustes_N12_seed2",
"mbpp_test_held_procrustes_N16_seed0",
"mbpp_test_held_procrustes_N16_seed1",
"mbpp_test_held_procrustes_N16_seed2",
"mbpp_test_held_procrustes_N24_full",
"mbpp_test_held_topk12_global_ridge_N24_full",
"mbpp_test_held_topk16_global_ridge_N24_full",
"mbpp_test_held_topk20_global_ridge_N24_full",
"mbpp_test_held_topk24_global_ridge_N24_full",
"mbpp_test_held_topk2_global_ridge_N24_full",
"mbpp_test_held_topk4_global_ridge_N24_full",
"mbpp_test_held_topk6_global_ridge_N24_full",
"mbpp_test_held_topk8_global_ridge_N24_full",
"openbookqa_test_pertensor_pca_N12_seed0",
"openbookqa_test_pertensor_pca_N12_seed1",
"openbookqa_test_pertensor_pca_N12_seed2",
"openbookqa_test_pertensor_pca_N16_seed0",
"openbookqa_test_pertensor_pca_N16_seed1",
"openbookqa_test_pertensor_pca_N16_seed2",
"openbookqa_test_pertensor_pca_N24_full",
"openbookqa_test_pertensor_ridge_N12_seed0",
"openbookqa_test_pertensor_ridge_N12_seed1",
"openbookqa_test_pertensor_ridge_N12_seed2",
"openbookqa_test_pertensor_ridge_N16_seed0",
"openbookqa_test_pertensor_ridge_N16_seed1",
"openbookqa_test_pertensor_ridge_N16_seed2",
"openbookqa_test_pertensor_ridge_N24_full",
"openbookqa_test_procrustes_N12_seed0",
"openbookqa_test_procrustes_N12_seed1",
"openbookqa_test_procrustes_N12_seed2",
"openbookqa_test_procrustes_N16_seed0",
"openbookqa_test_procrustes_N16_seed1",
"openbookqa_test_procrustes_N16_seed2",
"openbookqa_test_procrustes_N24_full",
"openbookqa_test_topk12_global_ridge_N24_full",
"openbookqa_test_topk16_global_ridge_N24_full",
"openbookqa_test_topk20_global_ridge_N24_full",
"openbookqa_test_topk24_global_ridge_N24_full",
"openbookqa_test_topk2_global_ridge_N24_full",
"openbookqa_test_topk4_global_ridge_N24_full",
"openbookqa_test_topk6_global_ridge_N24_full",
"openbookqa_test_topk8_global_ridge_N24_full"
]
},
"missing": [],
"count_warnings": []
},
"baselines": {
"gsm_hard": {
"base_Y": 0.06333333333333334,
"oracle": 0.15
},
"gsm8k_test_500": {
"base_Y": 0.08,
"oracle": 0.29333333333333333
},
"mbpp_test_held": {
"base_Y": 0.23,
"oracle": 0.32
},
"mbpp_plus": {
"base_Y": 0.21666666666666667,
"oracle": 0.45
},
"openbookqa_test": {
"base_Y": 0.71,
"oracle": 0.9833333333333333
}
},
"summary": {
"overall_spearman": 0.1299305286501594,
"per_heldout": {
"gsm_hard": 0.28540339681524574,
"gsm8k_test_500": 0.04367789323240817,
"mbpp_test_held": -0.2896337305973781,
"mbpp_plus": -0.10085771557215327,
"openbookqa_test": -0.2975768832863932
},
"per_target_domain": {
"code": 0.017636096528107978,
"math": 0.2826406573020395,
"science": -0.2975768832863932
},
"per_anchor_domain": {
"code": 0.14899075217681867,
"math": 0.07629665365697172,
"science": 0.2376382913201063
},
"decision": "weak locality, ridge subsumes it"
},
"records": [
{
"cell_id": "A::gsm8k_test_500::r4:humaneval",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r4:humaneval",
"anchor_name": "humaneval",
"anchor_round": "r4",
"anchor_domain": "code",
"cos_X": 0.9497017860412598,
"adapter_dir": "/workspace/round3_out/round4/Y/humaneval",
"accuracy": 0.17333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 28.044,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": 0.43750000000000006
},
{
"cell_id": "A::gsm8k_test_500::r4:mbpp",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r4:mbpp",
"anchor_name": "mbpp",
"anchor_round": "r4",
"anchor_domain": "code",
"cos_X": -0.00027140171732753515,
"adapter_dir": "/workspace/round3_out/round4/Y/mbpp",
"accuracy": 0.06,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 24.609,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": -0.09375000000000003
},
{
"cell_id": "A::gsm8k_test_500::r4:mbpp_sanitized",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r4:mbpp_sanitized",
"anchor_name": "mbpp_sanitized",
"anchor_round": "r4",
"anchor_domain": "code",
"cos_X": 0.9527238011360168,
"adapter_dir": "/workspace/round3_out/round4/Y/mbpp_sanitized",
"accuracy": 0.06666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 23.479,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": -0.06250000000000001
},
{
"cell_id": "A::gsm8k_test_500::r5:conala_curated",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r5:conala_curated",
"anchor_name": "conala_curated",
"anchor_round": "r5",
"anchor_domain": "code",
"cos_X": 0.8599404692649841,
"adapter_dir": "/workspace/round3_out/round5/Y/conala_curated",
"accuracy": 0.18333333333333332,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 28.998,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": 0.48437499999999994
},
{
"cell_id": "A::gsm8k_test_500::r5:humaneval",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r5:humaneval",
"anchor_name": "humaneval",
"anchor_round": "r5",
"anchor_domain": "code",
"cos_X": 0.9497017860412598,
"adapter_dir": "/workspace/round3_out/round5/Y/humaneval",
"accuracy": 0.17333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 29.242,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": 0.43750000000000006
},
{
"cell_id": "A::gsm8k_test_500::r5:mbpp_sanitized",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r5:mbpp_sanitized",
"anchor_name": "mbpp_sanitized",
"anchor_round": "r5",
"anchor_domain": "code",
"cos_X": -0.00027140171732753515,
"adapter_dir": "/workspace/round3_out/round5/Y/mbpp_sanitized",
"accuracy": 0.06,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 25.368,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": -0.09375000000000003
},
{
"cell_id": "A::gsm8k_test_500::r4:aqua_rat",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r4:aqua_rat",
"anchor_name": "aqua_rat",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.8731036186218262,
"adapter_dir": "/workspace/round3_out/round4/Y/aqua_rat",
"accuracy": 0.056666666666666664,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 28.153,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": -0.10937500000000003
},
{
"cell_id": "A::gsm8k_test_500::r4:gsm8k",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r4:gsm8k",
"anchor_name": "gsm8k",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": -0.0006812263745814562,
"adapter_dir": "/workspace/round3_out/round4/Y/gsm8k",
"accuracy": 0.14,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 6.608,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": 0.28125000000000006
},
{
"cell_id": "A::gsm8k_test_500::r4:math_algebra_easy",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r4:math_algebra_easy",
"anchor_name": "math_algebra_easy",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.9428298473358154,
"adapter_dir": "/workspace/round3_out/round4/Y/math_algebra_easy",
"accuracy": 0.08333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 6.667,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": 0.015624999999999972
},
{
"cell_id": "A::gsm8k_test_500::r4:math_counting_easy",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r4:math_counting_easy",
"anchor_name": "math_counting_easy",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.9606146216392517,
"adapter_dir": "/workspace/round3_out/round4/Y/math_counting_easy",
"accuracy": 0.07333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 10.451,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": -0.03125000000000001
},
{
"cell_id": "A::gsm8k_test_500::r4:multiarith",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r4:multiarith",
"anchor_name": "multiarith",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.9475974440574646,
"adapter_dir": "/workspace/round3_out/round4/Y/multiarith",
"accuracy": 0.07666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 15.16,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": -0.015625000000000038
},
{
"cell_id": "A::gsm8k_test_500::r4:svamp",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r4:svamp",
"anchor_name": "svamp",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.9288908839225769,
"adapter_dir": "/workspace/round3_out/round4/Y/svamp",
"accuracy": 0.07,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 5.968,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": -0.04687499999999998
},
{
"cell_id": "A::gsm8k_test_500::r5:aqua_rat_numeric",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r5:aqua_rat_numeric",
"anchor_name": "aqua_rat_numeric",
"anchor_round": "r5",
"anchor_domain": "math",
"cos_X": -0.0004875913728028536,
"adapter_dir": "/workspace/round3_out/round5/Y/aqua_rat_numeric",
"accuracy": 0.08,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 29.239,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": 0.0
},
{
"cell_id": "A::gsm8k_test_500::r5:math_counting_easy",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r5:math_counting_easy",
"anchor_name": "math_counting_easy",
"anchor_round": "r5",
"anchor_domain": "math",
"cos_X": -0.0004786302160937339,
"adapter_dir": "/workspace/round3_out/round5/Y/math_counting_easy",
"accuracy": 0.07333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 8.228,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": -0.03125000000000001
},
{
"cell_id": "A::gsm8k_test_500::r5:mawps",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r5:mawps",
"anchor_name": "mawps",
"anchor_round": "r5",
"anchor_domain": "math",
"cos_X": -0.0008274300489574671,
"adapter_dir": "/workspace/round3_out/round5/Y/mawps",
"accuracy": 0.07666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 10.047,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": -0.015625000000000038
},
{
"cell_id": "A::gsm8k_test_500::r4:arc_easy",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r4:arc_easy",
"anchor_name": "arc_easy",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": -0.0004913151497021317,
"adapter_dir": "/workspace/round3_out/round4/Y/arc_easy",
"accuracy": 0.07333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 28.256,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": -0.03125000000000001
},
{
"cell_id": "A::gsm8k_test_500::r4:medmcqa_easy",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r4:medmcqa_easy",
"anchor_name": "medmcqa_easy",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.8598978519439697,
"adapter_dir": "/workspace/round3_out/round4/Y/medmcqa_easy",
"accuracy": 0.09333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 28.938,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": 0.06250000000000001
},
{
"cell_id": "A::gsm8k_test_500::r4:mmlu_elementary_math",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r4:mmlu_elementary_math",
"anchor_name": "mmlu_elementary_math",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.9377825260162354,
"adapter_dir": "/workspace/round3_out/round4/Y/mmlu_elementary_math",
"accuracy": 0.08,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 26.479,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": 0.0
},
{
"cell_id": "A::gsm8k_test_500::r4:mmlu_high_school_biology",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r4:mmlu_high_school_biology",
"anchor_name": "mmlu_high_school_biology",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.9370604753494263,
"adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_biology",
"accuracy": 0.07666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 28.829,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": -0.015625000000000038
},
{
"cell_id": "A::gsm8k_test_500::r4:mmlu_high_school_physics",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r4:mmlu_high_school_physics",
"anchor_name": "mmlu_high_school_physics",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.9513278603553772,
"adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_physics",
"accuracy": 0.09333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 27.159,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": 0.06250000000000001
},
{
"cell_id": "A::gsm8k_test_500::r4:openbookqa",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r4:openbookqa",
"anchor_name": "openbookqa",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.8619711995124817,
"adapter_dir": "/workspace/round3_out/round4/Y/openbookqa",
"accuracy": 0.09666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 29.32,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": 0.07812499999999999
},
{
"cell_id": "A::gsm8k_test_500::r4:sciq",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r4:sciq",
"anchor_name": "sciq",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": -0.00034972114372067153,
"adapter_dir": "/workspace/round3_out/round4/Y/sciq",
"accuracy": 0.1,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 28.327,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": 0.09375000000000003
},
{
"cell_id": "A::gsm8k_test_500::r5:medmcqa_easy",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r5:medmcqa_easy",
"anchor_name": "medmcqa_easy",
"anchor_round": "r5",
"anchor_domain": "science",
"cos_X": 0.8598978519439697,
"adapter_dir": "/workspace/round3_out/round5/Y/medmcqa_easy",
"accuracy": 0.09333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 28.144,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": 0.06250000000000001
},
{
"cell_id": "A::gsm8k_test_500::r5:pubmedqa_pqal",
"stage": "locality_single_anchor",
"task": "gsm8k_test_500",
"target_domain": "math",
"anchor_ref": "r5:pubmedqa_pqal",
"anchor_name": "pubmedqa_pqal",
"anchor_round": "r5",
"anchor_domain": "science",
"cos_X": 0.8853808641433716,
"adapter_dir": "/workspace/round3_out/round5/Y/pubmedqa_pqal",
"accuracy": 0.08,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 18.209,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"single_anchor_gap": 0.0
},
{
"cell_id": "A::gsm_hard::r4:humaneval",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r4:humaneval",
"anchor_name": "humaneval",
"anchor_round": "r4",
"anchor_domain": "code",
"cos_X": 0.8956640958786011,
"adapter_dir": "/workspace/round3_out/round4/Y/humaneval",
"accuracy": 0.07666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 28.25,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": 0.15384615384615374
},
{
"cell_id": "A::gsm_hard::r4:mbpp",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r4:mbpp",
"anchor_name": "mbpp",
"anchor_round": "r4",
"anchor_domain": "code",
"cos_X": -0.000505593023262918,
"adapter_dir": "/workspace/round3_out/round4/Y/mbpp",
"accuracy": 0.056666666666666664,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 27.549,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": -0.07692307692307702
},
{
"cell_id": "A::gsm_hard::r4:mbpp_sanitized",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r4:mbpp_sanitized",
"anchor_name": "mbpp_sanitized",
"anchor_round": "r4",
"anchor_domain": "code",
"cos_X": 0.8983818888664246,
"adapter_dir": "/workspace/round3_out/round4/Y/mbpp_sanitized",
"accuracy": 0.05333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 27.652,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": -0.11538461538461542
},
{
"cell_id": "A::gsm_hard::r5:conala_curated",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r5:conala_curated",
"anchor_name": "conala_curated",
"anchor_round": "r5",
"anchor_domain": "code",
"cos_X": 0.8125380277633667,
"adapter_dir": "/workspace/round3_out/round5/Y/conala_curated",
"accuracy": 0.07333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 28.894,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": 0.11538461538461534
},
{
"cell_id": "A::gsm_hard::r5:humaneval",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r5:humaneval",
"anchor_name": "humaneval",
"anchor_round": "r5",
"anchor_domain": "code",
"cos_X": 0.8956640958786011,
"adapter_dir": "/workspace/round3_out/round5/Y/humaneval",
"accuracy": 0.07666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 29.186,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": 0.15384615384615374
},
{
"cell_id": "A::gsm_hard::r5:mbpp_sanitized",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r5:mbpp_sanitized",
"anchor_name": "mbpp_sanitized",
"anchor_round": "r5",
"anchor_domain": "code",
"cos_X": -0.000505593023262918,
"adapter_dir": "/workspace/round3_out/round5/Y/mbpp_sanitized",
"accuracy": 0.056666666666666664,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 28.051,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": -0.07692307692307702
},
{
"cell_id": "A::gsm_hard::r4:aqua_rat",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r4:aqua_rat",
"anchor_name": "aqua_rat",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.8249850273132324,
"adapter_dir": "/workspace/round3_out/round4/Y/aqua_rat",
"accuracy": 0.05333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 28.068,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": -0.11538461538461542
},
{
"cell_id": "A::gsm_hard::r4:gsm8k",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r4:gsm8k",
"anchor_name": "gsm8k",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": -0.0006781710544601083,
"adapter_dir": "/workspace/round3_out/round4/Y/gsm8k",
"accuracy": 0.06666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 14.301,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": 0.038461538461538394
},
{
"cell_id": "A::gsm_hard::r4:math_algebra_easy",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r4:math_algebra_easy",
"anchor_name": "math_algebra_easy",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.8887502551078796,
"adapter_dir": "/workspace/round3_out/round4/Y/math_algebra_easy",
"accuracy": 0.05333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 17.285,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": -0.11538461538461542
},
{
"cell_id": "A::gsm_hard::r4:math_counting_easy",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r4:math_counting_easy",
"anchor_name": "math_counting_easy",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.9051075577735901,
"adapter_dir": "/workspace/round3_out/round4/Y/math_counting_easy",
"accuracy": 0.03666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 20.983,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": -0.30769230769230776
},
{
"cell_id": "A::gsm_hard::r4:multiarith",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r4:multiarith",
"anchor_name": "multiarith",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.8932132124900818,
"adapter_dir": "/workspace/round3_out/round4/Y/multiarith",
"accuracy": 0.04666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 26.632,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": -0.19230769230769237
},
{
"cell_id": "A::gsm_hard::r4:svamp",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r4:svamp",
"anchor_name": "svamp",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.8764325380325317,
"adapter_dir": "/workspace/round3_out/round4/Y/svamp",
"accuracy": 0.06666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 23.451,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": 0.038461538461538394
},
{
"cell_id": "A::gsm_hard::r5:aqua_rat_numeric",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r5:aqua_rat_numeric",
"anchor_name": "aqua_rat_numeric",
"anchor_round": "r5",
"anchor_domain": "math",
"cos_X": -0.0008573997183702886,
"adapter_dir": "/workspace/round3_out/round5/Y/aqua_rat_numeric",
"accuracy": 0.03666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 29.81,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": -0.30769230769230776
},
{
"cell_id": "A::gsm_hard::r5:math_counting_easy",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r5:math_counting_easy",
"anchor_name": "math_counting_easy",
"anchor_round": "r5",
"anchor_domain": "math",
"cos_X": -0.0006265510455705225,
"adapter_dir": "/workspace/round3_out/round5/Y/math_counting_easy",
"accuracy": 0.04,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 19.003,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": -0.26923076923076933
},
{
"cell_id": "A::gsm_hard::r5:mawps",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r5:mawps",
"anchor_name": "mawps",
"anchor_round": "r5",
"anchor_domain": "math",
"cos_X": -0.0011123694712296128,
"adapter_dir": "/workspace/round3_out/round5/Y/mawps",
"accuracy": 0.02,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 20.956,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": -0.5000000000000001
},
{
"cell_id": "A::gsm_hard::r4:arc_easy",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r4:arc_easy",
"anchor_name": "arc_easy",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": -0.0009216612670570612,
"adapter_dir": "/workspace/round3_out/round4/Y/arc_easy",
"accuracy": 0.05,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 29.551,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": -0.15384615384615388
},
{
"cell_id": "A::gsm_hard::r4:medmcqa_easy",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r4:medmcqa_easy",
"anchor_name": "medmcqa_easy",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.8125821948051453,
"adapter_dir": "/workspace/round3_out/round4/Y/medmcqa_easy",
"accuracy": 0.03333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 28.713,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": -0.34615384615384626
},
{
"cell_id": "A::gsm_hard::r4:mmlu_elementary_math",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r4:mmlu_elementary_math",
"anchor_name": "mmlu_elementary_math",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.8851031064987183,
"adapter_dir": "/workspace/round3_out/round4/Y/mmlu_elementary_math",
"accuracy": 0.03,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 29.335,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": -0.38461538461538475
},
{
"cell_id": "A::gsm_hard::r4:mmlu_high_school_biology",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r4:mmlu_high_school_biology",
"anchor_name": "mmlu_high_school_biology",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.8839024305343628,
"adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_biology",
"accuracy": 0.04,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 29.358,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": -0.26923076923076933
},
{
"cell_id": "A::gsm_hard::r4:mmlu_high_school_physics",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r4:mmlu_high_school_physics",
"anchor_name": "mmlu_high_school_physics",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.8970074653625488,
"adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_physics",
"accuracy": 0.06,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 28.333,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": -0.038461538461538554
},
{
"cell_id": "A::gsm_hard::r4:openbookqa",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r4:openbookqa",
"anchor_name": "openbookqa",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.8150046467781067,
"adapter_dir": "/workspace/round3_out/round4/Y/openbookqa",
"accuracy": 0.056666666666666664,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 29.83,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": -0.07692307692307702
},
{
"cell_id": "A::gsm_hard::r4:sciq",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r4:sciq",
"anchor_name": "sciq",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": -0.0006476823473349214,
"adapter_dir": "/workspace/round3_out/round4/Y/sciq",
"accuracy": 0.023333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 28.818,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": -0.4615384615384617
},
{
"cell_id": "A::gsm_hard::r5:medmcqa_easy",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r5:medmcqa_easy",
"anchor_name": "medmcqa_easy",
"anchor_round": "r5",
"anchor_domain": "science",
"cos_X": 0.8125821948051453,
"adapter_dir": "/workspace/round3_out/round5/Y/medmcqa_easy",
"accuracy": 0.03333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 28.137,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": -0.34615384615384626
},
{
"cell_id": "A::gsm_hard::r5:pubmedqa_pqal",
"stage": "locality_single_anchor",
"task": "gsm_hard",
"target_domain": "math",
"anchor_ref": "r5:pubmedqa_pqal",
"anchor_name": "pubmedqa_pqal",
"anchor_round": "r5",
"anchor_domain": "science",
"cos_X": 0.8367233276367188,
"adapter_dir": "/workspace/round3_out/round5/Y/pubmedqa_pqal",
"accuracy": 0.05,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 28.028,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"single_anchor_gap": -0.15384615384615388
},
{
"cell_id": "A::mbpp_plus::r4:humaneval",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r4:humaneval",
"anchor_name": "humaneval",
"anchor_round": "r4",
"anchor_domain": "code",
"cos_X": 0.9624950885772705,
"adapter_dir": "/workspace/round3_out/round4/Y/humaneval",
"accuracy": 0.2,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 150.314,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": -0.07142857142857141
},
{
"cell_id": "A::mbpp_plus::r4:mbpp",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r4:mbpp",
"anchor_name": "mbpp",
"anchor_round": "r4",
"anchor_domain": "code",
"cos_X": -0.0003052547399420291,
"adapter_dir": "/workspace/round3_out/round4/Y/mbpp",
"accuracy": 0.2833333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 146.154,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": 0.28571428571428564
},
{
"cell_id": "A::mbpp_plus::r4:mbpp_sanitized",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r4:mbpp_sanitized",
"anchor_name": "mbpp_sanitized",
"anchor_round": "r4",
"anchor_domain": "code",
"cos_X": 0.9884072542190552,
"adapter_dir": "/workspace/round3_out/round4/Y/mbpp_sanitized",
"accuracy": 0.2633333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 149.756,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": 0.19999999999999984
},
{
"cell_id": "A::mbpp_plus::r5:conala_curated",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r5:conala_curated",
"anchor_name": "conala_curated",
"anchor_round": "r5",
"anchor_domain": "code",
"cos_X": 0.8741890788078308,
"adapter_dir": "/workspace/round3_out/round5/Y/conala_curated",
"accuracy": 0.18666666666666668,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 217.294,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": -0.12857142857142856
},
{
"cell_id": "A::mbpp_plus::r5:humaneval",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r5:humaneval",
"anchor_name": "humaneval",
"anchor_round": "r5",
"anchor_domain": "code",
"cos_X": 0.9624950885772705,
"adapter_dir": "/workspace/round3_out/round5/Y/humaneval",
"accuracy": 0.2,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 157.393,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": -0.07142857142857141
},
{
"cell_id": "A::mbpp_plus::r5:mbpp_sanitized",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r5:mbpp_sanitized",
"anchor_name": "mbpp_sanitized",
"anchor_round": "r5",
"anchor_domain": "code",
"cos_X": -0.0003052547399420291,
"adapter_dir": "/workspace/round3_out/round5/Y/mbpp_sanitized",
"accuracy": 0.2833333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 149.779,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": 0.28571428571428564
},
{
"cell_id": "A::mbpp_plus::r4:aqua_rat",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r4:aqua_rat",
"anchor_name": "aqua_rat",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.8814506530761719,
"adapter_dir": "/workspace/round3_out/round4/Y/aqua_rat",
"accuracy": 0.21666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 165.548,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": 0.0
},
{
"cell_id": "A::mbpp_plus::r4:gsm8k",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r4:gsm8k",
"anchor_name": "gsm8k",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": -0.000335412856657058,
"adapter_dir": "/workspace/round3_out/round4/Y/gsm8k",
"accuracy": 0.20666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 161.016,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": -0.04285714285714289
},
{
"cell_id": "A::mbpp_plus::r4:math_algebra_easy",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r4:math_algebra_easy",
"anchor_name": "math_algebra_easy",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.9431692361831665,
"adapter_dir": "/workspace/round3_out/round4/Y/math_algebra_easy",
"accuracy": 0.21333333333333335,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 152.059,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": -0.01428571428571426
},
{
"cell_id": "A::mbpp_plus::r4:math_counting_easy",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r4:math_counting_easy",
"anchor_name": "math_counting_easy",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.9616791009902954,
"adapter_dir": "/workspace/round3_out/round4/Y/math_counting_easy",
"accuracy": 0.22333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 161.601,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": 0.02857142857142852
},
{
"cell_id": "A::mbpp_plus::r4:multiarith",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r4:multiarith",
"anchor_name": "multiarith",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.9476068615913391,
"adapter_dir": "/workspace/round3_out/round4/Y/multiarith",
"accuracy": 0.18333333333333332,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 157.375,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": -0.14285714285714293
},
{
"cell_id": "A::mbpp_plus::r4:svamp",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r4:svamp",
"anchor_name": "svamp",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.9265090823173523,
"adapter_dir": "/workspace/round3_out/round4/Y/svamp",
"accuracy": 0.2,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 156.805,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": -0.07142857142857141
},
{
"cell_id": "A::mbpp_plus::r5:aqua_rat_numeric",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r5:aqua_rat_numeric",
"anchor_name": "aqua_rat_numeric",
"anchor_round": "r5",
"anchor_domain": "math",
"cos_X": -0.00045860654790885746,
"adapter_dir": "/workspace/round3_out/round5/Y/aqua_rat_numeric",
"accuracy": 0.21333333333333335,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 181.409,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": -0.01428571428571426
},
{
"cell_id": "A::mbpp_plus::r5:math_counting_easy",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r5:math_counting_easy",
"anchor_name": "math_counting_easy",
"anchor_round": "r5",
"anchor_domain": "math",
"cos_X": -0.0002999037387780845,
"adapter_dir": "/workspace/round3_out/round5/Y/math_counting_easy",
"accuracy": 0.22,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 153.077,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": 0.01428571428571426
},
{
"cell_id": "A::mbpp_plus::r5:mawps",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r5:mawps",
"anchor_name": "mawps",
"anchor_round": "r5",
"anchor_domain": "math",
"cos_X": -0.0004362465988378972,
"adapter_dir": "/workspace/round3_out/round5/Y/mawps",
"accuracy": 0.19666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 160.786,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": -0.08571428571428578
},
{
"cell_id": "A::mbpp_plus::r4:arc_easy",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r4:arc_easy",
"anchor_name": "arc_easy",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": -0.000321700208587572,
"adapter_dir": "/workspace/round3_out/round4/Y/arc_easy",
"accuracy": 0.21,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 158.264,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": -0.028571428571428636
},
{
"cell_id": "A::mbpp_plus::r4:medmcqa_easy",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r4:medmcqa_easy",
"anchor_name": "medmcqa_easy",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.8672773241996765,
"adapter_dir": "/workspace/round3_out/round4/Y/medmcqa_easy",
"accuracy": 0.21333333333333335,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 171.456,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": -0.01428571428571426
},
{
"cell_id": "A::mbpp_plus::r4:mmlu_elementary_math",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r4:mmlu_elementary_math",
"anchor_name": "mmlu_elementary_math",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.9442014694213867,
"adapter_dir": "/workspace/round3_out/round4/Y/mmlu_elementary_math",
"accuracy": 0.20666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 149.307,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": -0.04285714285714289
},
{
"cell_id": "A::mbpp_plus::r4:mmlu_high_school_biology",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r4:mmlu_high_school_biology",
"anchor_name": "mmlu_high_school_biology",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.9451485872268677,
"adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_biology",
"accuracy": 0.21333333333333335,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 161.958,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": -0.01428571428571426
},
{
"cell_id": "A::mbpp_plus::r4:mmlu_high_school_physics",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r4:mmlu_high_school_physics",
"anchor_name": "mmlu_high_school_physics",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.9600575566291809,
"adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_physics",
"accuracy": 0.21,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 160.754,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": -0.028571428571428636
},
{
"cell_id": "A::mbpp_plus::r4:openbookqa",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r4:openbookqa",
"anchor_name": "openbookqa",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.8676939606666565,
"adapter_dir": "/workspace/round3_out/round4/Y/openbookqa",
"accuracy": 0.2,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 160.035,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": -0.07142857142857141
},
{
"cell_id": "A::mbpp_plus::r4:sciq",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r4:sciq",
"anchor_name": "sciq",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": -0.0003326318983454257,
"adapter_dir": "/workspace/round3_out/round4/Y/sciq",
"accuracy": 0.21,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 154.652,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": -0.028571428571428636
},
{
"cell_id": "A::mbpp_plus::r5:medmcqa_easy",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r5:medmcqa_easy",
"anchor_name": "medmcqa_easy",
"anchor_round": "r5",
"anchor_domain": "science",
"cos_X": 0.8672773241996765,
"adapter_dir": "/workspace/round3_out/round5/Y/medmcqa_easy",
"accuracy": 0.21333333333333335,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 166.789,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": -0.01428571428571426
},
{
"cell_id": "A::mbpp_plus::r5:pubmedqa_pqal",
"stage": "locality_single_anchor",
"task": "mbpp_plus",
"target_domain": "code",
"anchor_ref": "r5:pubmedqa_pqal",
"anchor_name": "pubmedqa_pqal",
"anchor_round": "r5",
"anchor_domain": "science",
"cos_X": 0.8933367133140564,
"adapter_dir": "/workspace/round3_out/round5/Y/pubmedqa_pqal",
"accuracy": 0.20333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 157.746,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"single_anchor_gap": -0.057142857142857155
},
{
"cell_id": "A::mbpp_test_held::r4:humaneval",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r4:humaneval",
"anchor_name": "humaneval",
"anchor_round": "r4",
"anchor_domain": "code",
"cos_X": 0.9843209981918335,
"adapter_dir": "/workspace/round3_out/round4/Y/humaneval",
"accuracy": 0.23,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 1,
"eval_seconds": 52.845,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.0
},
{
"cell_id": "A::mbpp_test_held::r4:mbpp",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r4:mbpp",
"anchor_name": "mbpp",
"anchor_round": "r4",
"anchor_domain": "code",
"cos_X": -0.00017454303451813757,
"adapter_dir": "/workspace/round3_out/round4/Y/mbpp",
"accuracy": 0.3,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 1,
"eval_seconds": 49.504,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.7777777777777776
},
{
"cell_id": "A::mbpp_test_held::r4:mbpp_sanitized",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r4:mbpp_sanitized",
"anchor_name": "mbpp_sanitized",
"anchor_round": "r4",
"anchor_domain": "code",
"cos_X": 1.0012516975402832,
"adapter_dir": "/workspace/round3_out/round4/Y/mbpp_sanitized",
"accuracy": 0.29,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 3,
"eval_seconds": 51.035,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.6666666666666664
},
{
"cell_id": "A::mbpp_test_held::r5:conala_curated",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r5:conala_curated",
"anchor_name": "conala_curated",
"anchor_round": "r5",
"anchor_domain": "code",
"cos_X": 0.891869068145752,
"adapter_dir": "/workspace/round3_out/round5/Y/conala_curated",
"accuracy": 0.23,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 5,
"eval_seconds": 74.517,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.0
},
{
"cell_id": "A::mbpp_test_held::r5:humaneval",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r5:humaneval",
"anchor_name": "humaneval",
"anchor_round": "r5",
"anchor_domain": "code",
"cos_X": 0.9843209981918335,
"adapter_dir": "/workspace/round3_out/round5/Y/humaneval",
"accuracy": 0.23,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 4,
"eval_seconds": 55.158,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.0
},
{
"cell_id": "A::mbpp_test_held::r5:mbpp_sanitized",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r5:mbpp_sanitized",
"anchor_name": "mbpp_sanitized",
"anchor_round": "r5",
"anchor_domain": "code",
"cos_X": -0.00017454303451813757,
"adapter_dir": "/workspace/round3_out/round5/Y/mbpp_sanitized",
"accuracy": 0.3,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 3,
"eval_seconds": 50.742,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.7777777777777776
},
{
"cell_id": "A::mbpp_test_held::r4:aqua_rat",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r4:aqua_rat",
"anchor_name": "aqua_rat",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.9019709825515747,
"adapter_dir": "/workspace/round3_out/round4/Y/aqua_rat",
"accuracy": 0.24,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 6,
"eval_seconds": 52.984,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.11111111111111091
},
{
"cell_id": "A::mbpp_test_held::r4:gsm8k",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r4:gsm8k",
"anchor_name": "gsm8k",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": -0.00032459679641760886,
"adapter_dir": "/workspace/round3_out/round4/Y/gsm8k",
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 0,
"eval_seconds": 46.407,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.22222222222222213
},
{
"cell_id": "A::mbpp_test_held::r4:math_algebra_easy",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r4:math_algebra_easy",
"anchor_name": "math_algebra_easy",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.9648966789245605,
"adapter_dir": "/workspace/round3_out/round4/Y/math_algebra_easy",
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 5,
"eval_seconds": 48.28,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.22222222222222213
},
{
"cell_id": "A::mbpp_test_held::r4:math_counting_easy",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r4:math_counting_easy",
"anchor_name": "math_counting_easy",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.9844874739646912,
"adapter_dir": "/workspace/round3_out/round4/Y/math_counting_easy",
"accuracy": 0.23,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 0,
"eval_seconds": 51.705,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.0
},
{
"cell_id": "A::mbpp_test_held::r4:multiarith",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r4:multiarith",
"anchor_name": "multiarith",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.969685435295105,
"adapter_dir": "/workspace/round3_out/round4/Y/multiarith",
"accuracy": 0.23,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 6,
"eval_seconds": 50.83,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.0
},
{
"cell_id": "A::mbpp_test_held::r4:svamp",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r4:svamp",
"anchor_name": "svamp",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.9480026960372925,
"adapter_dir": "/workspace/round3_out/round4/Y/svamp",
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 5,
"eval_seconds": 49.549,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.22222222222222213
},
{
"cell_id": "A::mbpp_test_held::r5:aqua_rat_numeric",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r5:aqua_rat_numeric",
"anchor_name": "aqua_rat_numeric",
"anchor_round": "r5",
"anchor_domain": "math",
"cos_X": -0.00034561159554868937,
"adapter_dir": "/workspace/round3_out/round5/Y/aqua_rat_numeric",
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 0,
"eval_seconds": 58.851,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.22222222222222213
},
{
"cell_id": "A::mbpp_test_held::r5:math_counting_easy",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r5:math_counting_easy",
"anchor_name": "math_counting_easy",
"anchor_round": "r5",
"anchor_domain": "math",
"cos_X": -0.00020121937268413603,
"adapter_dir": "/workspace/round3_out/round5/Y/math_counting_easy",
"accuracy": 0.24,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 1,
"eval_seconds": 46.892,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.11111111111111091
},
{
"cell_id": "A::mbpp_test_held::r5:mawps",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r5:mawps",
"anchor_name": "mawps",
"anchor_round": "r5",
"anchor_domain": "math",
"cos_X": -0.0002617448626551777,
"adapter_dir": "/workspace/round3_out/round5/Y/mawps",
"accuracy": 0.24,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 2,
"eval_seconds": 47.997,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.11111111111111091
},
{
"cell_id": "A::mbpp_test_held::r4:arc_easy",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r4:arc_easy",
"anchor_name": "arc_easy",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": -0.00020039879018440843,
"adapter_dir": "/workspace/round3_out/round4/Y/arc_easy",
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 3,
"eval_seconds": 50.507,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.22222222222222213
},
{
"cell_id": "A::mbpp_test_held::r4:medmcqa_easy",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r4:medmcqa_easy",
"anchor_name": "medmcqa_easy",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.8870396614074707,
"adapter_dir": "/workspace/round3_out/round4/Y/medmcqa_easy",
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 7,
"eval_seconds": 52.987,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.22222222222222213
},
{
"cell_id": "A::mbpp_test_held::r4:mmlu_elementary_math",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r4:mmlu_elementary_math",
"anchor_name": "mmlu_elementary_math",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.9657072424888611,
"adapter_dir": "/workspace/round3_out/round4/Y/mmlu_elementary_math",
"accuracy": 0.26,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 4,
"eval_seconds": 43.06,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.3333333333333333
},
{
"cell_id": "A::mbpp_test_held::r4:mmlu_high_school_biology",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r4:mmlu_high_school_biology",
"anchor_name": "mmlu_high_school_biology",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.9674594402313232,
"adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_biology",
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 7,
"eval_seconds": 48.926,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.22222222222222213
},
{
"cell_id": "A::mbpp_test_held::r4:mmlu_high_school_physics",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r4:mmlu_high_school_physics",
"anchor_name": "mmlu_high_school_physics",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.983260452747345,
"adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_physics",
"accuracy": 0.24,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 2,
"eval_seconds": 53.664,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.11111111111111091
},
{
"cell_id": "A::mbpp_test_held::r4:openbookqa",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r4:openbookqa",
"anchor_name": "openbookqa",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.88755863904953,
"adapter_dir": "/workspace/round3_out/round4/Y/openbookqa",
"accuracy": 0.24,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 4,
"eval_seconds": 50.62,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.11111111111111091
},
{
"cell_id": "A::mbpp_test_held::r4:sciq",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r4:sciq",
"anchor_name": "sciq",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": -0.00023939934908412397,
"adapter_dir": "/workspace/round3_out/round4/Y/sciq",
"accuracy": 0.24,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 2,
"eval_seconds": 54.751,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.11111111111111091
},
{
"cell_id": "A::mbpp_test_held::r5:medmcqa_easy",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r5:medmcqa_easy",
"anchor_name": "medmcqa_easy",
"anchor_round": "r5",
"anchor_domain": "science",
"cos_X": 0.8870396614074707,
"adapter_dir": "/workspace/round3_out/round5/Y/medmcqa_easy",
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 6,
"eval_seconds": 51.649,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.22222222222222213
},
{
"cell_id": "A::mbpp_test_held::r5:pubmedqa_pqal",
"stage": "locality_single_anchor",
"task": "mbpp_test_held",
"target_domain": "code",
"anchor_ref": "r5:pubmedqa_pqal",
"anchor_name": "pubmedqa_pqal",
"anchor_round": "r5",
"anchor_domain": "science",
"cos_X": 0.9136675000190735,
"adapter_dir": "/workspace/round3_out/round5/Y/pubmedqa_pqal",
"accuracy": 0.24,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 7,
"eval_seconds": 50.918,
"base_Y": 0.23,
"oracle": 0.32,
"single_anchor_gap": 0.11111111111111091
},
{
"cell_id": "A::openbookqa_test::r4:humaneval",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r4:humaneval",
"anchor_name": "humaneval",
"anchor_round": "r4",
"anchor_domain": "code",
"cos_X": 0.9508311748504639,
"adapter_dir": "/workspace/round3_out/round4/Y/humaneval",
"accuracy": 0.7166666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 4.404,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": 0.02439024390243918
},
{
"cell_id": "A::openbookqa_test::r4:mbpp",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r4:mbpp",
"anchor_name": "mbpp",
"anchor_round": "r4",
"anchor_domain": "code",
"cos_X": -0.00021616967569570988,
"adapter_dir": "/workspace/round3_out/round4/Y/mbpp",
"accuracy": 0.6833333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 4.538,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": -0.09756097560975592
},
{
"cell_id": "A::openbookqa_test::r4:mbpp_sanitized",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r4:mbpp_sanitized",
"anchor_name": "mbpp_sanitized",
"anchor_round": "r4",
"anchor_domain": "code",
"cos_X": 0.9530814290046692,
"adapter_dir": "/workspace/round3_out/round4/Y/mbpp_sanitized",
"accuracy": 0.6933333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 4.846,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": -0.060975609756097345
},
{
"cell_id": "A::openbookqa_test::r5:conala_curated",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r5:conala_curated",
"anchor_name": "conala_curated",
"anchor_round": "r5",
"anchor_domain": "code",
"cos_X": 0.8603157997131348,
"adapter_dir": "/workspace/round3_out/round5/Y/conala_curated",
"accuracy": 0.7233333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 28.747,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": 0.04878048780487836
},
{
"cell_id": "A::openbookqa_test::r5:humaneval",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r5:humaneval",
"anchor_name": "humaneval",
"anchor_round": "r5",
"anchor_domain": "code",
"cos_X": 0.9508311748504639,
"adapter_dir": "/workspace/round3_out/round5/Y/humaneval",
"accuracy": 0.7166666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 4.672,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": 0.02439024390243918
},
{
"cell_id": "A::openbookqa_test::r5:mbpp_sanitized",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r5:mbpp_sanitized",
"anchor_name": "mbpp_sanitized",
"anchor_round": "r5",
"anchor_domain": "code",
"cos_X": -0.00021616967569570988,
"adapter_dir": "/workspace/round3_out/round5/Y/mbpp_sanitized",
"accuracy": 0.6833333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 4.696,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": -0.09756097560975592
},
{
"cell_id": "A::openbookqa_test::r4:aqua_rat",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r4:aqua_rat",
"anchor_name": "aqua_rat",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.8845954537391663,
"adapter_dir": "/workspace/round3_out/round4/Y/aqua_rat",
"accuracy": 0.7,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 27.648,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": -0.03658536585365857
},
{
"cell_id": "A::openbookqa_test::r4:gsm8k",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r4:gsm8k",
"anchor_name": "gsm8k",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": -0.0003817097167484462,
"adapter_dir": "/workspace/round3_out/round4/Y/gsm8k",
"accuracy": 0.7333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 5.373,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": 0.08536585365853654
},
{
"cell_id": "A::openbookqa_test::r4:math_algebra_easy",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r4:math_algebra_easy",
"anchor_name": "math_algebra_easy",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.9346193075180054,
"adapter_dir": "/workspace/round3_out/round4/Y/math_algebra_easy",
"accuracy": 0.7233333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 8.361,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": 0.04878048780487836
},
{
"cell_id": "A::openbookqa_test::r4:math_counting_easy",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r4:math_counting_easy",
"anchor_name": "math_counting_easy",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.9526723623275757,
"adapter_dir": "/workspace/round3_out/round4/Y/math_counting_easy",
"accuracy": 0.7233333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 3.554,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": 0.04878048780487836
},
{
"cell_id": "A::openbookqa_test::r4:multiarith",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r4:multiarith",
"anchor_name": "multiarith",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.9391674399375916,
"adapter_dir": "/workspace/round3_out/round4/Y/multiarith",
"accuracy": 0.7266666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 4.66,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": 0.060975609756097754
},
{
"cell_id": "A::openbookqa_test::r4:svamp",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r4:svamp",
"anchor_name": "svamp",
"anchor_round": "r4",
"anchor_domain": "math",
"cos_X": 0.9191423058509827,
"adapter_dir": "/workspace/round3_out/round4/Y/svamp",
"accuracy": 0.69,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 3.473,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": -0.07317073170731714
},
{
"cell_id": "A::openbookqa_test::r5:aqua_rat_numeric",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r5:aqua_rat_numeric",
"anchor_name": "aqua_rat_numeric",
"anchor_round": "r5",
"anchor_domain": "math",
"cos_X": -0.0004580095992423594,
"adapter_dir": "/workspace/round3_out/round5/Y/aqua_rat_numeric",
"accuracy": 0.7566666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 26.635,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": 0.17073170731707346
},
{
"cell_id": "A::openbookqa_test::r5:math_counting_easy",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r5:math_counting_easy",
"anchor_name": "math_counting_easy",
"anchor_round": "r5",
"anchor_domain": "math",
"cos_X": -0.00032379472395405173,
"adapter_dir": "/workspace/round3_out/round5/Y/math_counting_easy",
"accuracy": 0.7166666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 3.361,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": 0.02439024390243918
},
{
"cell_id": "A::openbookqa_test::r5:mawps",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r5:mawps",
"anchor_name": "mawps",
"anchor_round": "r5",
"anchor_domain": "math",
"cos_X": -0.00039862250559963286,
"adapter_dir": "/workspace/round3_out/round5/Y/mawps",
"accuracy": 0.73,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 6.23,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": 0.07317073170731714
},
{
"cell_id": "A::openbookqa_test::r4:arc_easy",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r4:arc_easy",
"anchor_name": "arc_easy",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": -0.0005088147590868175,
"adapter_dir": "/workspace/round3_out/round4/Y/arc_easy",
"accuracy": 0.7166666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 16.606,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": 0.02439024390243918
},
{
"cell_id": "A::openbookqa_test::r4:medmcqa_easy",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r4:medmcqa_easy",
"anchor_name": "medmcqa_easy",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.8757078647613525,
"adapter_dir": "/workspace/round3_out/round4/Y/medmcqa_easy",
"accuracy": 0.7133333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 28.411,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": 0.012195121951219795
},
{
"cell_id": "A::openbookqa_test::r4:mmlu_elementary_math",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r4:mmlu_elementary_math",
"anchor_name": "mmlu_elementary_math",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.9520111680030823,
"adapter_dir": "/workspace/round3_out/round4/Y/mmlu_elementary_math",
"accuracy": 0.7133333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 29.065,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": 0.012195121951219795
},
{
"cell_id": "A::openbookqa_test::r4:mmlu_high_school_biology",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r4:mmlu_high_school_biology",
"anchor_name": "mmlu_high_school_biology",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.9565833806991577,
"adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_biology",
"accuracy": 0.6733333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 28.46,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": -0.1341463414634145
},
{
"cell_id": "A::openbookqa_test::r4:mmlu_high_school_physics",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r4:mmlu_high_school_physics",
"anchor_name": "mmlu_high_school_physics",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.9671817421913147,
"adapter_dir": "/workspace/round3_out/round4/Y/mmlu_high_school_physics",
"accuracy": 0.6966666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 28.106,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": -0.04878048780487796
},
{
"cell_id": "A::openbookqa_test::r4:openbookqa",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r4:openbookqa",
"anchor_name": "openbookqa",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": 0.89091557264328,
"adapter_dir": "/workspace/round3_out/round4/Y/openbookqa",
"accuracy": 0.81,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 19.183,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": 0.3658536585365857
},
{
"cell_id": "A::openbookqa_test::r4:sciq",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r4:sciq",
"anchor_name": "sciq",
"anchor_round": "r4",
"anchor_domain": "science",
"cos_X": -0.00015819823602214456,
"adapter_dir": "/workspace/round3_out/round4/Y/sciq",
"accuracy": 0.7033333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 28.125,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": -0.024390243902438775
},
{
"cell_id": "A::openbookqa_test::r5:medmcqa_easy",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r5:medmcqa_easy",
"anchor_name": "medmcqa_easy",
"anchor_round": "r5",
"anchor_domain": "science",
"cos_X": 0.8757078647613525,
"adapter_dir": "/workspace/round3_out/round5/Y/medmcqa_easy",
"accuracy": 0.7133333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 27.62,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": 0.012195121951219795
},
{
"cell_id": "A::openbookqa_test::r5:pubmedqa_pqal",
"stage": "locality_single_anchor",
"task": "openbookqa_test",
"target_domain": "science",
"anchor_ref": "r5:pubmedqa_pqal",
"anchor_name": "pubmedqa_pqal",
"anchor_round": "r5",
"anchor_domain": "science",
"cos_X": 0.8888986110687256,
"adapter_dir": "/workspace/round3_out/round5/Y/pubmedqa_pqal",
"accuracy": 0.6966666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 22.009,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"single_anchor_gap": -0.04878048780487796
}
]
}