Spaces:
Running
Running
| { | |
| "experiment": "EXP-G4_gamma_new_formula_validation", | |
| "date": "2026-04-18", | |
| "formula": "gamma = 1 - T_eval*sqrt(2)/theta", | |
| "models": [ | |
| { | |
| "prefix": "EleutherAI--pythia-70m", | |
| "pe_type": "RoPE", | |
| "notes": "", | |
| "theta": 10000, | |
| "d_head": 64, | |
| "T_train": 2048, | |
| "T_eval_max": 2000.0, | |
| "n_points": 9, | |
| "gamma_obs": 0.7476017873166874, | |
| "gamma_new_pred": 0.717157287525381, | |
| "gamma_old_pred": 0.99999561666838, | |
| "err_new_pct": 4.245163553501351, | |
| "err_old_pct": -25.239493568239496, | |
| "R2": 0.9824939571028048 | |
| }, | |
| { | |
| "prefix": "meta-llama--Meta-Llama-3-8B", | |
| "pe_type": "RoPE", | |
| "notes": "", | |
| "theta": 500000, | |
| "d_head": 128, | |
| "T_train": 8192, | |
| "T_eval_max": 2000.0, | |
| "n_points": 9, | |
| "gamma_obs": 1.0454762537473639, | |
| "gamma_new_pred": 0.9943431457505076, | |
| "gamma_old_pred": 0.7018781400200674, | |
| "err_new_pct": 5.1424006104313325, | |
| "err_old_pct": 48.95409817400391, | |
| "R2": 0.9936044966891108 | |
| }, | |
| { | |
| "prefix": "Qwen--Qwen2.5-7B", | |
| "pe_type": "RoPE", | |
| "notes": "", | |
| "theta": 1000000, | |
| "d_head": 128, | |
| "T_train": 8192, | |
| "T_eval_max": 2000.0, | |
| "n_points": 9, | |
| "gamma_obs": 0.9966953735480816, | |
| "gamma_new_pred": 0.9971715728752538, | |
| "gamma_old_pred": 0.6666637444455867, | |
| "err_new_pct": -0.047755004266634545, | |
| "err_old_pct": 49.5049613620367, | |
| "R2": 0.9993151965766267 | |
| }, | |
| { | |
| "prefix": "meta-llama--Llama-2-7b-hf", | |
| "pe_type": "RoPE", | |
| "notes": "known_artifact", | |
| "theta": 10000, | |
| "d_head": 128, | |
| "T_train": 4096, | |
| "T_eval_max": 2000.0, | |
| "n_points": 9, | |
| "gamma_obs": 0.2870574377368437, | |
| "gamma_new_pred": 0.717157287525381, | |
| "gamma_old_pred": 0.99999561666838, | |
| "err_new_pct": -59.97287586278841, | |
| "err_old_pct": -71.29413039896974, | |
| "R2": 0.9665564682676285 | |
| }, | |
| { | |
| "prefix": "mistralai--Mistral-7B-v0.1", | |
| "pe_type": "RoPE", | |
| "notes": "only_2pts", | |
| "theta": 10000, | |
| "d_head": 128, | |
| "T_train": 8192, | |
| "T_eval_max": 50.0, | |
| "n_points": 2, | |
| "gamma_obs": 1.213076772373502, | |
| "gamma_new_pred": 0.9929289321881345, | |
| "gamma_old_pred": 0.99999561666838, | |
| "err_new_pct": 22.171560627226768, | |
| "err_old_pct": 21.30820897145836, | |
| "R2": 1.0 | |
| }, | |
| { | |
| "prefix": "mistralai--Mistral-Nemo-Instruct-2407", | |
| "pe_type": "RoPE", | |
| "notes": "partial_7pts", | |
| "theta": 1000000, | |
| "d_head": 128, | |
| "T_train": 131072, | |
| "T_eval_max": 500.0, | |
| "n_points": 7, | |
| "gamma_obs": 0.6382710387667986, | |
| "gamma_new_pred": 0.9992928932188134, | |
| "gamma_old_pred": 0.6666637444455867, | |
| "err_new_pct": -36.127731609211246, | |
| "err_old_pct": -4.2589245200967385, | |
| "R2": 0.9904023363048612 | |
| }, | |
| { | |
| "prefix": "google--gemma-2-9b-it", | |
| "pe_type": "RoPE", | |
| "notes": "partial", | |
| "theta": 10000, | |
| "d_head": 256, | |
| "T_train": 8192, | |
| "T_eval_max": 1000.0, | |
| "n_points": 8, | |
| "gamma_obs": 0.7791408207653451, | |
| "gamma_new_pred": 0.8585786437626906, | |
| "gamma_old_pred": 0.99999561666838, | |
| "err_new_pct": -9.25224772062953, | |
| "err_old_pct": -22.085576398708866, | |
| "R2": 0.9981402455707831 | |
| }, | |
| { | |
| "prefix": "tiiuae--falcon-7b", | |
| "pe_type": "ALiBi", | |
| "notes": "exclude_no_rope", | |
| "theta": 10000, | |
| "d_head": 64, | |
| "T_train": 2048, | |
| "T_eval_max": 1000.0, | |
| "n_points": 8, | |
| "gamma_obs": 0.7865425131096547, | |
| "gamma_new_pred": 0.8585786437626906, | |
| "gamma_old_pred": 0.99999561666838, | |
| "err_new_pct": -8.390161015109813, | |
| "err_old_pct": -21.34540391985647, | |
| "R2": 0.998318162512278 | |
| } | |
| ], | |
| "summary": { | |
| "n_clean": 5, | |
| "mean_err_new_pct": 10.963059699608019, | |
| "mean_err_old_pct": 30.008610804617142 | |
| } | |
| } |