Spaces:
Running
Running
| { | |
| "pythia-70m": { | |
| "model": "pythia-70m", | |
| "pe": "RoPE", | |
| "d_model": 512, | |
| "L_crit": 4, | |
| "f_active": 0.374, | |
| "rank90_pred": 191, | |
| "k_thresh_rho": 191, | |
| "layers": [ | |
| { | |
| "layer": 0, | |
| "label": "pre_Lcrit", | |
| "rank90": 193, | |
| "rank50": 55, | |
| "rho": 0.8981, | |
| "fro_norm": 10.7524, | |
| "rank90_frac": 0.377, | |
| "rank90_err_pct": 1.0 | |
| }, | |
| { | |
| "layer": 1, | |
| "label": "pre_Lcrit", | |
| "rank90": 230, | |
| "rank50": 78, | |
| "rho": 0.8394, | |
| "fro_norm": 11.6785, | |
| "rank90_frac": 0.4492, | |
| "rank90_err_pct": 20.4 | |
| }, | |
| { | |
| "layer": 2, | |
| "label": "pre_Lcrit", | |
| "rank90": 180, | |
| "rank50": 53, | |
| "rho": 0.9157, | |
| "fro_norm": 13.7552, | |
| "rank90_frac": 0.3516, | |
| "rank90_err_pct": -5.8 | |
| }, | |
| { | |
| "layer": 3, | |
| "label": "pre_Lcrit", | |
| "rank90": 188, | |
| "rank50": 53, | |
| "rho": 0.9051, | |
| "fro_norm": 18.607, | |
| "rank90_frac": 0.3672, | |
| "rank90_err_pct": -1.6 | |
| }, | |
| { | |
| "layer": 4, | |
| "label": "at_Lcrit", | |
| "rank90": 205, | |
| "rank50": 69, | |
| "rho": 0.8797, | |
| "fro_norm": 8.1702, | |
| "rank90_frac": 0.4004, | |
| "rank90_err_pct": 7.3 | |
| }, | |
| { | |
| "layer": 5, | |
| "label": "post_Lcrit", | |
| "rank90": 240, | |
| "rank50": 82, | |
| "rho": 0.8183, | |
| "fro_norm": 8.2678, | |
| "rank90_frac": 0.4688, | |
| "rank90_err_pct": 25.7 | |
| } | |
| ], | |
| "summary": { | |
| "mean_rank90": 206.0, | |
| "std_rank90": 21.99, | |
| "pre_Lcrit_mean_rho": 0.8896, | |
| "post_Lcrit_mean_rho": 0.8183 | |
| } | |
| }, | |
| "gpt2-medium": { | |
| "model": "gpt2-medium", | |
| "pe": "AbsPE", | |
| "d_model": 1024, | |
| "L_crit": 23, | |
| "f_active": null, | |
| "rank90_pred": null, | |
| "k_thresh_rho": 512, | |
| "layers": [ | |
| { | |
| "layer": 0, | |
| "label": "pre_Lcrit", | |
| "rank90": 419, | |
| "rank50": 102, | |
| "rho": 0.9437, | |
| "fro_norm": 43.0384, | |
| "rank90_frac": 0.4092 | |
| }, | |
| { | |
| "layer": 1, | |
| "label": "pre_Lcrit", | |
| "rank90": 268, | |
| "rank50": 20, | |
| "rho": 0.9792, | |
| "fro_norm": 92.178, | |
| "rank90_frac": 0.2617 | |
| }, | |
| { | |
| "layer": 2, | |
| "label": "pre_Lcrit", | |
| "rank90": 392, | |
| "rank50": 97, | |
| "rho": 0.9542, | |
| "fro_norm": 72.4524, | |
| "rank90_frac": 0.3828 | |
| }, | |
| { | |
| "layer": 3, | |
| "label": "pre_Lcrit", | |
| "rank90": 429, | |
| "rank50": 126, | |
| "rho": 0.9423, | |
| "fro_norm": 69.9871, | |
| "rank90_frac": 0.4189 | |
| }, | |
| { | |
| "layer": 4, | |
| "label": "pre_Lcrit", | |
| "rank90": 431, | |
| "rank50": 125, | |
| "rho": 0.9418, | |
| "fro_norm": 72.3096, | |
| "rank90_frac": 0.4209 | |
| }, | |
| { | |
| "layer": 5, | |
| "label": "pre_Lcrit", | |
| "rank90": 437, | |
| "rank50": 131, | |
| "rho": 0.9398, | |
| "fro_norm": 78.4996, | |
| "rank90_frac": 0.4268 | |
| }, | |
| { | |
| "layer": 6, | |
| "label": "pre_Lcrit", | |
| "rank90": 431, | |
| "rank50": 121, | |
| "rho": 0.9413, | |
| "fro_norm": 84.0501, | |
| "rank90_frac": 0.4209 | |
| }, | |
| { | |
| "layer": 7, | |
| "label": "pre_Lcrit", | |
| "rank90": 430, | |
| "rank50": 130, | |
| "rho": 0.9424, | |
| "fro_norm": 87.7387, | |
| "rank90_frac": 0.4199 | |
| }, | |
| { | |
| "layer": 8, | |
| "label": "pre_Lcrit", | |
| "rank90": 415, | |
| "rank50": 119, | |
| "rho": 0.9479, | |
| "fro_norm": 91.8613, | |
| "rank90_frac": 0.4053 | |
| }, | |
| { | |
| "layer": 9, | |
| "label": "pre_Lcrit", | |
| "rank90": 406, | |
| "rank50": 96, | |
| "rho": 0.9503, | |
| "fro_norm": 98.2563, | |
| "rank90_frac": 0.3965 | |
| }, | |
| { | |
| "layer": 10, | |
| "label": "pre_Lcrit", | |
| "rank90": 393, | |
| "rank50": 89, | |
| "rho": 0.9538, | |
| "fro_norm": 97.6642, | |
| "rank90_frac": 0.3838 | |
| }, | |
| { | |
| "layer": 11, | |
| "label": "pre_Lcrit", | |
| "rank90": 420, | |
| "rank50": 113, | |
| "rho": 0.9453, | |
| "fro_norm": 97.9002, | |
| "rank90_frac": 0.4102 | |
| }, | |
| { | |
| "layer": 12, | |
| "label": "pre_Lcrit", | |
| "rank90": 405, | |
| "rank50": 114, | |
| "rho": 0.9504, | |
| "fro_norm": 96.6644, | |
| "rank90_frac": 0.3955 | |
| }, | |
| { | |
| "layer": 13, | |
| "label": "pre_Lcrit", | |
| "rank90": 428, | |
| "rank50": 130, | |
| "rho": 0.9426, | |
| "fro_norm": 94.9417, | |
| "rank90_frac": 0.418 | |
| }, | |
| { | |
| "layer": 14, | |
| "label": "pre_Lcrit", | |
| "rank90": 460, | |
| "rank50": 145, | |
| "rho": 0.9289, | |
| "fro_norm": 95.5705, | |
| "rank90_frac": 0.4492 | |
| }, | |
| { | |
| "layer": 15, | |
| "label": "pre_Lcrit", | |
| "rank90": 419, | |
| "rank50": 117, | |
| "rho": 0.9465, | |
| "fro_norm": 100.0642, | |
| "rank90_frac": 0.4092 | |
| }, | |
| { | |
| "layer": 16, | |
| "label": "pre_Lcrit", | |
| "rank90": 458, | |
| "rank50": 144, | |
| "rho": 0.9291, | |
| "fro_norm": 103.4509, | |
| "rank90_frac": 0.4473 | |
| }, | |
| { | |
| "layer": 17, | |
| "label": "pre_Lcrit", | |
| "rank90": 448, | |
| "rank50": 140, | |
| "rho": 0.9345, | |
| "fro_norm": 105.379, | |
| "rank90_frac": 0.4375 | |
| }, | |
| { | |
| "layer": 18, | |
| "label": "pre_Lcrit", | |
| "rank90": 479, | |
| "rank50": 157, | |
| "rho": 0.9192, | |
| "fro_norm": 108.1156, | |
| "rank90_frac": 0.4678 | |
| }, | |
| { | |
| "layer": 19, | |
| "label": "pre_Lcrit", | |
| "rank90": 505, | |
| "rank50": 162, | |
| "rho": 0.9046, | |
| "fro_norm": 115.4507, | |
| "rank90_frac": 0.4932 | |
| }, | |
| { | |
| "layer": 20, | |
| "label": "pre_Lcrit", | |
| "rank90": 498, | |
| "rank50": 163, | |
| "rho": 0.9089, | |
| "fro_norm": 117.7071, | |
| "rank90_frac": 0.4863 | |
| }, | |
| { | |
| "layer": 21, | |
| "label": "pre_Lcrit", | |
| "rank90": 499, | |
| "rank50": 153, | |
| "rho": 0.908, | |
| "fro_norm": 123.4158, | |
| "rank90_frac": 0.4873 | |
| }, | |
| { | |
| "layer": 22, | |
| "label": "pre_Lcrit", | |
| "rank90": 490, | |
| "rank50": 141, | |
| "rho": 0.913, | |
| "fro_norm": 132.9368, | |
| "rank90_frac": 0.4785 | |
| }, | |
| { | |
| "layer": 23, | |
| "label": "at_Lcrit", | |
| "rank90": 408, | |
| "rank50": 45, | |
| "rho": 0.9462, | |
| "fro_norm": 145.1893, | |
| "rank90_frac": 0.3984 | |
| } | |
| ], | |
| "summary": { | |
| "mean_rank90": 432.0, | |
| "std_rank90": 47.56, | |
| "pre_Lcrit_mean_rho": 0.9377, | |
| "post_Lcrit_mean_rho": 0.0 | |
| } | |
| }, | |
| "pythia-1b": { | |
| "model": "pythia-1b", | |
| "pe": "RoPE", | |
| "d_model": 2048, | |
| "L_crit": 15, | |
| "f_active": 0.128, | |
| "rank90_pred": 262, | |
| "k_thresh_rho": 262, | |
| "layers": [ | |
| { | |
| "layer": 0, | |
| "label": "pre_Lcrit", | |
| "rank90": 810, | |
| "rank50": 211, | |
| "rho": 0.563, | |
| "fro_norm": 36.8091, | |
| "rank90_frac": 0.3955, | |
| "rank90_err_pct": 209.2 | |
| }, | |
| { | |
| "layer": 1, | |
| "label": "pre_Lcrit", | |
| "rank90": 894, | |
| "rank50": 265, | |
| "rho": 0.4974, | |
| "fro_norm": 32.1104, | |
| "rank90_frac": 0.4365, | |
| "rank90_err_pct": 241.2 | |
| }, | |
| { | |
| "layer": 2, | |
| "label": "pre_Lcrit", | |
| "rank90": 930, | |
| "rank50": 280, | |
| "rho": 0.4807, | |
| "fro_norm": 35.3559, | |
| "rank90_frac": 0.4541, | |
| "rank90_err_pct": 255.0 | |
| }, | |
| { | |
| "layer": 3, | |
| "label": "pre_Lcrit", | |
| "rank90": 893, | |
| "rank50": 278, | |
| "rho": 0.4817, | |
| "fro_norm": 36.2661, | |
| "rank90_frac": 0.436, | |
| "rank90_err_pct": 240.8 | |
| }, | |
| { | |
| "layer": 4, | |
| "label": "pre_Lcrit", | |
| "rank90": 917, | |
| "rank50": 274, | |
| "rho": 0.4865, | |
| "fro_norm": 37.3924, | |
| "rank90_frac": 0.4478, | |
| "rank90_err_pct": 250.0 | |
| }, | |
| { | |
| "layer": 5, | |
| "label": "pre_Lcrit", | |
| "rank90": 899, | |
| "rank50": 273, | |
| "rho": 0.4879, | |
| "fro_norm": 40.6263, | |
| "rank90_frac": 0.439, | |
| "rank90_err_pct": 243.1 | |
| }, | |
| { | |
| "layer": 6, | |
| "label": "pre_Lcrit", | |
| "rank90": 865, | |
| "rank50": 247, | |
| "rho": 0.518, | |
| "fro_norm": 42.9162, | |
| "rank90_frac": 0.4224, | |
| "rank90_err_pct": 230.2 | |
| }, | |
| { | |
| "layer": 7, | |
| "label": "pre_Lcrit", | |
| "rank90": 907, | |
| "rank50": 295, | |
| "rho": 0.4617, | |
| "fro_norm": 42.1987, | |
| "rank90_frac": 0.4429, | |
| "rank90_err_pct": 246.2 | |
| }, | |
| { | |
| "layer": 8, | |
| "label": "pre_Lcrit", | |
| "rank90": 837, | |
| "rank50": 235, | |
| "rho": 0.5343, | |
| "fro_norm": 44.9326, | |
| "rank90_frac": 0.4087, | |
| "rank90_err_pct": 219.5 | |
| }, | |
| { | |
| "layer": 9, | |
| "label": "pre_Lcrit", | |
| "rank90": 903, | |
| "rank50": 291, | |
| "rho": 0.4654, | |
| "fro_norm": 45.1014, | |
| "rank90_frac": 0.4409, | |
| "rank90_err_pct": 244.7 | |
| }, | |
| { | |
| "layer": 10, | |
| "label": "pre_Lcrit", | |
| "rank90": 960, | |
| "rank50": 322, | |
| "rho": 0.4331, | |
| "fro_norm": 45.0008, | |
| "rank90_frac": 0.4688, | |
| "rank90_err_pct": 266.4 | |
| }, | |
| { | |
| "layer": 11, | |
| "label": "pre_Lcrit", | |
| "rank90": 901, | |
| "rank50": 290, | |
| "rho": 0.4662, | |
| "fro_norm": 52.5301, | |
| "rank90_frac": 0.4399, | |
| "rank90_err_pct": 243.9 | |
| }, | |
| { | |
| "layer": 12, | |
| "label": "pre_Lcrit", | |
| "rank90": 947, | |
| "rank50": 311, | |
| "rho": 0.4436, | |
| "fro_norm": 52.3493, | |
| "rank90_frac": 0.4624, | |
| "rank90_err_pct": 261.5 | |
| }, | |
| { | |
| "layer": 13, | |
| "label": "pre_Lcrit", | |
| "rank90": 1001, | |
| "rank50": 348, | |
| "rho": 0.4063, | |
| "fro_norm": 54.4121, | |
| "rank90_frac": 0.4888, | |
| "rank90_err_pct": 282.1 | |
| }, | |
| { | |
| "layer": 14, | |
| "label": "pre_Lcrit", | |
| "rank90": 1015, | |
| "rank50": 352, | |
| "rho": 0.4052, | |
| "fro_norm": 51.9307, | |
| "rank90_frac": 0.4956, | |
| "rank90_err_pct": 287.4 | |
| }, | |
| { | |
| "layer": 15, | |
| "label": "at_Lcrit", | |
| "rank90": 1039, | |
| "rank50": 364, | |
| "rho": 0.3934, | |
| "fro_norm": 51.225, | |
| "rank90_frac": 0.5073, | |
| "rank90_err_pct": 296.6 | |
| } | |
| ], | |
| "summary": { | |
| "mean_rank90": 919.9, | |
| "std_rank90": 59.62, | |
| "pre_Lcrit_mean_rho": 0.4754, | |
| "post_Lcrit_mean_rho": 0.0 | |
| } | |
| } | |
| } |