Add complete model with LFS

Files changed (8)

.gitattributes +1 -0
adapter_config.json +3 -53
adapter_model.safetensors +3 -0
rng_state.pth +3 -0
tokenizer.json +3 -0
tokenizer_config.json +3 -95
trainer_state.json +3 -489
training_args.bin +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.json filter=lfs diff=lfs merge=lfs -text

adapter_config.json CHANGED Viewed

@@ -1,53 +1,3 @@
-{
-  "alora_invocation_tokens": null,
-  "alpha_pattern": {},
-  "arrow_config": null,
-  "auto_mapping": {
-    "base_model_class": "Gemma4ForConditionalGeneration",
-    "parent_library": "transformers.models.gemma4.modeling_gemma4",
-    "unsloth_fixed": true
-  },
-  "base_model_name_or_path": "0xSero/gemma-4-21b-a4b-it-REAP",
-  "bias": "none",
-  "corda_config": null,
-  "ensure_weight_tying": false,
-  "eva_config": null,
-  "exclude_modules": null,
-  "fan_in_fan_out": false,
-  "inference_mode": true,
-  "init_lora_weights": true,
-  "layer_replication": null,
-  "layers_pattern": null,
-  "layers_to_transform": null,
-  "loftq_config": {},
-  "lora_alpha": 64,
-  "lora_bias": false,
-  "lora_dropout": 0,
-  "megatron_config": null,
-  "megatron_core": "megatron.core",
-  "modules_to_save": null,
-  "peft_type": "LORA",
-  "peft_version": "0.18.1",
-  "qalora_group_size": 16,
-  "r": 32,
-  "rank_pattern": {},
-  "revision": null,
-  "target_modules": [
-    "up_proj",
-    "o_proj",
-    "q_proj",
-    "k_proj",
-    "v_proj",
-    "down_proj",
-    "gate_proj"
-  ],
-  "target_parameters": [
-    "mlp.experts.gate_up_proj",
-    "mlp.experts.down_proj"
-  ],
-  "task_type": "CAUSAL_LM",
-  "trainable_token_indices": null,
-  "use_dora": false,
-  "use_qalora": false,
-  "use_rslora": false
-}

+version https://git-lfs.github.com/spec/v1
+oid sha256:1431581c9d913f62c52cfb0c1c7b5398742cad9b9e9eab670818c85c8cbdf7bf
+size 1272

adapter_model.safetensors ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a207203dbc2efa5760e2408e2b65a9fb13a4b61dcaf2763584996d34fea9391
+size 237225680

rng_state.pth ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:61c19bab1174704a4a4441475683bf1270277af15d2e2c95e964789128e482c4
+size 14645

tokenizer.json ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cc8d3a0ce36466ccc1278bf987df5f71db1719b9ca6b4118264f45cb627bfe0f
+size 32169626

tokenizer_config.json CHANGED Viewed

@@ -1,95 +1,3 @@
-{
-  "audio_token": "<|audio|>",
-  "backend": "tokenizers",
-  "boa_token": "<|audio>",
-  "boi_token": "<|image>",
-  "bos_token": "<bos>",
-  "eoa_token": "<audio|>",
-  "eoc_token": "<channel|>",
-  "eoi_token": "<image|>",
-  "eos_token": "<eos>",
-  "eot_token": "<turn|>",
-  "escape_token": "<|\"|>",
-  "etc_token": "<tool_call|>",
-  "etd_token": "<tool|>",
-  "etr_token": "<tool_response|>",
-  "extra_special_tokens": [
-    "<|video|>"
-  ],
-  "image_token": "<|image|>",
-  "is_local": false,
-  "mask_token": "<mask>",
-  "model_max_length": 1000000000000000019884624838656,
-  "model_specific_special_tokens": {
-    "audio_token": "<|audio|>",
-    "boa_token": "<|audio>",
-    "boi_token": "<|image>",
-    "eoa_token": "<audio|>",
-    "eoc_token": "<channel|>",
-    "eoi_token": "<image|>",
-    "eot_token": "<turn|>",
-    "escape_token": "<|\"|>",
-    "etc_token": "<tool_call|>",
-    "etd_token": "<tool|>",
-    "etr_token": "<tool_response|>",
-    "image_token": "<|image|>",
-    "soc_token": "<|channel>",
-    "sot_token": "<|turn>",
-    "stc_token": "<|tool_call>",
-    "std_token": "<|tool>",
-    "str_token": "<|tool_response>",
-    "think_token": "<|think|>"
-  },
-  "pad_token": "<pad>",
-  "padding_side": "left",
-  "processor_class": "Gemma4Processor",
-  "response_schema": {
-    "properties": {
-      "content": {
-        "type": "string"
-      },
-      "role": {
-        "const": "assistant"
-      },
-      "thinking": {
-        "type": "string"
-      },
-      "tool_calls": {
-        "items": {
-          "properties": {
-            "function": {
-              "properties": {
-                "arguments": {
-                  "additionalProperties": {},
-                  "type": "object",
-                  "x-parser": "gemma4-tool-call"
-                },
-                "name": {
-                  "type": "string"
-                }
-              },
-              "type": "object",
-              "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})"
-            },
-            "type": {
-              "const": "function"
-            }
-          },
-          "type": "object"
-        },
-        "type": "array",
-        "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>"
-      }
-    },
-    "type": "object",
-    "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<content>(?:(?!\\<\\|tool_call\\>)(?!\\<turn\\|\\>).)+)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?:\\<turn\\|\\>)?"
-  },
-  "soc_token": "<|channel>",
-  "sot_token": "<|turn>",
-  "stc_token": "<|tool_call>",
-  "std_token": "<|tool>",
-  "str_token": "<|tool_response>",
-  "think_token": "<|think|>",
-  "tokenizer_class": "GemmaTokenizer",
-  "unk_token": "<unk>"
-}

+version https://git-lfs.github.com/spec/v1
+oid sha256:820ad481cb4152f8df6061f4ae6ae20255777c39b77a9970f1e3cbd427b21511
+size 2686

trainer_state.json CHANGED Viewed

@@ -1,489 +1,3 @@
-{
-  "best_global_step": null,
-  "best_metric": null,
-  "best_model_checkpoint": null,
-  "epoch": 0.6845708267509215,
-  "eval_steps": 500,
-  "global_step": 325,
-  "is_hyper_param_search": false,
-  "is_local_process_zero": true,
-  "is_world_process_zero": true,
-  "log_history": [
-    {
-      "epoch": 0.010531858873091101,
-      "grad_norm": 7.945572853088379,
-      "learning_rate": 4.4444444444444447e-05,
-      "loss": 13.0722412109375,
-      "step": 5
-    },
-    {
-      "epoch": 0.021063717746182202,
-      "grad_norm": 2.1900172233581543,
-      "learning_rate": 0.0001,
-      "loss": 8.607737731933593,
-      "step": 10
-    },
-    {
-      "epoch": 0.0315955766192733,
-      "grad_norm": 1.9897981882095337,
-      "learning_rate": 0.00015555555555555556,
-      "loss": 5.214687728881836,
-      "step": 15
-    },
-    {
-      "epoch": 0.042127435492364404,
-      "grad_norm": 0.5303641557693481,
-      "learning_rate": 0.0002111111111111111,
-      "loss": 5.304478454589844,
-      "step": 20
-    },
-    {
-      "epoch": 0.0526592943654555,
-      "grad_norm": 0.6158832907676697,
-      "learning_rate": 0.0002666666666666667,
-      "loss": 4.097989273071289,
-      "step": 25
-    },
-    {
-      "epoch": 0.0631911532385466,
-      "grad_norm": 0.6168912053108215,
-      "learning_rate": 0.0003222222222222222,
-      "loss": 4.195485305786133,
-      "step": 30
-    },
-    {
-      "epoch": 0.0737230121116377,
-      "grad_norm": 1.1824705600738525,
-      "learning_rate": 0.00037777777777777777,
-      "loss": 3.989824676513672,
-      "step": 35
-    },
-    {
-      "epoch": 0.08425487098472881,
-      "grad_norm": 23.726043701171875,
-      "learning_rate": 0.00043333333333333337,
-      "loss": 3.813393783569336,
-      "step": 40
-    },
-    {
-      "epoch": 0.0947867298578199,
-      "grad_norm": 0.9648836851119995,
-      "learning_rate": 0.0004888888888888889,
-      "loss": 3.9115650177001955,
-      "step": 45
-    },
-    {
-      "epoch": 0.105318588730911,
-      "grad_norm": 1.5551100969314575,
-      "learning_rate": 0.0004999906760259783,
-      "loss": 3.9463314056396483,
-      "step": 50
-    },
-    {
-      "epoch": 0.11585044760400211,
-      "grad_norm": 14.366273880004883,
-      "learning_rate": 0.0004999527985734932,
-      "loss": 3.3652118682861327,
-      "step": 55
-    },
-    {
-      "epoch": 0.1263823064770932,
-      "grad_norm": 0.7795851230621338,
-      "learning_rate": 0.0004998857893053613,
-      "loss": 4.018484497070313,
-      "step": 60
-    },
-    {
-      "epoch": 0.1369141653501843,
-      "grad_norm": 0.8944413065910339,
-      "learning_rate": 0.0004997896560314641,
-      "loss": 3.566949462890625,
-      "step": 65
-    },
-    {
-      "epoch": 0.1474460242232754,
-      "grad_norm": 0.4758361876010895,
-      "learning_rate": 0.0004996644099560641,
-      "loss": 3.5043781280517576,
-      "step": 70
-    },
-    {
-      "epoch": 0.1579778830963665,
-      "grad_norm": 0.5162303447723389,
-      "learning_rate": 0.0004995100656764997,
-      "loss": 3.212859344482422,
-      "step": 75
-    },
-    {
-      "epoch": 0.16850974196945762,
-      "grad_norm": 0.4172177314758301,
-      "learning_rate": 0.0004993266411814837,
-      "loss": 3.3035125732421875,
-      "step": 80
-    },
-    {
-      "epoch": 0.17904160084254872,
-      "grad_norm": 4.056116580963135,
-      "learning_rate": 0.0004991141578490066,
-      "loss": 3.1954784393310547,
-      "step": 85
-    },
-    {
-      "epoch": 0.1895734597156398,
-      "grad_norm": 2.0480830669403076,
-      "learning_rate": 0.0004988726404438453,
-      "loss": 3.452958679199219,
-      "step": 90
-    },
-    {
-      "epoch": 0.2001053185887309,
-      "grad_norm": 0.3407100439071655,
-      "learning_rate": 0.0004986021171146765,
-      "loss": 3.66046028137207,
-      "step": 95
-    },
-    {
-      "epoch": 0.210637177461822,
-      "grad_norm": 0.6314069628715515,
-      "learning_rate": 0.0004983026193907961,
-      "loss": 3.37341194152832,
-      "step": 100
-    },
-    {
-      "epoch": 0.2211690363349131,
-      "grad_norm": 0.29125332832336426,
-      "learning_rate": 0.0004979741821784446,
-      "loss": 3.2889537811279297,
-      "step": 105
-    },
-    {
-      "epoch": 0.23170089520800422,
-      "grad_norm": 0.3563121259212494,
-      "learning_rate": 0.0004976168437567384,
-      "loss": 3.55091552734375,
-      "step": 110
-    },
-    {
-      "epoch": 0.24223275408109532,
-      "grad_norm": 0.24429726600646973,
-      "learning_rate": 0.0004972306457732091,
-      "loss": 3.087770462036133,
-      "step": 115
-    },
-    {
-      "epoch": 0.2527646129541864,
-      "grad_norm": 0.36369070410728455,
-      "learning_rate": 0.0004968156332389489,
-      "loss": 3.1231616973876952,
-      "step": 120
-    },
-    {
-      "epoch": 0.2632964718272775,
-      "grad_norm": 0.39639607071876526,
-      "learning_rate": 0.0004963718545233645,
-      "loss": 3.043613052368164,
-      "step": 125
-    },
-    {
-      "epoch": 0.2738283307003686,
-      "grad_norm": 0.2886509299278259,
-      "learning_rate": 0.0004958993613485406,
-      "loss": 3.7740196228027343,
-      "step": 130
-    },
-    {
-      "epoch": 0.2843601895734597,
-      "grad_norm": 0.31644657254219055,
-      "learning_rate": 0.0004953982087832103,
-      "loss": 2.8986371994018554,
-      "step": 135
-    },
-    {
-      "epoch": 0.2948920484465508,
-      "grad_norm": 0.3337455689907074,
-      "learning_rate": 0.0004948684552363386,
-      "loss": 2.7170455932617186,
-      "step": 140
-    },
-    {
-      "epoch": 0.3054239073196419,
-      "grad_norm": 0.2488539218902588,
-      "learning_rate": 0.0004943101624503133,
-      "loss": 2.9157670974731444,
-      "step": 145
-    },
-    {
-      "epoch": 0.315955766192733,
-      "grad_norm": 0.42236021161079407,
-      "learning_rate": 0.0004937233954937497,
-      "loss": 2.7440420150756837,
-      "step": 150
-    },
-    {
-      "epoch": 0.3264876250658241,
-      "grad_norm": 0.34990763664245605,
-      "learning_rate": 0.000493108222753907,
-      "loss": 3.266927719116211,
-      "step": 155
-    },
-    {
-      "epoch": 0.33701948393891523,
-      "grad_norm": 0.33922290802001953,
-      "learning_rate": 0.0004924647159287175,
-      "loss": 3.5011825561523438,
-      "step": 160
-    },
-    {
-      "epoch": 0.34755134281200634,
-      "grad_norm": 0.45363396406173706,
-      "learning_rate": 0.0004917929500184307,
-      "loss": 2.768464469909668,
-      "step": 165
-    },
-    {
-      "epoch": 0.35808320168509744,
-      "grad_norm": 0.29426634311676025,
-      "learning_rate": 0.0004910930033168714,
-      "loss": 3.0745222091674806,
-      "step": 170
-    },
-    {
-      "epoch": 0.36861506055818855,
-      "grad_norm": 0.48457714915275574,
-      "learning_rate": 0.0004903649574023151,
-      "loss": 3.1395895004272463,
-      "step": 175
-    },
-    {
-      "epoch": 0.3791469194312796,
-      "grad_norm": 0.28997525572776794,
-      "learning_rate": 0.0004896088971279799,
-      "loss": 2.9532394409179688,
-      "step": 180
-    },
-    {
-      "epoch": 0.3896787783043707,
-      "grad_norm": 0.3158065378665924,
-      "learning_rate": 0.0004888249106121375,
-      "loss": 3.242550277709961,
-      "step": 185
-    },
-    {
-      "epoch": 0.4002106371774618,
-      "grad_norm": 0.25733378529548645,
-      "learning_rate": 0.00048801308922784194,
-      "loss": 2.863876533508301,
-      "step": 190
-    },
-    {
-      "epoch": 0.4107424960505529,
-      "grad_norm": 0.39928656816482544,
-      "learning_rate": 0.00048717352759228115,
-      "loss": 2.9123611450195312,
-      "step": 195
-    },
-    {
-      "epoch": 0.421274354923644,
-      "grad_norm": 0.3962703049182892,
-      "learning_rate": 0.00048630632355574924,
-      "loss": 3.159450149536133,
-      "step": 200
-    },
-    {
-      "epoch": 0.4318062137967351,
-      "grad_norm": 1.542340874671936,
-      "learning_rate": 0.0004854115781902414,
-      "loss": 2.832276725769043,
-      "step": 205
-    },
-    {
-      "epoch": 0.4423380726698262,
-      "grad_norm": 0.36583542823791504,
-      "learning_rate": 0.00048448939577767485,
-      "loss": 2.690146064758301,
-      "step": 210
-    },
-    {
-      "epoch": 0.4528699315429173,
-      "grad_norm": 0.3314652740955353,
-      "learning_rate": 0.0004835398837977346,
-      "loss": 2.8558319091796873,
-      "step": 215
-    },
-    {
-      "epoch": 0.46340179041600843,
-      "grad_norm": 0.30962228775024414,
-      "learning_rate": 0.0004825631529153466,
-      "loss": 2.9323553085327148,
-      "step": 220
-    },
-    {
-      "epoch": 0.47393364928909953,
-      "grad_norm": 1.0505726337432861,
-      "learning_rate": 0.0004815593169677799,
-      "loss": 2.648083305358887,
-      "step": 225
-    },
-    {
-      "epoch": 0.48446550816219064,
-      "grad_norm": 0.4935843050479889,
-      "learning_rate": 0.00048052849295137924,
-      "loss": 2.8767162322998048,
-      "step": 230
-    },
-    {
-      "epoch": 0.49499736703528174,
-      "grad_norm": 0.3319757580757141,
-      "learning_rate": 0.00047947080100792884,
-      "loss": 3.111905097961426,
-      "step": 235
-    },
-    {
-      "epoch": 0.5055292259083728,
-      "grad_norm": 1.3790903091430664,
-      "learning_rate": 0.0004783863644106502,
-      "loss": 3.082983207702637,
-      "step": 240
-    },
-    {
-      "epoch": 0.516061084781464,
-      "grad_norm": 0.2825748026371002,
-      "learning_rate": 0.00047727530954983475,
-      "loss": 2.5215627670288088,
-      "step": 245
-    },
-    {
-      "epoch": 0.526592943654555,
-      "grad_norm": 0.7788515090942383,
-      "learning_rate": 0.00047613776591811296,
-      "loss": 3.078541564941406,
-      "step": 250
-    },
-    {
-      "epoch": 0.5371248025276462,
-      "grad_norm": 0.2866535186767578,
-      "learning_rate": 0.0004749738660953621,
-      "loss": 2.795052909851074,
-      "step": 255
-    },
-    {
-      "epoch": 0.5476566614007372,
-      "grad_norm": 0.35526394844055176,
-      "learning_rate": 0.00047378374573325415,
-      "loss": 2.6752908706665037,
-      "step": 260
-    },
-    {
-      "epoch": 0.5581885202738284,
-      "grad_norm": 0.38038304448127747,
-      "learning_rate": 0.00047256754353944605,
-      "loss": 2.8107267379760743,
-      "step": 265
-    },
-    {
-      "epoch": 0.5687203791469194,
-      "grad_norm": 0.3206808269023895,
-      "learning_rate": 0.00047132540126141275,
-      "loss": 2.705642509460449,
-      "step": 270
-    },
-    {
-      "epoch": 0.5792522380200106,
-      "grad_norm": 0.2499801069498062,
-      "learning_rate": 0.0004700574636699273,
-      "loss": 2.475233459472656,
-      "step": 275
-    },
-    {
-      "epoch": 0.5897840968931016,
-      "grad_norm": 0.26146742701530457,
-      "learning_rate": 0.0004687638785421875,
-      "loss": 2.7663032531738283,
-      "step": 280
-    },
-    {
-      "epoch": 0.6003159557661928,
-      "grad_norm": 0.3592541515827179,
-      "learning_rate": 0.00046744479664459276,
-      "loss": 2.623735046386719,
-      "step": 285
-    },
-    {
-      "epoch": 0.6108478146392838,
-      "grad_norm": 0.31415146589279175,
-      "learning_rate": 0.00046610037171517237,
-      "loss": 2.87611083984375,
-      "step": 290
-    },
-    {
-      "epoch": 0.6213796735123749,
-      "grad_norm": 0.2771868109703064,
-      "learning_rate": 0.00046473076044566743,
-      "loss": 2.7714420318603517,
-      "step": 295
-    },
-    {
-      "epoch": 0.631911532385466,
-      "grad_norm": 0.4841095209121704,
-      "learning_rate": 0.0004633361224632685,
-      "loss": 2.344076728820801,
-      "step": 300
-    },
-    {
-      "epoch": 0.6424433912585571,
-      "grad_norm": 0.3907207250595093,
-      "learning_rate": 0.00046191662031201116,
-      "loss": 2.6539451599121096,
-      "step": 305
-    },
-    {
-      "epoch": 0.6529752501316483,
-      "grad_norm": 0.25700584053993225,
-      "learning_rate": 0.00046047241943383176,
-      "loss": 2.7396242141723635,
-      "step": 310
-    },
-    {
-      "epoch": 0.6635071090047393,
-      "grad_norm": 0.30718255043029785,
-      "learning_rate": 0.00045900368814928504,
-      "loss": 2.8544797897338867,
-      "step": 315
-    },
-    {
-      "epoch": 0.6740389678778305,
-      "grad_norm": 0.30769050121307373,
-      "learning_rate": 0.00045751059763792694,
-      "loss": 2.611779975891113,
-      "step": 320
-    },
-    {
-      "epoch": 0.6845708267509215,
-      "grad_norm": 0.24997501075267792,
-      "learning_rate": 0.00045599332191836314,
-      "loss": 2.708077239990234,
-      "step": 325
-    }
-  ],
-  "logging_steps": 5,
-  "max_steps": 1500,
-  "num_input_tokens_seen": 0,
-  "num_train_epochs": 4,
-  "save_steps": 25,
-  "stateful_callbacks": {
-    "TrainerControl": {
-      "args": {
-        "should_epoch_stop": false,
-        "should_evaluate": false,
-        "should_log": false,
-        "should_save": true,
-        "should_training_stop": false
-      },
-      "attributes": {}
-    }
-  },
-  "total_flos": 6.564817736409073e+17,
-  "train_batch_size": 2,
-  "trial_name": null,
-  "trial_params": null
-}

+version https://git-lfs.github.com/spec/v1
+oid sha256:0a644e82ab43b487810df128783a041ad14cadbdb92057386bef746e72976478
+size 12792

training_args.bin ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f6cb092f02e7015f02eb90df660f71bcf38fa540b6f30d3bb40afc2dacc7c95e
+size 5201