Upload folder using huggingface_hub

Browse files

Files changed (9)

README.md +9 -9
adapter_config.json +8 -11
adapter_model.safetensors +2 -2
chat_template.jinja +23 -46
processor_config.json +17 -64
tokenizer.json +2 -2
tokenizer.model +3 -0
tokenizer_config.json +0 -0
train_summary.json +7 -7

README.md CHANGED Viewed

@@ -1,11 +1,11 @@
 ---
-base_model: unsloth/gemma-4-26b-a4b-it
 library_name: peft
 pipeline_tag: text-generation
 tags:
   - lora
   - peft
-  - gemma-4
   - adhd
   - neurodivergent
   - task-initiation
@@ -15,7 +15,7 @@ license: other
 # NeuroBait
-NeuroBait is a LoRA fine-tune of `unsloth/gemma-4-26b-a4b-it` for ADHD and
 neurodivergent task-initiation conversations. It is designed to produce warm,
 short, agency-preserving prose that helps a user start one tiny next move
 without turning the conversation into a full to-do list.
@@ -54,7 +54,7 @@ The dataset is not included in this model repo.
 ## Training Configuration
-- Base: `unsloth/gemma-4-26b-a4b-it`
 - Method: 16-bit LoRA
 - LoRA rank: 16
 - LoRA alpha: 16
@@ -68,8 +68,8 @@ The dataset is not included in this model repo.
 - Warmup ratio: 0.05
 - Optimizer: adamw 8-bit
 - Precision: bf16
-- Chat template: `gemma-4`
-- Response-only markers: `<|turn>user\n` / `<|turn>model\n`
 Training ran on Modal with an H100 80GB GPU.
@@ -109,14 +109,14 @@ Example adapter loading path:
 ```python
 from peft import PeftModel
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 import torch
-base_model = "unsloth/gemma-4-26b-a4b-it"
 adapter_id = "build-small-hackathon/NeuroBait"
 tokenizer = AutoTokenizer.from_pretrained(adapter_id)
-model = AutoModelForCausalLM.from_pretrained(
     base_model,
     quantization_config=BitsAndBytesConfig(
         load_in_4bit=True,

 ---
+base_model: unsloth/gemma-3-12b-it
 library_name: peft
 pipeline_tag: text-generation
 tags:
   - lora
   - peft
+  - gemma-3
   - adhd
   - neurodivergent
   - task-initiation
 # NeuroBait
+NeuroBait is a LoRA fine-tune of `unsloth/gemma-3-12b-it` for ADHD and
 neurodivergent task-initiation conversations. It is designed to produce warm,
 short, agency-preserving prose that helps a user start one tiny next move
 without turning the conversation into a full to-do list.
 ## Training Configuration
+- Base: `unsloth/gemma-3-12b-it` (dense Gemma 3)
 - Method: 16-bit LoRA
 - LoRA rank: 16
 - LoRA alpha: 16
 - Warmup ratio: 0.05
 - Optimizer: adamw 8-bit
 - Precision: bf16
+- Chat template: `gemma-3`
+- Response-only markers: `<start_of_turn>user\n` / `<start_of_turn>model\n`
 Training ran on Modal with an H100 80GB GPU.
 ```python
 from peft import PeftModel
+from transformers import AutoModelForImageTextToText, AutoTokenizer, BitsAndBytesConfig
 import torch
+base_model = "unsloth/gemma-3-12b-it"
 adapter_id = "build-small-hackathon/NeuroBait"
 tokenizer = AutoTokenizer.from_pretrained(adapter_id)
+model = AutoModelForImageTextToText.from_pretrained(
     base_model,
     quantization_config=BitsAndBytesConfig(
         load_in_4bit=True,

adapter_config.json CHANGED Viewed

@@ -3,11 +3,11 @@
   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": {
-    "base_model_class": "Gemma4ForConditionalGeneration",
-    "parent_library": "transformers.models.gemma4.modeling_gemma4",
     "unsloth_fixed": true
   },
-  "base_model_name_or_path": "unsloth/gemma-4-26b-a4b-it",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
@@ -34,18 +34,15 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "v_proj",
     "o_proj",
-    "k_proj",
-    "gate_proj",
     "up_proj",
-    "down_proj",
-    "q_proj"
-  ],
-  "target_parameters": [
-    "experts.gate_up_proj",
-    "experts.down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,
   "use_bdlora": null,

   "alpha_pattern": {},
   "arrow_config": null,
   "auto_mapping": {
+    "base_model_class": "Gemma3ForConditionalGeneration",
+    "parent_library": "transformers.models.gemma3.modeling_gemma3",
     "unsloth_fixed": true
   },
+  "base_model_name_or_path": "unsloth/gemma-3-12b-it",
   "bias": "none",
   "corda_config": null,
   "ensure_weight_tying": false,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "down_proj",
     "v_proj",
     "o_proj",
     "up_proj",
+    "gate_proj",
+    "q_proj",
+    "k_proj"
   ],
+  "target_parameters": null,
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,
   "use_bdlora": null,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:686c075b96bc79f5208bf275598f5a344142e05131d171b191e632823cd8e5ad
-size 1011000856

 version https://git-lfs.github.com/spec/v1
+oid sha256:9a763e594f9aef39fbb7a8cb6e256dada4be4a30d1fc3da7d3997fa204264778
+size 137041384

chat_template.jinja CHANGED Viewed

@@ -1,29 +1,18 @@
-{{ bos_token }}{%- macro strip_thinking(text) -%}
-    {%- set ns = namespace(result='') -%}
-    {%- for part in text.split('<channel|>') -%}
-        {%- if '<|channel>' in part -%}
-            {%- set ns.result = ns.result + part.split('<|channel>')[0] -%}
-        {%- else -%}
-            {%- set ns.result = ns.result + part -%}
-        {%- endif -%}
-    {%- endfor -%}
-    {{- ns.result | trim -}}
-{%- endmacro -%}
-{%- set thinking = enable_thinking is defined and enable_thinking -%}
-{%- set loop_messages = messages -%}
-{%- if messages[0]['role'] in ['system', 'developer'] or thinking -%}
-    {{ '<|turn>system
-' }}
-    {%- if thinking -%}
-        {{ '<|think|>
-' }}
-    {%- endif -%}
-    {%- if messages[0]['role'] in ['system', 'developer'] -%}
-        {{ messages[0]['content'] | trim }}
-        {%- set loop_messages = messages[1:] -%}
     {%- endif -%}
-    {{ '<turn|>
-' }}
 {%- endif -%}
 {%- for message in loop_messages -%}
     {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
@@ -34,37 +23,25 @@
     {%- else -%}
         {%- set role = message['role'] -%}
     {%- endif -%}
-    {{ '<|turn>' + role + '
-' }}
     {%- if message['content'] is string -%}
-        {%- if role == "model" -%}
-            {{ strip_thinking(message['content']) }}
-        {%- else -%}
-            {{ message['content'] | trim }}
-        {%- endif -%}
     {%- elif message['content'] is iterable -%}
         {%- for item in message['content'] -%}
-            {%- if item['type'] == 'audio' -%}
-                {{ '<|audio|>' }}
-            {%- elif item['type'] == 'image' -%}
-                {{ '<|image|>' }}
-            {%- elif item['type'] == 'video' -%}
-                {{ '<|video|>' }}
             {%- elif item['type'] == 'text' -%}
-                {%- if role == "model" -%}
-                    {{ strip_thinking(item['text']) }}
-                {%- else -%}
-                    {{ item['text'] | trim }}
-                {%- endif -%}
             {%- endif -%}
         {%- endfor -%}
     {%- else -%}
         {{ raise_exception("Invalid content type") }}
     {%- endif -%}
-    {{ '<turn|>
 ' }}
 {%- endfor -%}
 {%- if add_generation_prompt -%}
-    {{'<|turn>model
-'}}
 {%- endif -%}

+{{ bos_token }}
+{%- if messages[0]['role'] == 'system' -%}
+    {%- if messages[0]['content'] is string -%}
+        {%- set first_user_prefix = messages[0]['content'] + '
+' -%}
+    {%- else -%}
+        {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
+' -%}
     {%- endif -%}
+    {%- set loop_messages = messages[1:] -%}
+{%- else -%}
+    {%- set first_user_prefix = "" -%}
+    {%- set loop_messages = messages -%}
 {%- endif -%}
 {%- for message in loop_messages -%}
     {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
     {%- else -%}
         {%- set role = message['role'] -%}
     {%- endif -%}
+    {{ '<start_of_turn>' + role + '
+' + (first_user_prefix if loop.first else "") }}
     {%- if message['content'] is string -%}
+        {{ message['content'] | trim }}
     {%- elif message['content'] is iterable -%}
         {%- for item in message['content'] -%}
+            {%- if item['type'] == 'image' -%}
+                {{ '<start_of_image>' }}
             {%- elif item['type'] == 'text' -%}
+                {{ item['text'] | trim }}
             {%- endif -%}
         {%- endfor -%}
     {%- else -%}
         {{ raise_exception("Invalid content type") }}
     {%- endif -%}
+    {{ '<end_of_turn>
 ' }}
 {%- endfor -%}
 {%- if add_generation_prompt -%}
+    {{ '<start_of_turn>model
+' }}
 {%- endif -%}

processor_config.json CHANGED Viewed

@@ -1,75 +1,28 @@
 {
-  "audio_ms_per_token": 40,
-  "audio_seq_length": 750,
-  "feature_extractor": {
-    "dither": 0.0,
-    "feature_extractor_type": "Gemma4AudioFeatureExtractor",
-    "feature_size": 128,
-    "fft_length": 512,
-    "fft_overdrive": false,
-    "frame_length": 320,
-    "hop_length": 160,
-    "input_scale_factor": 1.0,
-    "max_frequency": 8000.0,
-    "mel_floor": 0.001,
-    "min_frequency": 0.0,
-    "padding_side": "left",
-    "padding_value": 0.0,
-    "per_bin_mean": null,
-    "per_bin_stddev": null,
-    "preemphasis": 0.0,
-    "preemphasis_htk_flavor": true,
-    "return_attention_mask": true,
-    "sampling_rate": 16000
-  },
   "image_processor": {
-    "do_convert_rgb": true,
-    "do_normalize": false,
-    "do_rescale": true,
-    "do_resize": true,
-    "image_mean": [
-      0.0,
-      0.0,
-      0.0
-    ],
-    "image_processor_type": "Gemma4ImageProcessor",
-    "image_seq_length": 280,
-    "image_std": [
-      1.0,
-      1.0,
-      1.0
-    ],
-    "max_soft_tokens": 280,
-    "patch_size": 16,
-    "pooling_kernel_size": 3,
-    "resample": 3,
-    "rescale_factor": 0.00392156862745098
-  },
-  "image_seq_length": 280,
-  "processor_class": "Gemma4Processor",
-  "video_processor": {
-    "do_convert_rgb": true,
     "do_normalize": true,
     "do_rescale": true,
     "do_resize": true,
-    "do_sample_frames": true,
     "image_mean": [
-      0.0,
-      0.0,
-      0.0
     ],
     "image_std": [
-      1.0,
-      1.0,
-      1.0
     ],
-    "max_soft_tokens": 70,
-    "num_frames": 32,
-    "patch_size": 16,
-    "pooling_kernel_size": 3,
-    "resample": 3,
     "rescale_factor": 0.00392156862745098,
-    "return_metadata": false,
-    "video_processor_type": "Gemma4VideoProcessor"
-  }
 }

 {
   "image_processor": {
+    "do_convert_rgb": null,
     "do_normalize": true,
     "do_rescale": true,
     "do_resize": true,
     "image_mean": [
+      0.5,
+      0.5,
+      0.5
     ],
+    "image_processor_type": "Gemma3ImageProcessor",
+    "image_seq_length": 256,
     "image_std": [
+      0.5,
+      0.5,
+      0.5
     ],
+    "resample": 2,
     "rescale_factor": 0.00392156862745098,
+    "size": {
+      "height": 896,
+      "width": 896
+    }
+  },
+  "image_seq_length": 256,
+  "processor_class": "Gemma3Processor"
 }

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cc8d3a0ce36466ccc1278bf987df5f71db1719b9ca6b4118264f45cb627bfe0f
-size 32169626

 version https://git-lfs.github.com/spec/v1
+oid sha256:daab2354f8a74e70d70b4d1f804939b68a8c9624dd06cb7858e52dd8970e9726
+size 33384567

tokenizer.model ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
+size 4689074

tokenizer_config.json CHANGED Viewed

The diff for this file is too large to render; see the raw diff.

train_summary.json CHANGED Viewed

@@ -1,20 +1,20 @@
 {
-  "base_model": "unsloth/gemma-4-26b-a4b-it",
   "train_file": "/data/train.jsonl",
   "eval_file": "/data/eval.jsonl",
-  "out_adapter": "/out/neurobait-lora-run3",
   "out_dir": "/out/outputs",
   "epochs": 3.0,
   "max_seq": 2048,
   "seed": 42,
   "n_train": 270,
   "n_eval": 30,
-  "train_loss": 0.2420340434593313,
   "eval": {
-    "eval_loss": 2.404412269592285,
-    "eval_runtime": 13.2261,
-    "eval_samples_per_second": 2.268,
-    "eval_steps_per_second": 2.268,
     "epoch": 3.0
   },
   "expected_steps": 102

 {
+  "base_model": "unsloth/gemma-3-12b-it",
   "train_file": "/data/train.jsonl",
   "eval_file": "/data/eval.jsonl",
+  "out_adapter": "/out/neurobait-lora-12b",
   "out_dir": "/out/outputs",
   "epochs": 3.0,
   "max_seq": 2048,
   "seed": 42,
   "n_train": 270,
   "n_eval": 30,
+  "train_loss": 1.7501190456689573,
   "eval": {
+    "eval_loss": 1.8844258785247803,
+    "eval_runtime": 20.1028,
+    "eval_samples_per_second": 1.492,
+    "eval_steps_per_second": 1.492,
     "epoch": 3.0
   },
   "expected_steps": 102