Haris-Subrata committed on
Commit
62a9471
·
verified ·
1 Parent(s): 7ad340a

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -1,11 +1,11 @@
1
  ---
2
- base_model: unsloth/gemma-4-26b-a4b-it
3
  library_name: peft
4
  pipeline_tag: text-generation
5
  tags:
6
  - lora
7
  - peft
8
- - gemma-4
9
  - adhd
10
  - neurodivergent
11
  - task-initiation
@@ -15,7 +15,7 @@ license: other
15
 
16
  # NeuroBait
17
 
18
- NeuroBait is a LoRA fine-tune of `unsloth/gemma-4-26b-a4b-it` for ADHD and
19
  neurodivergent task-initiation conversations. It is designed to produce warm,
20
  short, agency-preserving prose that helps a user start one tiny next move
21
  without turning the conversation into a full to-do list.
@@ -54,7 +54,7 @@ The dataset is not included in this model repo.
54
 
55
  ## Training Configuration
56
 
57
- - Base: `unsloth/gemma-4-26b-a4b-it`
58
  - Method: 16-bit LoRA
59
  - LoRA rank: 16
60
  - LoRA alpha: 16
@@ -68,8 +68,8 @@ The dataset is not included in this model repo.
68
  - Warmup ratio: 0.05
69
  - Optimizer: adamw 8-bit
70
  - Precision: bf16
71
- - Chat template: `gemma-4`
72
- - Response-only markers: `<|turn>user\n` / `<|turn>model\n`
73
 
74
  Training ran on Modal with an H100 80GB GPU.
75
 
@@ -109,14 +109,14 @@ Example adapter loading path:
109
 
110
  ```python
111
  from peft import PeftModel
112
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
113
  import torch
114
 
115
- base_model = "unsloth/gemma-4-26b-a4b-it"
116
  adapter_id = "build-small-hackathon/NeuroBait"
117
 
118
  tokenizer = AutoTokenizer.from_pretrained(adapter_id)
119
- model = AutoModelForCausalLM.from_pretrained(
120
  base_model,
121
  quantization_config=BitsAndBytesConfig(
122
  load_in_4bit=True,
 
1
  ---
2
+ base_model: unsloth/gemma-3-12b-it
3
  library_name: peft
4
  pipeline_tag: text-generation
5
  tags:
6
  - lora
7
  - peft
8
+ - gemma-3
9
  - adhd
10
  - neurodivergent
11
  - task-initiation
 
15
 
16
  # NeuroBait
17
 
18
+ NeuroBait is a LoRA fine-tune of `unsloth/gemma-3-12b-it` for ADHD and
19
  neurodivergent task-initiation conversations. It is designed to produce warm,
20
  short, agency-preserving prose that helps a user start one tiny next move
21
  without turning the conversation into a full to-do list.
 
54
 
55
  ## Training Configuration
56
 
57
+ - Base: `unsloth/gemma-3-12b-it` (dense Gemma 3)
58
  - Method: 16-bit LoRA
59
  - LoRA rank: 16
60
  - LoRA alpha: 16
 
68
  - Warmup ratio: 0.05
69
  - Optimizer: adamw 8-bit
70
  - Precision: bf16
71
+ - Chat template: `gemma-3`
72
+ - Response-only markers: `<start_of_turn>user\n` / `<start_of_turn>model\n`
73
 
74
  Training ran on Modal with an H100 80GB GPU.
75
 
 
109
 
110
  ```python
111
  from peft import PeftModel
112
+ from transformers import AutoModelForImageTextToText, AutoTokenizer, BitsAndBytesConfig
113
  import torch
114
 
115
+ base_model = "unsloth/gemma-3-12b-it"
116
  adapter_id = "build-small-hackathon/NeuroBait"
117
 
118
  tokenizer = AutoTokenizer.from_pretrained(adapter_id)
119
+ model = AutoModelForImageTextToText.from_pretrained(
120
  base_model,
121
  quantization_config=BitsAndBytesConfig(
122
  load_in_4bit=True,
adapter_config.json CHANGED
@@ -3,11 +3,11 @@
3
  "alpha_pattern": {},
4
  "arrow_config": null,
5
  "auto_mapping": {
6
- "base_model_class": "Gemma4ForConditionalGeneration",
7
- "parent_library": "transformers.models.gemma4.modeling_gemma4",
8
  "unsloth_fixed": true
9
  },
10
- "base_model_name_or_path": "unsloth/gemma-4-26b-a4b-it",
11
  "bias": "none",
12
  "corda_config": null,
13
  "ensure_weight_tying": false,
@@ -34,18 +34,15 @@
34
  "rank_pattern": {},
35
  "revision": null,
36
  "target_modules": [
 
37
  "v_proj",
38
  "o_proj",
39
- "k_proj",
40
- "gate_proj",
41
  "up_proj",
42
- "down_proj",
43
- "q_proj"
44
- ],
45
- "target_parameters": [
46
- "experts.gate_up_proj",
47
- "experts.down_proj"
48
  ],
 
49
  "task_type": "CAUSAL_LM",
50
  "trainable_token_indices": null,
51
  "use_bdlora": null,
 
3
  "alpha_pattern": {},
4
  "arrow_config": null,
5
  "auto_mapping": {
6
+ "base_model_class": "Gemma3ForConditionalGeneration",
7
+ "parent_library": "transformers.models.gemma3.modeling_gemma3",
8
  "unsloth_fixed": true
9
  },
10
+ "base_model_name_or_path": "unsloth/gemma-3-12b-it",
11
  "bias": "none",
12
  "corda_config": null,
13
  "ensure_weight_tying": false,
 
34
  "rank_pattern": {},
35
  "revision": null,
36
  "target_modules": [
37
+ "down_proj",
38
  "v_proj",
39
  "o_proj",
 
 
40
  "up_proj",
41
+ "gate_proj",
42
+ "q_proj",
43
+ "k_proj"
 
 
 
44
  ],
45
+ "target_parameters": null,
46
  "task_type": "CAUSAL_LM",
47
  "trainable_token_indices": null,
48
  "use_bdlora": null,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:686c075b96bc79f5208bf275598f5a344142e05131d171b191e632823cd8e5ad
3
- size 1011000856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a763e594f9aef39fbb7a8cb6e256dada4be4a30d1fc3da7d3997fa204264778
3
+ size 137041384
chat_template.jinja CHANGED
@@ -1,29 +1,18 @@
1
- {{ bos_token }}{%- macro strip_thinking(text) -%}
2
- {%- set ns = namespace(result='') -%}
3
- {%- for part in text.split('<channel|>') -%}
4
- {%- if '<|channel>' in part -%}
5
- {%- set ns.result = ns.result + part.split('<|channel>')[0] -%}
6
- {%- else -%}
7
- {%- set ns.result = ns.result + part -%}
8
- {%- endif -%}
9
- {%- endfor -%}
10
- {{- ns.result | trim -}}
11
- {%- endmacro -%}
12
- {%- set thinking = enable_thinking is defined and enable_thinking -%}
13
- {%- set loop_messages = messages -%}
14
- {%- if messages[0]['role'] in ['system', 'developer'] or thinking -%}
15
- {{ '<|turn>system
16
- ' }}
17
- {%- if thinking -%}
18
- {{ '<|think|>
19
- ' }}
20
- {%- endif -%}
21
- {%- if messages[0]['role'] in ['system', 'developer'] -%}
22
- {{ messages[0]['content'] | trim }}
23
- {%- set loop_messages = messages[1:] -%}
24
  {%- endif -%}
25
- {{ '<turn|>
26
- ' }}
 
 
27
  {%- endif -%}
28
  {%- for message in loop_messages -%}
29
  {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
@@ -34,37 +23,25 @@
34
  {%- else -%}
35
  {%- set role = message['role'] -%}
36
  {%- endif -%}
37
- {{ '<|turn>' + role + '
38
- ' }}
39
  {%- if message['content'] is string -%}
40
- {%- if role == "model" -%}
41
- {{ strip_thinking(message['content']) }}
42
- {%- else -%}
43
- {{ message['content'] | trim }}
44
- {%- endif -%}
45
  {%- elif message['content'] is iterable -%}
46
  {%- for item in message['content'] -%}
47
- {%- if item['type'] == 'audio' -%}
48
- {{ '<|audio|>' }}
49
- {%- elif item['type'] == 'image' -%}
50
- {{ '<|image|>' }}
51
- {%- elif item['type'] == 'video' -%}
52
- {{ '<|video|>' }}
53
  {%- elif item['type'] == 'text' -%}
54
- {%- if role == "model" -%}
55
- {{ strip_thinking(item['text']) }}
56
- {%- else -%}
57
- {{ item['text'] | trim }}
58
- {%- endif -%}
59
  {%- endif -%}
60
  {%- endfor -%}
61
  {%- else -%}
62
  {{ raise_exception("Invalid content type") }}
63
  {%- endif -%}
64
- {{ '<turn|>
65
  ' }}
66
  {%- endfor -%}
67
  {%- if add_generation_prompt -%}
68
- {{'<|turn>model
69
- '}}
70
  {%- endif -%}
 
1
+ {{ bos_token }}
2
+ {%- if messages[0]['role'] == 'system' -%}
3
+ {%- if messages[0]['content'] is string -%}
4
+ {%- set first_user_prefix = messages[0]['content'] + '
5
+
6
+ ' -%}
7
+ {%- else -%}
8
+ {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
9
+
10
+ ' -%}
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  {%- endif -%}
12
+ {%- set loop_messages = messages[1:] -%}
13
+ {%- else -%}
14
+ {%- set first_user_prefix = "" -%}
15
+ {%- set loop_messages = messages -%}
16
  {%- endif -%}
17
  {%- for message in loop_messages -%}
18
  {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
 
23
  {%- else -%}
24
  {%- set role = message['role'] -%}
25
  {%- endif -%}
26
+ {{ '<start_of_turn>' + role + '
27
+ ' + (first_user_prefix if loop.first else "") }}
28
  {%- if message['content'] is string -%}
29
+ {{ message['content'] | trim }}
 
 
 
 
30
  {%- elif message['content'] is iterable -%}
31
  {%- for item in message['content'] -%}
32
+ {%- if item['type'] == 'image' -%}
33
+ {{ '<start_of_image>' }}
 
 
 
 
34
  {%- elif item['type'] == 'text' -%}
35
+ {{ item['text'] | trim }}
 
 
 
 
36
  {%- endif -%}
37
  {%- endfor -%}
38
  {%- else -%}
39
  {{ raise_exception("Invalid content type") }}
40
  {%- endif -%}
41
+ {{ '<end_of_turn>
42
  ' }}
43
  {%- endfor -%}
44
  {%- if add_generation_prompt -%}
45
+ {{ '<start_of_turn>model
46
+ ' }}
47
  {%- endif -%}
processor_config.json CHANGED
@@ -1,75 +1,28 @@
1
  {
2
- "audio_ms_per_token": 40,
3
- "audio_seq_length": 750,
4
- "feature_extractor": {
5
- "dither": 0.0,
6
- "feature_extractor_type": "Gemma4AudioFeatureExtractor",
7
- "feature_size": 128,
8
- "fft_length": 512,
9
- "fft_overdrive": false,
10
- "frame_length": 320,
11
- "hop_length": 160,
12
- "input_scale_factor": 1.0,
13
- "max_frequency": 8000.0,
14
- "mel_floor": 0.001,
15
- "min_frequency": 0.0,
16
- "padding_side": "left",
17
- "padding_value": 0.0,
18
- "per_bin_mean": null,
19
- "per_bin_stddev": null,
20
- "preemphasis": 0.0,
21
- "preemphasis_htk_flavor": true,
22
- "return_attention_mask": true,
23
- "sampling_rate": 16000
24
- },
25
  "image_processor": {
26
- "do_convert_rgb": true,
27
- "do_normalize": false,
28
- "do_rescale": true,
29
- "do_resize": true,
30
- "image_mean": [
31
- 0.0,
32
- 0.0,
33
- 0.0
34
- ],
35
- "image_processor_type": "Gemma4ImageProcessor",
36
- "image_seq_length": 280,
37
- "image_std": [
38
- 1.0,
39
- 1.0,
40
- 1.0
41
- ],
42
- "max_soft_tokens": 280,
43
- "patch_size": 16,
44
- "pooling_kernel_size": 3,
45
- "resample": 3,
46
- "rescale_factor": 0.00392156862745098
47
- },
48
- "image_seq_length": 280,
49
- "processor_class": "Gemma4Processor",
50
- "video_processor": {
51
- "do_convert_rgb": true,
52
  "do_normalize": true,
53
  "do_rescale": true,
54
  "do_resize": true,
55
- "do_sample_frames": true,
56
  "image_mean": [
57
- 0.0,
58
- 0.0,
59
- 0.0
60
  ],
 
 
61
  "image_std": [
62
- 1.0,
63
- 1.0,
64
- 1.0
65
  ],
66
- "max_soft_tokens": 70,
67
- "num_frames": 32,
68
- "patch_size": 16,
69
- "pooling_kernel_size": 3,
70
- "resample": 3,
71
  "rescale_factor": 0.00392156862745098,
72
- "return_metadata": false,
73
- "video_processor_type": "Gemma4VideoProcessor"
74
- }
 
 
 
 
75
  }
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "image_processor": {
3
+ "do_convert_rgb": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  "do_normalize": true,
5
  "do_rescale": true,
6
  "do_resize": true,
 
7
  "image_mean": [
8
+ 0.5,
9
+ 0.5,
10
+ 0.5
11
  ],
12
+ "image_processor_type": "Gemma3ImageProcessor",
13
+ "image_seq_length": 256,
14
  "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
  ],
19
+ "resample": 2,
 
 
 
 
20
  "rescale_factor": 0.00392156862745098,
21
+ "size": {
22
+ "height": 896,
23
+ "width": 896
24
+ }
25
+ },
26
+ "image_seq_length": 256,
27
+ "processor_class": "Gemma3Processor"
28
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc8d3a0ce36466ccc1278bf987df5f71db1719b9ca6b4118264f45cb627bfe0f
3
- size 32169626
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daab2354f8a74e70d70b4d1f804939b68a8c9624dd06cb7858e52dd8970e9726
3
+ size 33384567
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
3
+ size 4689074
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff
 
train_summary.json CHANGED
@@ -1,20 +1,20 @@
1
  {
2
- "base_model": "unsloth/gemma-4-26b-a4b-it",
3
  "train_file": "/data/train.jsonl",
4
  "eval_file": "/data/eval.jsonl",
5
- "out_adapter": "/out/neurobait-lora-run3",
6
  "out_dir": "/out/outputs",
7
  "epochs": 3.0,
8
  "max_seq": 2048,
9
  "seed": 42,
10
  "n_train": 270,
11
  "n_eval": 30,
12
- "train_loss": 0.2420340434593313,
13
  "eval": {
14
- "eval_loss": 2.404412269592285,
15
- "eval_runtime": 13.2261,
16
- "eval_samples_per_second": 2.268,
17
- "eval_steps_per_second": 2.268,
18
  "epoch": 3.0
19
  },
20
  "expected_steps": 102
 
1
  {
2
+ "base_model": "unsloth/gemma-3-12b-it",
3
  "train_file": "/data/train.jsonl",
4
  "eval_file": "/data/eval.jsonl",
5
+ "out_adapter": "/out/neurobait-lora-12b",
6
  "out_dir": "/out/outputs",
7
  "epochs": 3.0,
8
  "max_seq": 2048,
9
  "seed": 42,
10
  "n_train": 270,
11
  "n_eval": 30,
12
+ "train_loss": 1.7501190456689573,
13
  "eval": {
14
+ "eval_loss": 1.8844258785247803,
15
+ "eval_runtime": 20.1028,
16
+ "eval_samples_per_second": 1.492,
17
+ "eval_steps_per_second": 1.492,
18
  "epoch": 3.0
19
  },
20
  "expected_steps": 102