Text Generation
PEFT
Safetensors
lora
gemma-3
adhd
neurodivergent
task-initiation
build-small-hackathon
backyard-ai
modal
zerogpu
conversational
Instructions to use build-small-hackathon/NeuroBait with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use build-small-hackathon/NeuroBait with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("unsloth/gemma-3-12b-it")
model = PeftModel.from_pretrained(base_model, "build-small-hackathon/NeuroBait")
```
- Notebooks
- Google Colab
- Kaggle
Upload folder using huggingface_hub
Browse files
- README.md +9 -9
- adapter_config.json +8 -11
- adapter_model.safetensors +2 -2
- chat_template.jinja +23 -46
- processor_config.json +17 -64
- tokenizer.json +2 -2
- tokenizer.model +3 -0
- tokenizer_config.json +0 -0
- train_summary.json +7 -7
README.md
CHANGED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
---
|
| 2 |
-
base_model: unsloth/gemma-
|
| 3 |
library_name: peft
|
| 4 |
pipeline_tag: text-generation
|
| 5 |
tags:
|
| 6 |
- lora
|
| 7 |
- peft
|
| 8 |
-
- gemma-
|
| 9 |
- adhd
|
| 10 |
- neurodivergent
|
| 11 |
- task-initiation
|
|
@@ -15,7 +15,7 @@ license: other
|
|
| 15 |
|
| 16 |
# NeuroBait
|
| 17 |
|
| 18 |
-
NeuroBait is a LoRA fine-tune of `unsloth/gemma-
|
| 19 |
neurodivergent task-initiation conversations. It is designed to produce warm,
|
| 20 |
short, agency-preserving prose that helps a user start one tiny next move
|
| 21 |
without turning the conversation into a full to-do list.
|
|
@@ -54,7 +54,7 @@ The dataset is not included in this model repo.
|
|
| 54 |
|
| 55 |
## Training Configuration
|
| 56 |
|
| 57 |
-
- Base: `unsloth/gemma-
|
| 58 |
- Method: 16-bit LoRA
|
| 59 |
- LoRA rank: 16
|
| 60 |
- LoRA alpha: 16
|
|
@@ -68,8 +68,8 @@ The dataset is not included in this model repo.
|
|
| 68 |
- Warmup ratio: 0.05
|
| 69 |
- Optimizer: adamw 8-bit
|
| 70 |
- Precision: bf16
|
| 71 |
-
- Chat template: `gemma-
|
| 72 |
-
- Response-only markers: `<
|
| 73 |
|
| 74 |
Training ran on Modal with an H100 80GB GPU.
|
| 75 |
|
|
@@ -109,14 +109,14 @@ Example adapter loading path:
|
|
| 109 |
|
| 110 |
```python
|
| 111 |
from peft import PeftModel
|
| 112 |
-
from transformers import
|
| 113 |
import torch
|
| 114 |
|
| 115 |
-
base_model = "unsloth/gemma-
|
| 116 |
adapter_id = "build-small-hackathon/NeuroBait"
|
| 117 |
|
| 118 |
tokenizer = AutoTokenizer.from_pretrained(adapter_id)
|
| 119 |
-
model =
|
| 120 |
base_model,
|
| 121 |
quantization_config=BitsAndBytesConfig(
|
| 122 |
load_in_4bit=True,
|
|
|
|
| 1 |
---
|
| 2 |
+
base_model: unsloth/gemma-3-12b-it
|
| 3 |
library_name: peft
|
| 4 |
pipeline_tag: text-generation
|
| 5 |
tags:
|
| 6 |
- lora
|
| 7 |
- peft
|
| 8 |
+
- gemma-3
|
| 9 |
- adhd
|
| 10 |
- neurodivergent
|
| 11 |
- task-initiation
|
|
|
|
| 15 |
|
| 16 |
# NeuroBait
|
| 17 |
|
| 18 |
+
NeuroBait is a LoRA fine-tune of `unsloth/gemma-3-12b-it` for ADHD and
|
| 19 |
neurodivergent task-initiation conversations. It is designed to produce warm,
|
| 20 |
short, agency-preserving prose that helps a user start one tiny next move
|
| 21 |
without turning the conversation into a full to-do list.
|
|
|
|
| 54 |
|
| 55 |
## Training Configuration
|
| 56 |
|
| 57 |
+
- Base: `unsloth/gemma-3-12b-it` (dense Gemma 3)
|
| 58 |
- Method: 16-bit LoRA
|
| 59 |
- LoRA rank: 16
|
| 60 |
- LoRA alpha: 16
|
|
|
|
| 68 |
- Warmup ratio: 0.05
|
| 69 |
- Optimizer: adamw 8-bit
|
| 70 |
- Precision: bf16
|
| 71 |
+
- Chat template: `gemma-3`
|
| 72 |
+
- Response-only markers: `<start_of_turn>user\n` / `<start_of_turn>model\n`
|
| 73 |
|
| 74 |
Training ran on Modal with an H100 80GB GPU.
|
| 75 |
|
|
|
|
| 109 |
|
| 110 |
```python
|
| 111 |
from peft import PeftModel
|
| 112 |
+
from transformers import AutoModelForImageTextToText, AutoTokenizer, BitsAndBytesConfig
|
| 113 |
import torch
|
| 114 |
|
| 115 |
+
base_model = "unsloth/gemma-3-12b-it"
|
| 116 |
adapter_id = "build-small-hackathon/NeuroBait"
|
| 117 |
|
| 118 |
tokenizer = AutoTokenizer.from_pretrained(adapter_id)
|
| 119 |
+
model = AutoModelForImageTextToText.from_pretrained(
|
| 120 |
base_model,
|
| 121 |
quantization_config=BitsAndBytesConfig(
|
| 122 |
load_in_4bit=True,
|
adapter_config.json
CHANGED
|
@@ -3,11 +3,11 @@
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": {
|
| 6 |
-
"base_model_class": "
|
| 7 |
-
"parent_library": "transformers.models.
|
| 8 |
"unsloth_fixed": true
|
| 9 |
},
|
| 10 |
-
"base_model_name_or_path": "unsloth/gemma-
|
| 11 |
"bias": "none",
|
| 12 |
"corda_config": null,
|
| 13 |
"ensure_weight_tying": false,
|
|
@@ -34,18 +34,15 @@
|
|
| 34 |
"rank_pattern": {},
|
| 35 |
"revision": null,
|
| 36 |
"target_modules": [
|
|
|
|
| 37 |
"v_proj",
|
| 38 |
"o_proj",
|
| 39 |
-
"k_proj",
|
| 40 |
-
"gate_proj",
|
| 41 |
"up_proj",
|
| 42 |
-
"
|
| 43 |
-
"q_proj"
|
| 44 |
-
|
| 45 |
-
"target_parameters": [
|
| 46 |
-
"experts.gate_up_proj",
|
| 47 |
-
"experts.down_proj"
|
| 48 |
],
|
|
|
|
| 49 |
"task_type": "CAUSAL_LM",
|
| 50 |
"trainable_token_indices": null,
|
| 51 |
"use_bdlora": null,
|
|
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": {
|
| 6 |
+
"base_model_class": "Gemma3ForConditionalGeneration",
|
| 7 |
+
"parent_library": "transformers.models.gemma3.modeling_gemma3",
|
| 8 |
"unsloth_fixed": true
|
| 9 |
},
|
| 10 |
+
"base_model_name_or_path": "unsloth/gemma-3-12b-it",
|
| 11 |
"bias": "none",
|
| 12 |
"corda_config": null,
|
| 13 |
"ensure_weight_tying": false,
|
|
|
|
| 34 |
"rank_pattern": {},
|
| 35 |
"revision": null,
|
| 36 |
"target_modules": [
|
| 37 |
+
"down_proj",
|
| 38 |
"v_proj",
|
| 39 |
"o_proj",
|
|
|
|
|
|
|
| 40 |
"up_proj",
|
| 41 |
+
"gate_proj",
|
| 42 |
+
"q_proj",
|
| 43 |
+
"k_proj"
|
|
|
|
|
|
|
|
|
|
| 44 |
],
|
| 45 |
+
"target_parameters": null,
|
| 46 |
"task_type": "CAUSAL_LM",
|
| 47 |
"trainable_token_indices": null,
|
| 48 |
"use_bdlora": null,
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a763e594f9aef39fbb7a8cb6e256dada4be4a30d1fc3da7d3997fa204264778
|
| 3 |
+
size 137041384
|
chat_template.jinja
CHANGED
|
@@ -1,29 +1,18 @@
|
|
| 1 |
-
{{ bos_token }}
|
| 2 |
-
|
| 3 |
-
{%-
|
| 4 |
-
{%-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
{%-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
{%- endmacro -%}
|
| 12 |
-
{%- set thinking = enable_thinking is defined and enable_thinking -%}
|
| 13 |
-
{%- set loop_messages = messages -%}
|
| 14 |
-
{%- if messages[0]['role'] in ['system', 'developer'] or thinking -%}
|
| 15 |
-
{{ '<|turn>system
|
| 16 |
-
' }}
|
| 17 |
-
{%- if thinking -%}
|
| 18 |
-
{{ '<|think|>
|
| 19 |
-
' }}
|
| 20 |
-
{%- endif -%}
|
| 21 |
-
{%- if messages[0]['role'] in ['system', 'developer'] -%}
|
| 22 |
-
{{ messages[0]['content'] | trim }}
|
| 23 |
-
{%- set loop_messages = messages[1:] -%}
|
| 24 |
{%- endif -%}
|
| 25 |
-
{
|
| 26 |
-
|
|
|
|
|
|
|
| 27 |
{%- endif -%}
|
| 28 |
{%- for message in loop_messages -%}
|
| 29 |
{%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
|
|
@@ -34,37 +23,25 @@
|
|
| 34 |
{%- else -%}
|
| 35 |
{%- set role = message['role'] -%}
|
| 36 |
{%- endif -%}
|
| 37 |
-
{{ '<
|
| 38 |
-
' }}
|
| 39 |
{%- if message['content'] is string -%}
|
| 40 |
-
{
|
| 41 |
-
{{ strip_thinking(message['content']) }}
|
| 42 |
-
{%- else -%}
|
| 43 |
-
{{ message['content'] | trim }}
|
| 44 |
-
{%- endif -%}
|
| 45 |
{%- elif message['content'] is iterable -%}
|
| 46 |
{%- for item in message['content'] -%}
|
| 47 |
-
{%- if item['type'] == '
|
| 48 |
-
{{ '<
|
| 49 |
-
{%- elif item['type'] == 'image' -%}
|
| 50 |
-
{{ '<|image|>' }}
|
| 51 |
-
{%- elif item['type'] == 'video' -%}
|
| 52 |
-
{{ '<|video|>' }}
|
| 53 |
{%- elif item['type'] == 'text' -%}
|
| 54 |
-
{
|
| 55 |
-
{{ strip_thinking(item['text']) }}
|
| 56 |
-
{%- else -%}
|
| 57 |
-
{{ item['text'] | trim }}
|
| 58 |
-
{%- endif -%}
|
| 59 |
{%- endif -%}
|
| 60 |
{%- endfor -%}
|
| 61 |
{%- else -%}
|
| 62 |
{{ raise_exception("Invalid content type") }}
|
| 63 |
{%- endif -%}
|
| 64 |
-
{{ '<
|
| 65 |
' }}
|
| 66 |
{%- endfor -%}
|
| 67 |
{%- if add_generation_prompt -%}
|
| 68 |
-
{{'<
|
| 69 |
-
'}}
|
| 70 |
{%- endif -%}
|
|
|
|
| 1 |
+
{{ bos_token }}
|
| 2 |
+
{%- if messages[0]['role'] == 'system' -%}
|
| 3 |
+
{%- if messages[0]['content'] is string -%}
|
| 4 |
+
{%- set first_user_prefix = messages[0]['content'] + '
|
| 5 |
+
|
| 6 |
+
' -%}
|
| 7 |
+
{%- else -%}
|
| 8 |
+
{%- set first_user_prefix = messages[0]['content'][0]['text'] + '
|
| 9 |
+
|
| 10 |
+
' -%}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
{%- endif -%}
|
| 12 |
+
{%- set loop_messages = messages[1:] -%}
|
| 13 |
+
{%- else -%}
|
| 14 |
+
{%- set first_user_prefix = "" -%}
|
| 15 |
+
{%- set loop_messages = messages -%}
|
| 16 |
{%- endif -%}
|
| 17 |
{%- for message in loop_messages -%}
|
| 18 |
{%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
|
|
|
|
| 23 |
{%- else -%}
|
| 24 |
{%- set role = message['role'] -%}
|
| 25 |
{%- endif -%}
|
| 26 |
+
{{ '<start_of_turn>' + role + '
|
| 27 |
+
' + (first_user_prefix if loop.first else "") }}
|
| 28 |
{%- if message['content'] is string -%}
|
| 29 |
+
{{ message['content'] | trim }}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
{%- elif message['content'] is iterable -%}
|
| 31 |
{%- for item in message['content'] -%}
|
| 32 |
+
{%- if item['type'] == 'image' -%}
|
| 33 |
+
{{ '<start_of_image>' }}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
{%- elif item['type'] == 'text' -%}
|
| 35 |
+
{{ item['text'] | trim }}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
{%- endif -%}
|
| 37 |
{%- endfor -%}
|
| 38 |
{%- else -%}
|
| 39 |
{{ raise_exception("Invalid content type") }}
|
| 40 |
{%- endif -%}
|
| 41 |
+
{{ '<end_of_turn>
|
| 42 |
' }}
|
| 43 |
{%- endfor -%}
|
| 44 |
{%- if add_generation_prompt -%}
|
| 45 |
+
{{ '<start_of_turn>model
|
| 46 |
+
' }}
|
| 47 |
{%- endif -%}
|
processor_config.json
CHANGED
|
@@ -1,75 +1,28 @@
|
|
| 1 |
{
|
| 2 |
-
"audio_ms_per_token": 40,
|
| 3 |
-
"audio_seq_length": 750,
|
| 4 |
-
"feature_extractor": {
|
| 5 |
-
"dither": 0.0,
|
| 6 |
-
"feature_extractor_type": "Gemma4AudioFeatureExtractor",
|
| 7 |
-
"feature_size": 128,
|
| 8 |
-
"fft_length": 512,
|
| 9 |
-
"fft_overdrive": false,
|
| 10 |
-
"frame_length": 320,
|
| 11 |
-
"hop_length": 160,
|
| 12 |
-
"input_scale_factor": 1.0,
|
| 13 |
-
"max_frequency": 8000.0,
|
| 14 |
-
"mel_floor": 0.001,
|
| 15 |
-
"min_frequency": 0.0,
|
| 16 |
-
"padding_side": "left",
|
| 17 |
-
"padding_value": 0.0,
|
| 18 |
-
"per_bin_mean": null,
|
| 19 |
-
"per_bin_stddev": null,
|
| 20 |
-
"preemphasis": 0.0,
|
| 21 |
-
"preemphasis_htk_flavor": true,
|
| 22 |
-
"return_attention_mask": true,
|
| 23 |
-
"sampling_rate": 16000
|
| 24 |
-
},
|
| 25 |
"image_processor": {
|
| 26 |
-
"do_convert_rgb":
|
| 27 |
-
"do_normalize": false,
|
| 28 |
-
"do_rescale": true,
|
| 29 |
-
"do_resize": true,
|
| 30 |
-
"image_mean": [
|
| 31 |
-
0.0,
|
| 32 |
-
0.0,
|
| 33 |
-
0.0
|
| 34 |
-
],
|
| 35 |
-
"image_processor_type": "Gemma4ImageProcessor",
|
| 36 |
-
"image_seq_length": 280,
|
| 37 |
-
"image_std": [
|
| 38 |
-
1.0,
|
| 39 |
-
1.0,
|
| 40 |
-
1.0
|
| 41 |
-
],
|
| 42 |
-
"max_soft_tokens": 280,
|
| 43 |
-
"patch_size": 16,
|
| 44 |
-
"pooling_kernel_size": 3,
|
| 45 |
-
"resample": 3,
|
| 46 |
-
"rescale_factor": 0.00392156862745098
|
| 47 |
-
},
|
| 48 |
-
"image_seq_length": 280,
|
| 49 |
-
"processor_class": "Gemma4Processor",
|
| 50 |
-
"video_processor": {
|
| 51 |
-
"do_convert_rgb": true,
|
| 52 |
"do_normalize": true,
|
| 53 |
"do_rescale": true,
|
| 54 |
"do_resize": true,
|
| 55 |
-
"do_sample_frames": true,
|
| 56 |
"image_mean": [
|
| 57 |
-
0.
|
| 58 |
-
0.
|
| 59 |
-
0.
|
| 60 |
],
|
|
|
|
|
|
|
| 61 |
"image_std": [
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
],
|
| 66 |
-
"
|
| 67 |
-
"num_frames": 32,
|
| 68 |
-
"patch_size": 16,
|
| 69 |
-
"pooling_kernel_size": 3,
|
| 70 |
-
"resample": 3,
|
| 71 |
"rescale_factor": 0.00392156862745098,
|
| 72 |
-
"
|
| 73 |
-
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
}
|
|
|
|
| 1 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
"image_processor": {
|
| 3 |
+
"do_convert_rgb": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
"do_normalize": true,
|
| 5 |
"do_rescale": true,
|
| 6 |
"do_resize": true,
|
|
|
|
| 7 |
"image_mean": [
|
| 8 |
+
0.5,
|
| 9 |
+
0.5,
|
| 10 |
+
0.5
|
| 11 |
],
|
| 12 |
+
"image_processor_type": "Gemma3ImageProcessor",
|
| 13 |
+
"image_seq_length": 256,
|
| 14 |
"image_std": [
|
| 15 |
+
0.5,
|
| 16 |
+
0.5,
|
| 17 |
+
0.5
|
| 18 |
],
|
| 19 |
+
"resample": 2,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
"rescale_factor": 0.00392156862745098,
|
| 21 |
+
"size": {
|
| 22 |
+
"height": 896,
|
| 23 |
+
"width": 896
|
| 24 |
+
}
|
| 25 |
+
},
|
| 26 |
+
"image_seq_length": 256,
|
| 27 |
+
"processor_class": "Gemma3Processor"
|
| 28 |
}
|
tokenizer.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:daab2354f8a74e70d70b4d1f804939b68a8c9624dd06cb7858e52dd8970e9726
|
| 3 |
+
size 33384567
|
tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
|
| 3 |
+
size 4689074
|
tokenizer_config.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
train_summary.json
CHANGED
|
@@ -1,20 +1,20 @@
|
|
| 1 |
{
|
| 2 |
-
"base_model": "unsloth/gemma-
|
| 3 |
"train_file": "/data/train.jsonl",
|
| 4 |
"eval_file": "/data/eval.jsonl",
|
| 5 |
-
"out_adapter": "/out/neurobait-lora-
|
| 6 |
"out_dir": "/out/outputs",
|
| 7 |
"epochs": 3.0,
|
| 8 |
"max_seq": 2048,
|
| 9 |
"seed": 42,
|
| 10 |
"n_train": 270,
|
| 11 |
"n_eval": 30,
|
| 12 |
-
"train_loss":
|
| 13 |
"eval": {
|
| 14 |
-
"eval_loss":
|
| 15 |
-
"eval_runtime":
|
| 16 |
-
"eval_samples_per_second":
|
| 17 |
-
"eval_steps_per_second":
|
| 18 |
"epoch": 3.0
|
| 19 |
},
|
| 20 |
"expected_steps": 102
|
|
|
|
| 1 |
{
|
| 2 |
+
"base_model": "unsloth/gemma-3-12b-it",
|
| 3 |
"train_file": "/data/train.jsonl",
|
| 4 |
"eval_file": "/data/eval.jsonl",
|
| 5 |
+
"out_adapter": "/out/neurobait-lora-12b",
|
| 6 |
"out_dir": "/out/outputs",
|
| 7 |
"epochs": 3.0,
|
| 8 |
"max_seq": 2048,
|
| 9 |
"seed": 42,
|
| 10 |
"n_train": 270,
|
| 11 |
"n_eval": 30,
|
| 12 |
+
"train_loss": 1.7501190456689573,
|
| 13 |
"eval": {
|
| 14 |
+
"eval_loss": 1.8844258785247803,
|
| 15 |
+
"eval_runtime": 20.1028,
|
| 16 |
+
"eval_samples_per_second": 1.492,
|
| 17 |
+
"eval_steps_per_second": 1.492,
|
| 18 |
"epoch": 3.0
|
| 19 |
},
|
| 20 |
"expected_steps": 102
|