juanquivilla committed on
Commit
4ab7708
·
verified ·
1 Parent(s): 88c1f58

GRPO model: ROUGE-L 0.891 — matches prompted 2B

Browse files
Files changed (3) hide show
  1. config.json +62 -61
  2. model.safetensors +1 -1
  3. tokenizer_config.json +3 -4
config.json CHANGED
@@ -1,62 +1,63 @@
1
  {
2
- "architectures": [
3
- "Lfm2ForCausalLM"
4
- ],
5
- "block_auto_adjust_ff_dim": true,
6
- "block_dim": 1024,
7
- "block_ff_dim": 6656,
8
- "block_ffn_dim_multiplier": 1.0,
9
- "block_mlp_init_scale": 1.0,
10
- "block_multiple_of": 256,
11
- "block_norm_eps": 1e-05,
12
- "block_out_init_scale": 1.0,
13
- "block_use_swiglu": true,
14
- "block_use_xavier_init": true,
15
- "bos_token_id": 1,
16
- "conv_L_cache": 3,
17
- "conv_bias": false,
18
- "conv_dim": 1024,
19
- "conv_use_xavier_init": true,
20
- "torch_dtype": "bfloat16",
21
- "eos_token_id": 7,
22
- "hidden_size": 1024,
23
- "initializer_range": 0.02,
24
- "intermediate_size": 6656,
25
- "layer_types": [
26
- "conv",
27
- "conv",
28
- "full_attention",
29
- "conv",
30
- "conv",
31
- "full_attention",
32
- "conv",
33
- "conv",
34
- "full_attention",
35
- "conv",
36
- "full_attention",
37
- "conv",
38
- "full_attention",
39
- "conv",
40
- "full_attention",
41
- "conv"
42
- ],
43
- "max_position_embeddings": 128000,
44
- "model_name": "LiquidAI/LFM2.5-350M-Base",
45
- "model_type": "lfm2",
46
- "norm_eps": 1e-05,
47
- "num_attention_heads": 16,
48
- "num_heads": 16,
49
- "num_hidden_layers": 16,
50
- "num_key_value_heads": 8,
51
- "pad_token_id": 0,
52
- "rope_parameters": {
53
- "rope_theta": 1000000.0,
54
- "rope_type": "default"
55
- },
56
- "tie_embedding": true,
57
- "tie_word_embeddings": true,
58
- "unsloth_version": "2026.3.18",
59
- "use_cache": false,
60
- "use_pos_enc": true,
61
- "vocab_size": 65536
62
- }
 
 
1
  {
2
+ "architectures": [
3
+ "Lfm2ForCausalLM"
4
+ ],
5
+ "block_auto_adjust_ff_dim": true,
6
+ "block_dim": 1024,
7
+ "block_ff_dim": 6656,
8
+ "block_ffn_dim_multiplier": 1.0,
9
+ "block_mlp_init_scale": 1.0,
10
+ "block_multiple_of": 256,
11
+ "block_norm_eps": 1e-05,
12
+ "block_out_init_scale": 1.0,
13
+ "block_use_swiglu": true,
14
+ "block_use_xavier_init": true,
15
+ "bos_token_id": 1,
16
+ "conv_L_cache": 3,
17
+ "conv_bias": false,
18
+ "conv_dim": 1024,
19
+ "conv_use_xavier_init": true,
20
+ "dtype": "bfloat16",
21
+ "eos_token_id": 7,
22
+ "hidden_size": 1024,
23
+ "initializer_range": 0.02,
24
+ "intermediate_size": 6656,
25
+ "layer_types": [
26
+ "conv",
27
+ "conv",
28
+ "full_attention",
29
+ "conv",
30
+ "conv",
31
+ "full_attention",
32
+ "conv",
33
+ "conv",
34
+ "full_attention",
35
+ "conv",
36
+ "full_attention",
37
+ "conv",
38
+ "full_attention",
39
+ "conv",
40
+ "full_attention",
41
+ "conv"
42
+ ],
43
+ "max_position_embeddings": 128000,
44
+ "model_name": "LiquidAI/LFM2.5-350M-Base",
45
+ "model_type": "lfm2",
46
+ "norm_eps": 1e-05,
47
+ "num_attention_heads": 16,
48
+ "num_heads": 16,
49
+ "num_hidden_layers": 16,
50
+ "num_key_value_heads": 8,
51
+ "pad_token_id": 0,
52
+ "rope_parameters": {
53
+ "rope_theta": 1000000.0,
54
+ "rope_type": "default"
55
+ },
56
+ "tie_embedding": true,
57
+ "tie_word_embeddings": true,
58
+ "transformers_version": "5.3.0",
59
+ "unsloth_version": "2026.3.18",
60
+ "use_cache": false,
61
+ "use_pos_enc": true,
62
+ "vocab_size": 65536
63
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93c9ad95a97e816d22da4bdc2a8de79b554dbdcf910687502769403bd38ba839
3
  size 708984464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cab7fee9107d3f698a2c5c1567638ab8d31427327303325ed7d34070c1a7bd2
3
  size 708984464
tokenizer_config.json CHANGED
@@ -4,7 +4,7 @@
4
  "clean_up_tokenization_spaces": false,
5
  "eos_token": "<|im_end|>",
6
  "extra_special_tokens": [],
7
- "is_local": false,
8
  "legacy": false,
9
  "model_input_names": [
10
  "input_ids",
@@ -17,6 +17,5 @@
17
  "spaces_between_special_tokens": false,
18
  "tokenizer_class": "TokenizersBackend",
19
  "use_default_system_prompt": false,
20
- "use_fast": true,
21
- "chat_template": "{{- bos_token -}}{%- set system_prompt = \"\" -%}{%- set ns = namespace(system_prompt=\"\") -%}{%- if messages[0][\"role\"] == \"system\" -%} {%- set ns.system_prompt = messages[0][\"content\"] -%} {%- set messages = messages[1:] -%}{%- endif -%}{%- if tools -%} {%- set ns.system_prompt = ns.system_prompt + (\"\n\" if ns.system_prompt else \"\") + \"List of tools: <|tool_list_start|>[\" -%} {%- for tool in tools -%} {%- if tool is not string -%} {%- set tool = tool | tojson -%} {%- endif -%} {%- set ns.system_prompt = ns.system_prompt + tool -%} {%- if not loop.last -%} {%- set ns.system_prompt = ns.system_prompt + \", \" -%} {%- endif -%} {%- endfor -%} {%- set ns.system_prompt = ns.system_prompt + \"]<|tool_list_end|>\" -%}{%- endif -%}{%- if ns.system_prompt -%} {{- \"<|im_start|>system\n\" + ns.system_prompt + \"<|im_end|>\n\" -}}{%- endif -%}{%- for message in messages -%} {{- \"<|im_start|>\" + message[\"role\"] + \"\n\" -}} {%- set content = message[\"content\"] -%} {%- if content is not string -%} {%- set content = content | tojson -%} {%- endif -%} {%- if message[\"role\"] == \"tool\" -%} {%- set content = \"<|tool_response_start|>\" + content + \"<|tool_response_end|>\" -%} {%- endif -%} {{- content + \"<|im_end|>\n\" -}}{%- endfor -%}{%- if add_generation_prompt -%} {{- \"<|im_start|>assistant\n\" -}}{%- endif -%}"
22
- }
 
4
  "clean_up_tokenization_spaces": false,
5
  "eos_token": "<|im_end|>",
6
  "extra_special_tokens": [],
7
+ "is_local": true,
8
  "legacy": false,
9
  "model_input_names": [
10
  "input_ids",
 
17
  "spaces_between_special_tokens": false,
18
  "tokenizer_class": "TokenizersBackend",
19
  "use_default_system_prompt": false,
20
+ "use_fast": true
21
+ }