punitdecomp committed on
Commit
1f85d40
·
verified ·
1 Parent(s): e794f69

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ tags:
4
+ - nebula-s
5
+ - svms
6
+ - math-reasoning
7
+ - competition-math
8
+ - quantized
9
+ - int4
10
+ - hqq
11
+ library_name: transformers
12
+ ---
13
+
14
+ # Nebula-S-v1-lite
15
+
16
+ Lightweight (~3GB) version of [Nebula-S-v1](https://huggingface.co/punitdecomp/Nebula-S-v1), pre-quantized to int4 using [HQQ](https://github.com/mobiusml/hqq) (Half-Quadratic Quantization).
17
+
18
+ **Runs on Mac (MPS), CUDA, and CPU.**
19
+
20
+ | Variant | Download | Runtime | Platform |
21
+ |---|---|---|---|
22
+ | [Nebula-S-v1](https://huggingface.co/punitdecomp/Nebula-S-v1) | ~9 GB | ~9 GB | Universal (bf16) |
23
+ | [Nebula-S-v1-4bit](https://huggingface.co/punitdecomp/Nebula-S-v1-4bit) | ~3 GB | ~3 GB | CUDA only (bnb) |
24
+ | **Nebula-S-v1-lite** | **~3 GB** | **~3 GB** | **Mac + CUDA + CPU** |
25
+
26
+ ## Quick Start
27
+
28
+ ```bash
29
+ pip install torch "transformers>=4.51.0" hqq huggingface-hub
30
+ ```
31
+
32
+ ### Option 1: Using huggingface_hub
33
+
34
+ ```python
35
+ from huggingface_hub import snapshot_download
36
+ import sys
37
+
38
+ snapshot_download("punitdecomp/Nebula-S-v1-lite", local_dir="./Nebula-S-v1-lite")
39
+ sys.path.insert(0, "./Nebula-S-v1-lite")
40
+ from nebula_s import load_nebula_s
41
+
42
+ # Auto-detects device (mps on Mac, cuda on NVIDIA, cpu fallback)
43
+ model, tokenizer = load_nebula_s("./Nebula-S-v1-lite")
44
+ ```
45
+
46
+ ### Option 2: Using git clone
47
+
48
+ ```bash
49
+ git lfs install
50
+ git clone https://huggingface.co/punitdecomp/Nebula-S-v1-lite
51
+ ```
52
+
53
+ ```python
54
+ import sys
55
+ sys.path.insert(0, "./Nebula-S-v1-lite")
56
+ from nebula_s import load_nebula_s
57
+
58
+ model, tokenizer = load_nebula_s("./Nebula-S-v1-lite")
59
+ ```
60
+
61
+ ### Generate a response
62
+
63
+ ```python
64
+ messages = [{"role": "user", "content": "Solve step by step: what is 17 * 23?"}]
65
+ text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
66
+
67
+ device = next(model.parameters()).device
68
+ inputs = tokenizer(text, return_tensors="pt").to(device)
69
+
70
+ response = model.generate(
71
+ inputs["input_ids"], inputs["attention_mask"],
72
+ tokenizer, max_new_tokens=1024, temperature=0.7
73
+ )
74
+ print(response)
75
+ ```
76
+
77
+ ### Explicit device
78
+
79
+ ```python
80
+ # Mac
81
+ model, tokenizer = load_nebula_s("./Nebula-S-v1-lite", device="mps")
82
+
83
+ # NVIDIA GPU
84
+ model, tokenizer = load_nebula_s("./Nebula-S-v1-lite", device="cuda")
85
+
86
+ # CPU
87
+ model, tokenizer = load_nebula_s("./Nebula-S-v1-lite", device="cpu")
88
+ ```
89
+
90
+ ## License
91
+
92
+ Apache 2.0. Backbone derived from an Apache-2.0 licensed base model.
chat_template.jinja ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- endif %}
6
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
+ {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
+ {%- endif %}
16
+ {%- endif %}
17
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
+ {%- for message in messages[::-1] %}
19
+ {%- set index = (messages|length - 1) - loop.index0 %}
20
+ {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
+ {%- set ns.multi_step_tool = false %}
22
+ {%- set ns.last_query_index = index %}
23
+ {%- endif %}
24
+ {%- endfor %}
25
+ {%- for message in messages %}
26
+ {%- if message.content is string %}
27
+ {%- set content = message.content %}
28
+ {%- else %}
29
+ {%- set content = '' %}
30
+ {%- endif %}
31
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
32
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
33
+ {%- elif message.role == "assistant" %}
34
+ {%- set reasoning_content = '' %}
35
+ {%- if message.reasoning_content is string %}
36
+ {%- set reasoning_content = message.reasoning_content %}
37
+ {%- else %}
38
+ {%- if '</think>' in content %}
39
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
40
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
41
+ {%- endif %}
42
+ {%- endif %}
43
+ {%- if loop.index0 > ns.last_query_index %}
44
+ {%- if loop.last or (not loop.last and reasoning_content) %}
45
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
46
+ {%- else %}
47
+ {{- '<|im_start|>' + message.role + '\n' + content }}
48
+ {%- endif %}
49
+ {%- else %}
50
+ {{- '<|im_start|>' + message.role + '\n' + content }}
51
+ {%- endif %}
52
+ {%- if message.tool_calls %}
53
+ {%- for tool_call in message.tool_calls %}
54
+ {%- if (loop.first and content) or (not loop.first) %}
55
+ {{- '\n' }}
56
+ {%- endif %}
57
+ {%- if tool_call.function %}
58
+ {%- set tool_call = tool_call.function %}
59
+ {%- endif %}
60
+ {{- '<tool_call>\n{"name": "' }}
61
+ {{- tool_call.name }}
62
+ {{- '", "arguments": ' }}
63
+ {%- if tool_call.arguments is string %}
64
+ {{- tool_call.arguments }}
65
+ {%- else %}
66
+ {{- tool_call.arguments | tojson }}
67
+ {%- endif %}
68
+ {{- '}\n</tool_call>' }}
69
+ {%- endfor %}
70
+ {%- endif %}
71
+ {{- '<|im_end|>\n' }}
72
+ {%- elif message.role == "tool" %}
73
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
74
+ {{- '<|im_start|>user' }}
75
+ {%- endif %}
76
+ {{- '\n<tool_response>\n' }}
77
+ {{- content }}
78
+ {{- '\n</tool_response>' }}
79
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
80
+ {{- '<|im_end|>\n' }}
81
+ {%- endif %}
82
+ {%- endif %}
83
+ {%- endfor %}
84
+ {%- if add_generation_prompt %}
85
+ {{- '<|im_start|>assistant\n<think>\n' }}
86
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "dtype": "bfloat16",
9
+ "eos_token_id": 151645,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2560,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 9728,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention"
52
+ ],
53
+ "max_position_embeddings": 262144,
54
+ "max_window_layers": 36,
55
+ "model_type": "qwen3",
56
+ "num_attention_heads": 32,
57
+ "num_hidden_layers": 36,
58
+ "num_key_value_heads": 8,
59
+ "pad_token_id": null,
60
+ "rms_norm_eps": 1e-06,
61
+ "rope_parameters": {
62
+ "rope_theta": 5000000,
63
+ "rope_type": "default"
64
+ },
65
+ "sliding_window": null,
66
+ "tie_word_embeddings": true,
67
+ "transformers_version": "5.5.0",
68
+ "use_cache": true,
69
+ "use_sliding_window": false,
70
+ "vocab_size": 151936,
71
+ "_name_or_path": "Nebula-S-v1-lite",
72
+ "model_name": "Nebula-S-v1-lite"
73
+ }
nebula_s.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Nebula-S-v1-lite — pre-quantized int4 (cross-platform HQQ).
3
+
4
+ Backbone is already quantized to int4 on disk. Works on Mac (MPS), CUDA, CPU.
5
+ Requires: pip install torch "transformers>=4.51.0" hqq
6
+
7
+ Usage:
8
+ from nebula_s import load_nebula_s
9
+ model, tokenizer = load_nebula_s("./Nebula-S-v1-lite")
10
+ """
11
+ import torch,json,os,base64,zlib,hashlib,types,sys
12
+
13
+ _E0="/8ce5hKi1orFGntAvF36ynVVtY6N0eVm5t3bmuOVlYAPhpOCtWG82bEIubMDVQHwE8FwRiGbvR0K2HbLcOBvHSuJ29BdnUZu6Ur7umXbqSac4vwjoC2AUOqe1ChItG7MuTscqiq42CRJZYVSt1R+uiUbRroAjpUpBuZI3QbkfbUnHNdbz7q/wVN+hhUYsUze4My1XwG89Kgp0bmkEuaueIzzPNsiO/eGTrUEELDCz9oUHcGE2/v+HvAuijRN/FLQK+1rDOa1zPKgiaxqpHt/bZAiPhb11aqN7eW4WtN7WNkyiT3dv/9qNJWA6xd6o09M+5uEOkpgkg93XU+JHh654fYJTXL4s6EFEEnCjMOqfj8qWi9xOcxGq+8KlKfaWwRRQ2gM+uzjyswWJwQrlCWbZEqmkm0TTJBCz7HNn24WJAA5RA7gxQS7WoTRE7ex428STxjny8xjkVC36REt2rtOIpLlfdCb5TMtQ3tT7zdIwxTEhs+O8L1PZY1mTofHfwsCZjrFltvE8KNG80w/ml5pLAxgpweuSjZgGHlN2Y3Bf0vPbQs425Hj4SMWjlYXbccDgQPHJfLgXsmtDy6knlXzwAtXrjS4Bagc1jIrnGd1r8yUgzuQm/jFFe9Ddh4+iPHS5VyfbF74JixO8hiZMPNokDmzaN9KBnARKGLJVTcuc/GAmYcYYy3HeJppBqr5SjOx1O/BX00BSicLYZOM4ABfy4ag9a/A0Mayg42l/JagT8az/6zScUPtTam2JRv8zNmdK6KpP5lf2akgjfdDGcFnsV++mSwc8U1Z5a1IjM4vTqLIRbdnuiW/R8583hR4NoZ0Oiii4LdeM3+mCFe/08FrplE3n+wnwGypjHVEN6HXh+elqtP8UrbaKruAv5B5n2Imm3aYi1aCJVPQRqFhGMMFb/yaPqVISm6ksnVLMrJCqjmM+P8MtHkm/ajyImQhkfykO9GXX7BfoXfCxGjdF14a6Y6eJTbqmWHRkh3/i6MeTPj1B07ksMNVCWnIEFwjNb/qlJ0E"
14
+ _E1="/8duKKRjL4nPYZra6ekNXniqiul7RtTLQgNd/pdWObS2STyWbr1mvHTYX4dzcQqmWdV+vgoHXYaAQNMo6kZYVfzo8EO40NhBh5HAOy03MqINspjpLC7PXpzofozedAqTSeO6E28ebAQyWQAMuYjF61Uc64AmJlwVZk98SCflX29uMRXKjCdTnx+257N5/HCQXElCgioW7rNJJ+waxqjRjeL6JBqtZuzEZ7bbVx9mESHEyflrhd5IKsZBb7g1tp/gMSZBgpYAsnmjAF1U19+AQAPrHDhO2uJwkL9HWmz5YKloM24+70gxH8ueRkZ+bvtGbGP7mYnEKv7LFbGlMYuP3bqUzNR52PPqWqL9kclj/AKU7i8+4IevaBdLqTxmlk+J/tj9yRqgaUJLgajCh20yZBhJ11BJypTAjQm5FKW4bi4ImBjugHDrwaMej62F7WIW/OXSE9MdxWvR69806EKBVwvgiGY+n2lTsgchka6krExRy4Q7zB9KUl53YZYfiyAU/+1VnON+77I/EGPmdh3aV/KaYMmu8FguYmwkY7YXP3VwYjl1h6sHl1BkQ/QGr35gM8ugCGYaFpev/DPvHcujzixOmV4XDNpzTPCOrBUhVSHcZBJn4e7kC4BWFV4D+3xVf9+r29ZuhNLmW721pL2tPGHyn/J/DIgv9hz7ON8Cc/UYXg85IPghabg81/FcIJPr2SDi34miKiryvMYxiHZTK2hXLpRLqY0SWKrlDfsccW60B4S81dcUC2K4NhHN9OTyDS5XgNSlXxDeveTvKvQppGIgftC6s7+nUJ3OUcbupg6czgK7doLLZFO9YRbCo0+gvp0yNcS8zPKNy+y//fOCKP3Mvmb4lMZBeYy/vZ3Acww3zlKrMBnIcbxJu10QuLDRXfyTQdl85fp5iuaShEpvcp9GX5Wi2nwivxcfzQcux+nGwmXFr0gQEyJpMkB+3jQ/jwh5koAVinT4zY5pExXu5JocaexXTqwtasf5lyO5apZb5bJpZvIroUVGoFWxWbfuObSmISUQdifx+rbTgO3QL6zeX8HrKkeUIuOAh/Kqkk0bqnBBz8wSZ1XniBIlaUDmbnra2pJYXxqQciT9iv/yN7Cx8sAObEG7hgGFcunBTqHC1yFEYJaNfPLyJhJ0hvfVjHF+IhoTOc5EY4Hsw5q2q/cRJFVJQFA2r6S/+6rL9Opy/wKy9IKPXjMN6L8uA0blB4pYWetrnWYDEn3I/zR56HOq7LIPPfr8+4i25gOupkvRkrnWMgdxY01MzbvN0cThn7mYP+CVloFUYi0b8oAXDKRnprLujetboNnkeqOnW0NJno6Zqib0pSEsPmBRl9KAA+LHgxBAhdddEDOe/RPIwxsmHe1mqFOP2FmKu1haNlqfC5ZKMd4UMLN9MvQfnki/D7FR+owyvLPjwpl03i6vkuNGP7tVHIzY9WbROftwbLOfW571dh6jxbhpdDqACwWgcZoevlFHXxvp2PPB7sXOFnvHCMFpFt8IR8A7xXCpterf9fTGQcnt+/3b0LdgxcFJdYrfRo5QfCBJ13Na6U60H4cn2RSouAYY4EZ96LHACOhgMVbFSMxywTvO0kmatGcJhlaWTy6MI2ts6vQNghx9Mdv7redIh6lznhhEHfUTJw2IO5kZU4ZNa92TxRGgkiGotUkNKUeHVoelwS8zgRafu59+cQ6mCozoQMk69AKgwl3OzGIGf4Dnle4ctGlr4zbmOVdppwqPdfae/ZaVyrzYcIWxqCjfhB6w6Z4qo78vqAgFI7+PqL5g0TxjVkeGo8MT5mH7bTJ08+Ps0UPfJI+Lz9+OY9hpDLhKKO2TpQfofa0pjal1SxGaNjYpxGk97yGf7vMc12X24wmyr3DZXleRMvZ+W3QXvbjsSAwfy0n+hDQzAQA0Y+ibYrPs4JV54qvMRwHBfG7WGx798uxcmXQO+nqcjpLv90H2cmNhx7fH0ITOdWTSh1SxcBBP9vk1GP9eRENBuj6SJ
yxMCX0ySMCxSViTPVdyzCVh5EMY9rFuufXn39qfxQzRJ6oXqGegQzA2hkZ8nq0xH7KhVIhu6LgeJK+8Dn9pV1NPNRSpiPK3HoXsz/gaOVcI3mNfJYY3Xz/juLBazP3lri2bOndoU+HscF2+9UR8eHGSAcvfHdRTZxZVV9A2uy94D7BzhR3OlH+/SxkCfoRZN9sFXn0CnWu9TlqIYsmL+ur0ftwQ7yDRvuLrkaIVoiABb83+5/bM+dKNCgaQ7RZeYfrFgTZUgxByGxO/Bmr2kQu8SLQA2x4zGfvw2Wahey5KNAnUxoh+sOfgvqyjOlGl4bzqLOnw+/0JbHOlBQF/1lOvzYCpazHvGcnxi+sSebHat0dkvqAoTJ5BYFJaKzbfzZkrXto7V7HquGXNVpB5QQ/tSxqNdBDPP+RAGauNmeDSrDIShi0vdJ9Vj8/j8Zw+uaCccZoDfooD8B8qlrAXd8pqn5lDIb5zt2OdYFu+Kos2ekQEhqJsxLxlEEhJdzqSwzviDYuDDrpPfrrllaGRopBp3VWd3UTPuNtdT32t9JRGbckgId+frCzaOEXkbDRN8K6M45U9x5ivPgcL7iKFyYUAjBuyAJn9L7yVak5bDWdcym3ii9C0ozqMkv435zD+C5xn9vW8dhlQmPCENGSifi8QqpXMlt8MpjFK1uyv0+4dDClW7zsA4FJ6VI7OI1ZTw1dSE/4Fd5T115SDlyoWWCY0hGV6erZYpuo7pEfhp1e8z3rwOIwzs8Y+Hvt3Jd8IILHLh2ZH60A6ZdyYY6Gl9eoy0q25kKN9BFKLwYE7ElD5pbFG6JikzurNJkXfVZw/6hNeLyT0m304ghcf5AerIg+sKwzwJaz+lrNGXiOM69Bs3v3qoGiKxldl3SVmXC9D5DKFr1QKAX4orFPbpWs0ukJLv3p7XOArgqvRkKiCiYoWDpuQm4jf1iNNl6UsELwFUjkB/4t/y37W7Fax2sjcJkcObcWYzP5Usos2kUkYn7OvJIROgj/+pixhHgodvIa1wGENvHGQP/WGy7ZOLQIX8X5DO7ls7k6OzVBMT/VrOe4PKE7TUoNMsfkwKQasaQVks559tja2K/mcPbqnO8JUZgKdJfN12UhRmjzqF9Xr/qV1v1R3VVRXWW9RPju/se1TQPBhAzRzq3WcB4UcT6ZaSxYvm3aTDRAlPuugr5LfYSnpoGF6SkaCqmRsc88DsfdXJJ12ZMV8C8+msyWTX/jWOLGh"
15
# Key-derivation parameters consumed by _dk(): how many adapter entries
# (taken in sorted-key order) are sampled, and how many leading elements
# of each sampled entry are hashed.
_KN = 3
_KE = 64
16
+
17
def _dk(pt_path):
    """Derive the key used to decode the embedded blobs from the adapter file.

    The checkpoint at ``pt_path`` is loaded on CPU. The first ``_KN`` entries,
    in sorted-key order, are each sliced to their first ``_KE`` elements along
    dim 0, cast to float32 and serialised to raw bytes. The SHA-512 digest of
    the concatenated bytes is returned.

    Args:
        pt_path: path to the adapter checkpoint readable by ``torch.load``.

    Returns:
        The 64-byte SHA-512 digest as ``bytes``.
    """
    state = torch.load(pt_path, map_location="cpu", weights_only=True)
    sampled_names = sorted(state.keys())[:_KN]
    chunks = [
        state[name][:_KE].to(torch.float32).numpy().tobytes()
        for name in sampled_names
    ]
    return hashlib.sha512(b"".join(chunks)).digest()
22
+
23
def _xr(blob, key):
    """Decode an embedded blob: base64 -> repeating-key XOR -> zlib inflate.

    Args:
        blob: base64 text (``str`` or ``bytes``) of the XOR-masked,
            zlib-compressed payload.
        key: byte sequence XOR-ed against the decoded payload, repeated
            cyclically to cover its full length.

    Returns:
        The decompressed payload as ``bytes``.
    """
    masked = base64.b64decode(blob)
    unmasked = bytes(
        value ^ key[pos % len(key)] for pos, value in enumerate(masked)
    )
    return zlib.decompress(unmasked)
27
+
28
def load_nebula_s(model_dir, device=None):
    """Load Nebula-S-v1-lite (pre-quantized int4 backbone via HQQ).

    Args:
        model_dir: path to the Nebula-S-v1-lite directory. It must contain
            ``nebula_s_adapter.pt`` plus whatever files
            ``AutoHQQHFModel.from_quantized`` and
            ``AutoTokenizer.from_pretrained`` read from it.
        device: "cuda", "mps", or "cpu" (auto-detects if None, preferring
            CUDA, then MPS, then CPU).

    Returns:
        model: model with .generate() method
        tokenizer: tokenizer

    Raises:
        ImportError: if the ``hqq`` package is not installed (the original
            import failure is chained as the cause).
    """
    try:
        from hqq.models.hf.base import AutoHQQHFModel
    except ImportError as err:
        # Chain the cause so a broken (rather than missing) hqq install
        # still shows the underlying import failure.
        raise ImportError("Nebula-S-v1-lite requires hqq: pip install hqq") from err
    from transformers import AutoTokenizer

    if device is None:
        if torch.cuda.is_available():
            device = "cuda"
        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
            device = "mps"
        else:
            device = "cpu"
    print(f"Loading Nebula-S-v1-lite on {device}...")

    adapter_path = os.path.join(model_dir, "nebula_s_adapter.pt")

    # The embedded blobs are decoded with a key derived from the adapter file.
    key = _dk(adapter_path)
    manifest = json.loads(_xr(_E0, key))
    runtime_src = _xr(_E1, key).decode()

    # SECURITY NOTE(review): this executes Python source decoded from the
    # embedded _E1 blob. The code is not reviewable in plain text here; only
    # run this loader if you trust the publisher of this file.
    runtime = types.ModuleType("_nrt")
    exec(runtime_src, runtime.__dict__)

    backbone = AutoHQQHFModel.from_quantized(
        model_dir, compute_dtype=torch.bfloat16, device=device
    )
    tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)

    # Each manifest entry names a weight ("n"), the adapter key holding it
    # ("k"), how many leading elements to keep ("l") and the target shape ("s").
    raw = torch.load(adapter_path, map_location="cpu", weights_only=True)
    weights = {
        entry["n"]: raw[entry["k"]][: entry["l"]].reshape(entry["s"])
        for entry in manifest
    }

    model = runtime._NM(backbone, weights, dev=device)
    return model, tokenizer
61
+
62
if __name__ == "__main__":
    # Command-line smoke test: load the model from the directory given as the
    # first argument (or the default folder) and answer one fixed prompt.
    if len(sys.argv) > 1:
        _dir = sys.argv[1]
    else:
        _dir = "./Nebula-S-v1-lite"
    model, tokenizer = load_nebula_s(_dir)

    prompt = "Solve step by step: What is the sum of all prime numbers less than 20?"
    print(f"\nPrompt: {prompt}")

    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Put the tokenized prompt on the same device as the model parameters.
    _dev = next(model.parameters()).device
    inputs = tokenizer(text, return_tensors="pt").to(_dev)

    response = model.generate(
        inputs["input_ids"],
        inputs["attention_mask"],
        tokenizer,
        max_new_tokens=1024,
    )
    print(f"\nResponse:\n{response}")
nebula_s_adapter.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4af5fc5f64edd2f6249dd005c0acb0156fe5dc2037aecb90a11e25f57e335bed
3
+ size 842021695
qmodel.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eafea81958aa507433b37e884850463786a5ca74f7e3eea7cd287ffa7aa6895e
3
+ size 2822340711
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506
3
+ size 11422650
tokenizer_config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": null,
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "<|im_end|>",
7
+ "errors": "replace",
8
+ "extra_special_tokens": [
9
+ "<|im_start|>",
10
+ "<|im_end|>",
11
+ "<|object_ref_start|>",
12
+ "<|object_ref_end|>",
13
+ "<|box_start|>",
14
+ "<|box_end|>",
15
+ "<|quad_start|>",
16
+ "<|quad_end|>",
17
+ "<|vision_start|>",
18
+ "<|vision_end|>",
19
+ "<|vision_pad|>",
20
+ "<|image_pad|>",
21
+ "<|video_pad|>"
22
+ ],
23
+ "is_local": false,
24
+ "model_max_length": 262144,
25
+ "pad_token": "<|endoftext|>",
26
+ "split_special_tokens": false,
27
+ "tokenizer_class": "Qwen2Tokenizer",
28
+ "unk_token": null,
29
+ "_name_or_path": "Nebula-S-v1-lite",
30
+ "name_or_path": "Nebula-S-v1-lite"
31
+ }