AbteeXAILabs committed on
Commit
2272825
·
verified ·
1 Parent(s): ce5cc1d

docs(quickstart): load mirrored local weights (no upstream fetch)

Browse files
Files changed (1) hide show
  1. quickstart.py +53 -64
quickstart.py CHANGED
@@ -1,64 +1,53 @@
1
- """
2
- LumynaX Reasoning QwQ-32B GGUF — LumynaX quickstart.
3
-
4
- This script fetches the upstream model from Hugging Face and runs a short
5
- LumynaX-flavoured prompt. Run it on a host that satisfies the resource budget
6
- documented in the README (LumynaX Reasoning QwQ-32B GGUF).
7
-
8
- Usage:
9
- python quickstart.py # one-shot demo prompt
10
- python quickstart.py --interactive # REPL
11
- python quickstart.py --gguf # use the GGUF mirror via llama-cpp
12
-
13
- LumynaX package repo: https://huggingface.co/AbteeXAILab/lumynax-reasoning-qwq-32b-gguf
14
- Upstream weights: https://huggingface.co/Qwen/QwQ-32B
15
- """
16
- from __future__ import annotations
17
- import argparse, os, sys
18
-
19
- LUMYNAX_SYSTEM = (
20
- "You are LumynaX, the AbteeX AI Labs assistant from Aotearoa New Zealand. "
21
- "Ko te marama te tuapapa - the light is the foundation. "
22
- "Answer with care, cite uncertainty, and prefer local-first reasoning. "
23
- "Refuse unsafe, unlawful, or sovereignty-violating requests."
24
- )
25
- DEMO_PROMPT = "Explain in 3 bullets why local-first AI matters for Aotearoa New Zealand."
26
-
27
- def _run_gguf(prompt: str, interactive: bool):
28
- from llama_cpp import Llama
29
- print("[lumynax] Loading GGUF from Qwen/QwQ-32B-GGUF (this can be large)...")
30
- llm = Llama.from_pretrained(
31
- repo_id="Qwen/QwQ-32B-GGUF",
32
- filename="qwq-32b-q4_k_m.gguf",
33
- n_ctx=16384,
34
- n_gpu_layers=int(os.environ.get("N_GPU_LAYERS", "-1")),
35
- verbose=False,
36
- )
37
- def chat(user):
38
- out = llm.create_chat_completion(messages=[
39
- {"role": "system", "content": LUMYNAX_SYSTEM},
40
- {"role": "user", "content": user},
41
- ], max_tokens=512, temperature=0.4)
42
- return out["choices"][0]["message"]["content"]
43
- if interactive:
44
- print("[lumynax] interactive mode — empty line exits.")
45
- while True:
46
- try: q = input("you> ").strip()
47
- except EOFError: break
48
- if not q: break
49
- print("lumynax> " + chat(q))
50
- else:
51
- print(chat(prompt))
52
-
53
-
54
- def main():
55
- p = argparse.ArgumentParser()
56
- p.add_argument("--interactive", action="store_true")
57
- p.add_argument("--prompt", default=DEMO_PROMPT)
58
- p.add_argument("--gguf", action="store_true", help="kept for compatibility — this build is GGUF-only")
59
- args = p.parse_args()
60
- _run_gguf(args.prompt, args.interactive)
61
-
62
-
63
- if __name__ == "__main__":
64
- main()
 
1
+ """
2
+ LumynaX Reasoning QwQ-32B GGUF — LumynaX quickstart (clone & run).
3
+
4
+ This loads the GGUF that ships with this repo. No upstream HF call needed
5
+ once you've done `hf download AbteeXAILab/lumynax-reasoning-qwq-32b-gguf`.
6
+
7
+ Usage:
8
+ python quickstart.py # one-shot demo prompt
9
+ python quickstart.py --interactive # REPL
10
+ """
11
+ from __future__ import annotations
12
+ import argparse, glob, os, sys
13
+ from pathlib import Path
14
+
15
# System prompt sent as the first chat message on every request.
LUMYNAX_SYSTEM = (
    "You are LumynaX, the AbteeX AI Labs assistant from Aotearoa New Zealand. "
    "Ko te marama te tuapapa. "
    "Answer with care; cite uncertainty; refuse unsafe asks."
)
# Prompt used when --prompt is not supplied.
DEMO_PROMPT = "Explain in 3 bullets why local-first AI matters for Aotearoa New Zealand."

# Locate the primary GGUF that was downloaded alongside this script.
HERE = Path(__file__).resolve().parent
PRIMARY = HERE / "qwq-32b-q4_k_m.gguf"


def main(argv=None):
    """Run the LumynaX quickstart against the GGUF stored next to this script.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When ``None`` (the default) argparse reads ``sys.argv``.

    Exits with status 2 (via ``sys.exit``) when the weight file ``PRIMARY``
    is not present, after printing a download hint to stderr.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--interactive", action="store_true")
    parser.add_argument("--prompt", default=DEMO_PROMPT)
    args = parser.parse_args(argv)

    if not PRIMARY.exists():
        print(f"[lumynax] primary weight file missing: {PRIMARY}", file=sys.stderr)
        print(
            "[lumynax] run: hf download AbteeXAILab/lumynax-reasoning-qwq-32b-gguf --local-dir <dir> first.",
            file=sys.stderr,
        )
        sys.exit(2)

    # Imported only once the weights are known to exist, so that --help and
    # the "weights missing" hint work even where llama_cpp is not installed.
    from llama_cpp import Llama

    # The previous log line interpolated an undefined `shard_log_suffix`,
    # which raised NameError before the model could load.
    print(f"[lumynax] loading {PRIMARY.name}")
    llm = Llama(
        model_path=str(PRIMARY),
        n_ctx=16384,
        # N_GPU_LAYERS overrides the default of -1 passed to llama_cpp.
        n_gpu_layers=int(os.environ.get("N_GPU_LAYERS", "-1")),
        verbose=False,
    )

    def chat(user):
        """Send one user message (with the system prompt) and return the reply text."""
        out = llm.create_chat_completion(
            messages=[
                {"role": "system", "content": LUMYNAX_SYSTEM},
                {"role": "user", "content": user},
            ],
            max_tokens=512,
            temperature=0.4,
        )
        return out["choices"][0]["message"]["content"]

    if not args.interactive:
        print(chat(args.prompt))
        return

    print("[lumynax] interactive mode — empty line exits.")
    while True:
        try:
            question = input("you> ").strip()
        except EOFError:
            break
        if not question:
            break
        print("lumynax> " + chat(question))


if __name__ == "__main__":
    main()