AbteeXAILabs's picture
feat: initial LumynaX scaffold (card v6 + quickstart + manifest + Modelfile + Space scaffold)
cc82b99 verified
Raw
History Blame
3.82 kB
"""
LumynaX Translate NLLB-200 3.3B — LumynaX quickstart.
This script fetches the upstream model from Hugging Face and runs a short
LumynaX-flavoured prompt. Run it on a host that satisfies the resource budget
documented in the README (LumynaX Translate NLLB-200 3.3B).
Usage:
python quickstart.py # one-shot demo prompt
python quickstart.py --interactive # REPL
python quickstart.py --gguf # use the GGUF mirror via llama-cpp
LumynaX package repo: https://huggingface.co/AbteeXAILab/lumynax-translate-nllb-200-3b
Upstream weights: https://huggingface.co/facebook/nllb-200-3.3B
"""
from __future__ import annotations
import argparse, os, sys
LUMYNAX_SYSTEM = (
"You are LumynaX, the AbteeX AI Labs assistant from Aotearoa New Zealand. "
"Ko te marama te tuapapa - the light is the foundation. "
"Answer with care, cite uncertainty, and prefer local-first reasoning. "
"Refuse unsafe, unlawful, or sovereignty-violating requests."
)
DEMO_PROMPT = "Explain in 3 bullets why local-first AI matters for Aotearoa New Zealand."
def _run_hf(prompt: str, interactive: bool):
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
print("[lumynax] Loading facebook/nllb-200-3.3B. This is a >100B MoE — multi-GPU or accelerate offload recommended.")
tok = AutoTokenizer.from_pretrained("facebook/nllb-200-3.3B", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
"facebook/nllb-200-3.3B", device_map="auto", torch_dtype="auto", trust_remote_code=True
)
def chat(user):
messages = [
{"role": "system", "content": LUMYNAX_SYSTEM},
{"role": "user", "content": user},
]
text = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tok(text, return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=512, do_sample=True, temperature=0.4)
return tok.decode(out[0, inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
if interactive:
print("[lumynax] interactive mode — empty line exits.")
while True:
try: q = input("you> ").strip()
except EOFError: break
if not q: break
print("lumynax> " + chat(q))
else:
print(chat(prompt))
def _run_gguf(prompt: str, interactive: bool):
from llama_cpp import Llama
mirror = ""
if not mirror:
print("[lumynax] No community GGUF mirror registered for this build."); sys.exit(2)
print(f"[lumynax] Loading GGUF from {mirror}...")
llm = Llama.from_pretrained(
repo_id=mirror, filename="*Q4_K_M*.gguf",
n_ctx=1024,
n_gpu_layers=int(os.environ.get("N_GPU_LAYERS", "-1")), verbose=False,
)
def chat(user):
out = llm.create_chat_completion(messages=[
{"role": "system", "content": LUMYNAX_SYSTEM},
{"role": "user", "content": user},
], max_tokens=512, temperature=0.4)
return out["choices"][0]["message"]["content"]
if interactive:
while True:
try: q = input("you> ").strip()
except EOFError: break
if not q: break
print("lumynax> " + chat(q))
else:
print(chat(prompt))
def main():
p = argparse.ArgumentParser()
p.add_argument("--interactive", action="store_true")
p.add_argument("--prompt", default=DEMO_PROMPT)
p.add_argument("--gguf", action="store_true")
args = p.parse_args()
if args.gguf:
_run_gguf(args.prompt, args.interactive)
else:
_run_hf(args.prompt, args.interactive)
if __name__ == "__main__":
main()