Spaces:
Running on Zero
Running on Zero
GitHub Actions commited on
Commit ·
90a59b3
1
Parent(s): f1f7203
fix(llm): single multi-model llm.chat so sponsor backends are reachable
Browse filesRegistry keys local caps by (node,name,version); registering one llm.chat per backend overwrote earlier ones, so HF (registered last) hid Nemotron/Modal/MiniCPM even with NVIDIA_API_KEY set. Now LlmService registers one llm.chat/llm.complete advertising the full model catalogue in params.models and dispatches each call to the owning backend via _resolve_backend. _model_matches and registry _remote_params_compatible honour the catalogue for cross-node routing.
- hearthnet/bus/registry.py +12 -3
- hearthnet/services/llm/service.py +102 -85
- tests/test_components_real.py +1 -1
- tests/test_sponsor_backends.py +56 -0
hearthnet/bus/registry.py
CHANGED
|
@@ -52,11 +52,20 @@ class Registry:
|
|
| 52 |
# Use a general params-compatibility check for remote entries so that
|
| 53 |
# corpus/model/lang routing works across the mesh without needing to
|
| 54 |
# transfer Python callables over the wire.
|
| 55 |
-
offered_params = dict(descriptor.params)
|
| 56 |
-
|
| 57 |
def _remote_params_compatible(offered: dict, requested: dict) -> bool:
|
| 58 |
for key, value in requested.items():
|
| 59 |
-
if value is
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
return False
|
| 61 |
return True
|
| 62 |
|
|
|
|
| 52 |
# Use a general params-compatibility check for remote entries so that
|
| 53 |
# corpus/model/lang routing works across the mesh without needing to
|
| 54 |
# transfer Python callables over the wire.
|
|
|
|
|
|
|
| 55 |
def _remote_params_compatible(offered: dict, requested: dict) -> bool:
|
| 56 |
for key, value in requested.items():
|
| 57 |
+
if value is None:
|
| 58 |
+
continue
|
| 59 |
+
if key == "model":
|
| 60 |
+
# A capability may advertise a catalogue of models it serves
|
| 61 |
+
# ("models") in addition to its primary ("model").
|
| 62 |
+
catalogue = offered.get("models")
|
| 63 |
+
if catalogue and value in catalogue:
|
| 64 |
+
continue
|
| 65 |
+
if offered.get("model") == value:
|
| 66 |
+
continue
|
| 67 |
+
return False
|
| 68 |
+
if key in offered and offered[key] != value:
|
| 69 |
return False
|
| 70 |
return True
|
| 71 |
|
hearthnet/services/llm/service.py
CHANGED
|
@@ -47,90 +47,102 @@ class LlmService:
|
|
| 47 |
self._backends = [_UnavailableBackend()]
|
| 48 |
|
| 49 |
def capabilities(self) -> list[tuple]:
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
"
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
|
| 135 |
|
| 136 |
class _UnavailableBackend:
|
|
@@ -251,4 +263,9 @@ class _EchoBackend:
|
|
| 251 |
|
| 252 |
|
| 253 |
def _model_matches(offered: dict, requested: dict) -> bool:
|
| 254 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
self._backends = [_UnavailableBackend()]
|
| 48 |
|
| 49 |
def capabilities(self) -> list[tuple]:
|
| 50 |
+
# Collect every (backend, model) pair across all configured backends.
|
| 51 |
+
# The registry keys local capabilities by (node, name, version), so a
|
| 52 |
+
# separate llm.chat per model would collide and only the last would
|
| 53 |
+
# survive — making additional backends (e.g. sponsor clouds) unreachable.
|
| 54 |
+
# Instead we register ONE llm.chat / llm.complete that advertises the
|
| 55 |
+
# full model catalogue and dispatches to the owning backend by model.
|
| 56 |
+
model_entries = [(backend, bm) for backend in self._backends for bm in backend.models]
|
| 57 |
+
if not model_entries:
|
| 58 |
+
return []
|
| 59 |
+
_primary_backend, primary_bm = model_entries[0]
|
| 60 |
+
model_names = [bm.name for _, bm in model_entries]
|
| 61 |
+
params = {
|
| 62 |
+
"model": primary_bm.name,
|
| 63 |
+
"models": model_names,
|
| 64 |
+
"requires_internet": primary_bm.requires_internet,
|
| 65 |
+
}
|
| 66 |
+
chat_descriptor = CapabilityDescriptor(
|
| 67 |
+
name="llm.chat",
|
| 68 |
+
version=(1, 0),
|
| 69 |
+
stability="stable",
|
| 70 |
+
params=dict(params),
|
| 71 |
+
max_concurrent=2,
|
| 72 |
+
trust_required="member",
|
| 73 |
+
timeout_seconds=120,
|
| 74 |
+
idempotent=False,
|
| 75 |
+
)
|
| 76 |
+
complete_descriptor = CapabilityDescriptor(
|
| 77 |
+
name="llm.complete",
|
| 78 |
+
version=(1, 0),
|
| 79 |
+
stability="stable",
|
| 80 |
+
params=dict(params),
|
| 81 |
+
max_concurrent=2,
|
| 82 |
+
trust_required="member",
|
| 83 |
+
timeout_seconds=120,
|
| 84 |
+
idempotent=False,
|
| 85 |
+
)
|
| 86 |
+
return [
|
| 87 |
+
(chat_descriptor, self._handle_chat, _model_matches),
|
| 88 |
+
(complete_descriptor, self._handle_complete, _model_matches),
|
| 89 |
+
]
|
| 90 |
+
|
| 91 |
+
def _resolve_backend(self, model_name: str) -> tuple[LlmBackend, str]:
|
| 92 |
+
"""Pick the backend that serves ``model_name``; fall back to primary."""
|
| 93 |
+
if model_name:
|
| 94 |
+
for backend in self._backends:
|
| 95 |
+
for bm in backend.models:
|
| 96 |
+
if bm.name == model_name:
|
| 97 |
+
return backend, model_name
|
| 98 |
+
backend = self._backends[0]
|
| 99 |
+
return backend, backend.models[0].name
|
| 100 |
+
|
| 101 |
+
async def _handle_chat(self, req: RouteRequest) -> dict:
|
| 102 |
+
inp = req.body.get("input", {})
|
| 103 |
+
messages = inp.get("messages", [])
|
| 104 |
+
params = req.body.get("params", {})
|
| 105 |
+
backend, model_name = self._resolve_backend(str(params.get("model") or ""))
|
| 106 |
+
temperature = float(params.get("temperature", 0.7))
|
| 107 |
+
max_tokens = int(params.get("max_tokens", 1024))
|
| 108 |
+
try:
|
| 109 |
+
result = await backend.chat(
|
| 110 |
+
messages,
|
| 111 |
+
model=model_name,
|
| 112 |
+
stream=False,
|
| 113 |
+
temperature=temperature,
|
| 114 |
+
max_tokens=max_tokens,
|
| 115 |
+
)
|
| 116 |
+
return {
|
| 117 |
+
"output": {"message": {"role": "assistant", "content": result.text}},
|
| 118 |
+
"meta": {
|
| 119 |
+
"model": result.model,
|
| 120 |
+
"tokens_in": result.tokens_in,
|
| 121 |
+
"tokens_out": result.tokens_out,
|
| 122 |
+
"ms": result.ms,
|
| 123 |
+
},
|
| 124 |
+
}
|
| 125 |
+
except Exception as exc:
|
| 126 |
+
return {"error": "internal_error", "message": str(exc)}
|
| 127 |
+
|
| 128 |
+
async def _handle_complete(self, req: RouteRequest) -> dict:
|
| 129 |
+
inp = req.body.get("input", {})
|
| 130 |
+
prompt = inp.get("prompt", "")
|
| 131 |
+
params = req.body.get("params", {})
|
| 132 |
+
backend, model_name = self._resolve_backend(str(params.get("model") or ""))
|
| 133 |
+
try:
|
| 134 |
+
result = await backend.complete(prompt, model=model_name, stream=False)
|
| 135 |
+
return {
|
| 136 |
+
"output": {"text": result.text},
|
| 137 |
+
"meta": {
|
| 138 |
+
"model": result.model,
|
| 139 |
+
"tokens_in": result.tokens_in,
|
| 140 |
+
"tokens_out": result.tokens_out,
|
| 141 |
+
"ms": result.ms,
|
| 142 |
+
},
|
| 143 |
+
}
|
| 144 |
+
except Exception as exc:
|
| 145 |
+
return {"error": "internal_error", "message": str(exc)}
|
| 146 |
|
| 147 |
|
| 148 |
class _UnavailableBackend:
|
|
|
|
| 263 |
|
| 264 |
|
| 265 |
def _model_matches(offered: dict, requested: dict) -> bool:
|
| 266 |
+
req = requested.get("model")
|
| 267 |
+
if not req:
|
| 268 |
+
return True
|
| 269 |
+
if req == offered.get("model"):
|
| 270 |
+
return True
|
| 271 |
+
return req in (offered.get("models") or [])
|
tests/test_components_real.py
CHANGED
|
@@ -291,7 +291,7 @@ class TestBusRouting:
|
|
| 291 |
def test_unknown_capability_raises(self, mesh):
|
| 292 |
"""Calling a capability no node provides raises, not silently fails."""
|
| 293 |
alice, _ = mesh
|
| 294 |
-
with pytest.raises(Exception, match="not_found|not_implemented|partition"): # BusError
|
| 295 |
_run(alice.bus.call(
|
| 296 |
"nonexistent.capability", (1, 0), {},
|
| 297 |
))
|
|
|
|
| 291 |
def test_unknown_capability_raises(self, mesh):
|
| 292 |
"""Calling a capability no node provides raises, not silently fails."""
|
| 293 |
alice, _ = mesh
|
| 294 |
+
with pytest.raises(Exception, match="not_found|not_implemented|partition|no provider"): # BusError
|
| 295 |
_run(alice.bus.call(
|
| 296 |
"nonexistent.capability", (1, 0), {},
|
| 297 |
))
|
tests/test_sponsor_backends.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Sponsor LLM backends are wired when their env vars are set (prize tracks)."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from hearthnet.node import HearthNode
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def _llm_models(node: HearthNode) -> set[str]:
|
| 9 |
+
"""Collect every model name served by registered llm.chat capabilities.
|
| 10 |
+
|
| 11 |
+
LlmService registers a single llm.chat that advertises its full model
|
| 12 |
+
catalogue in params["models"], dispatching to the owning backend by model.
|
| 13 |
+
"""
|
| 14 |
+
models: set[str] = set()
|
| 15 |
+
for e in node.bus.registry.all_local():
|
| 16 |
+
if e.descriptor.name != "llm.chat":
|
| 17 |
+
continue
|
| 18 |
+
primary = e.descriptor.params.get("model")
|
| 19 |
+
if primary:
|
| 20 |
+
models.add(str(primary))
|
| 21 |
+
models.update(str(m) for m in e.descriptor.params.get("models", []))
|
| 22 |
+
return models
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def _nemotron_models() -> set[str]:
|
| 26 |
+
from hearthnet.services.llm.backends.nemotron import NemotronBackend
|
| 27 |
+
|
| 28 |
+
backend = NemotronBackend(api_key_env="NVIDIA_API_KEY")
|
| 29 |
+
return {bm.name for bm in backend.models}
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def test_nemotron_wired_when_key_set(monkeypatch) -> None:
|
| 33 |
+
monkeypatch.setenv("NVIDIA_API_KEY", "test-key-not-real")
|
| 34 |
+
monkeypatch.delenv("MODAL_ENDPOINT", raising=False)
|
| 35 |
+
node = HearthNode("ed25519:nv", "NV", "ed25519:test-community")
|
| 36 |
+
node.install_services(corpus="t")
|
| 37 |
+
# At least one of Nemotron's models must now be served via llm.chat.
|
| 38 |
+
assert _llm_models(node) & _nemotron_models()
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def test_no_sponsor_backend_without_env(monkeypatch) -> None:
|
| 42 |
+
monkeypatch.delenv("NVIDIA_API_KEY", raising=False)
|
| 43 |
+
monkeypatch.delenv("NEMOTRON_URL", raising=False)
|
| 44 |
+
monkeypatch.delenv("MODAL_ENDPOINT", raising=False)
|
| 45 |
+
monkeypatch.delenv("MINICPM_URL", raising=False)
|
| 46 |
+
node = HearthNode("ed25519:none", "None", "ed25519:test-community")
|
| 47 |
+
node.install_services(corpus="t")
|
| 48 |
+
# Without the key, none of Nemotron's models should be registered.
|
| 49 |
+
assert not (_llm_models(node) & _nemotron_models())
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def test_nemotron_backend_constructs() -> None:
|
| 53 |
+
from hearthnet.services.llm.backends.nemotron import NemotronBackend
|
| 54 |
+
|
| 55 |
+
backend = NemotronBackend(api_key_env="NVIDIA_API_KEY")
|
| 56 |
+
assert backend.name == "nemotron"
|