GitHub Actions commited on
Commit
90a59b3
·
1 Parent(s): f1f7203

fix(llm): single multi-model llm.chat so sponsor backends are reachable

Browse files

Registry keys local caps by (node,name,version); registering one llm.chat per backend overwrote earlier ones, so HF (registered last) hid Nemotron/Modal/MiniCPM even with NVIDIA_API_KEY set. Now LlmService registers one llm.chat/llm.complete advertising the full model catalogue in params.models and dispatches each call to the owning backend via _resolve_backend. _model_matches and registry _remote_params_compatible honour the catalogue for cross-node routing.

hearthnet/bus/registry.py CHANGED
@@ -52,11 +52,20 @@ class Registry:
52
  # Use a general params-compatibility check for remote entries so that
53
  # corpus/model/lang routing works across the mesh without needing to
54
  # transfer Python callables over the wire.
55
- offered_params = dict(descriptor.params)
56
-
57
  def _remote_params_compatible(offered: dict, requested: dict) -> bool:
58
  for key, value in requested.items():
59
- if value is not None and key in offered and offered[key] != value:
 
 
 
 
 
 
 
 
 
 
 
60
  return False
61
  return True
62
 
 
52
  # Use a general params-compatibility check for remote entries so that
53
  # corpus/model/lang routing works across the mesh without needing to
54
  # transfer Python callables over the wire.
 
 
55
  def _remote_params_compatible(offered: dict, requested: dict) -> bool:
56
  for key, value in requested.items():
57
+ if value is None:
58
+ continue
59
+ if key == "model":
60
+ # A capability may advertise a catalogue of models it serves
61
+ # ("models") in addition to its primary ("model").
62
+ catalogue = offered.get("models")
63
+ if catalogue and value in catalogue:
64
+ continue
65
+ if offered.get("model") == value:
66
+ continue
67
+ return False
68
+ if key in offered and offered[key] != value:
69
  return False
70
  return True
71
 
hearthnet/services/llm/service.py CHANGED
@@ -47,90 +47,102 @@ class LlmService:
47
  self._backends = [_UnavailableBackend()]
48
 
49
  def capabilities(self) -> list[tuple]:
50
- result = []
51
- for backend in self._backends:
52
- for bm in backend.models:
53
- descriptor = CapabilityDescriptor(
54
- name="llm.chat",
55
- version=(1, 0),
56
- stability="stable",
57
- params={"model": bm.name, "requires_internet": bm.requires_internet},
58
- max_concurrent=2,
59
- trust_required="member",
60
- timeout_seconds=120,
61
- idempotent=False,
62
- )
63
- result.append(
64
- (descriptor, self._make_chat_handler(backend, bm.name), _model_matches)
65
- )
66
- descriptor_complete = CapabilityDescriptor(
67
- name="llm.complete",
68
- version=(1, 0),
69
- stability="stable",
70
- params={"model": bm.name, "requires_internet": bm.requires_internet},
71
- max_concurrent=2,
72
- trust_required="member",
73
- timeout_seconds=120,
74
- idempotent=False,
75
- )
76
- result.append(
77
- (
78
- descriptor_complete,
79
- self._make_complete_handler(backend, bm.name),
80
- _model_matches,
81
- )
82
- )
83
- return result
84
-
85
- def _make_chat_handler(self, backend: LlmBackend, model_name: str):
86
- async def handle_chat(req: RouteRequest) -> dict:
87
- inp = req.body.get("input", {})
88
- messages = inp.get("messages", [])
89
- params = req.body.get("params", {})
90
- temperature = float(params.get("temperature", 0.7))
91
- max_tokens = int(params.get("max_tokens", 1024))
92
- try:
93
- result = await backend.chat(
94
- messages,
95
- model=model_name,
96
- stream=False,
97
- temperature=temperature,
98
- max_tokens=max_tokens,
99
- )
100
- return {
101
- "output": {"message": {"role": "assistant", "content": result.text}},
102
- "meta": {
103
- "model": result.model,
104
- "tokens_in": result.tokens_in,
105
- "tokens_out": result.tokens_out,
106
- "ms": result.ms,
107
- },
108
- }
109
- except Exception as exc:
110
- return {"error": "internal_error", "message": str(exc)}
111
-
112
- return handle_chat
113
-
114
- def _make_complete_handler(self, backend: LlmBackend, model_name: str):
115
- async def handle_complete(req: RouteRequest) -> dict:
116
- inp = req.body.get("input", {})
117
- prompt = inp.get("prompt", "")
118
- params = req.body.get("params", {})
119
- try:
120
- result = await backend.complete(prompt, model=model_name, stream=False)
121
- return {
122
- "output": {"text": result.text},
123
- "meta": {
124
- "model": result.model,
125
- "tokens_in": result.tokens_in,
126
- "tokens_out": result.tokens_out,
127
- "ms": result.ms,
128
- },
129
- }
130
- except Exception as exc:
131
- return {"error": "internal_error", "message": str(exc)}
132
-
133
- return handle_complete
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
 
136
  class _UnavailableBackend:
@@ -251,4 +263,9 @@ class _EchoBackend:
251
 
252
 
253
  def _model_matches(offered: dict, requested: dict) -> bool:
254
- return not requested.get("model") or requested.get("model") == offered.get("model")
 
 
 
 
 
 
47
  self._backends = [_UnavailableBackend()]
48
 
49
  def capabilities(self) -> list[tuple]:
50
+ # Collect every (backend, model) pair across all configured backends.
51
+ # The registry keys local capabilities by (node, name, version), so a
52
+ # separate llm.chat per model would collide and only the last would
53
+ # survive — making additional backends (e.g. sponsor clouds) unreachable.
54
+ # Instead we register ONE llm.chat / llm.complete that advertises the
55
+ # full model catalogue and dispatches to the owning backend by model.
56
+ model_entries = [(backend, bm) for backend in self._backends for bm in backend.models]
57
+ if not model_entries:
58
+ return []
59
+ _primary_backend, primary_bm = model_entries[0]
60
+ model_names = [bm.name for _, bm in model_entries]
61
+ params = {
62
+ "model": primary_bm.name,
63
+ "models": model_names,
64
+ "requires_internet": primary_bm.requires_internet,
65
+ }
66
+ chat_descriptor = CapabilityDescriptor(
67
+ name="llm.chat",
68
+ version=(1, 0),
69
+ stability="stable",
70
+ params=dict(params),
71
+ max_concurrent=2,
72
+ trust_required="member",
73
+ timeout_seconds=120,
74
+ idempotent=False,
75
+ )
76
+ complete_descriptor = CapabilityDescriptor(
77
+ name="llm.complete",
78
+ version=(1, 0),
79
+ stability="stable",
80
+ params=dict(params),
81
+ max_concurrent=2,
82
+ trust_required="member",
83
+ timeout_seconds=120,
84
+ idempotent=False,
85
+ )
86
+ return [
87
+ (chat_descriptor, self._handle_chat, _model_matches),
88
+ (complete_descriptor, self._handle_complete, _model_matches),
89
+ ]
90
+
91
+ def _resolve_backend(self, model_name: str) -> tuple[LlmBackend, str]:
92
+ """Pick the backend that serves ``model_name``; fall back to primary."""
93
+ if model_name:
94
+ for backend in self._backends:
95
+ for bm in backend.models:
96
+ if bm.name == model_name:
97
+ return backend, model_name
98
+ backend = self._backends[0]
99
+ return backend, backend.models[0].name
100
+
101
+ async def _handle_chat(self, req: RouteRequest) -> dict:
102
+ inp = req.body.get("input", {})
103
+ messages = inp.get("messages", [])
104
+ params = req.body.get("params", {})
105
+ backend, model_name = self._resolve_backend(str(params.get("model") or ""))
106
+ temperature = float(params.get("temperature", 0.7))
107
+ max_tokens = int(params.get("max_tokens", 1024))
108
+ try:
109
+ result = await backend.chat(
110
+ messages,
111
+ model=model_name,
112
+ stream=False,
113
+ temperature=temperature,
114
+ max_tokens=max_tokens,
115
+ )
116
+ return {
117
+ "output": {"message": {"role": "assistant", "content": result.text}},
118
+ "meta": {
119
+ "model": result.model,
120
+ "tokens_in": result.tokens_in,
121
+ "tokens_out": result.tokens_out,
122
+ "ms": result.ms,
123
+ },
124
+ }
125
+ except Exception as exc:
126
+ return {"error": "internal_error", "message": str(exc)}
127
+
128
+ async def _handle_complete(self, req: RouteRequest) -> dict:
129
+ inp = req.body.get("input", {})
130
+ prompt = inp.get("prompt", "")
131
+ params = req.body.get("params", {})
132
+ backend, model_name = self._resolve_backend(str(params.get("model") or ""))
133
+ try:
134
+ result = await backend.complete(prompt, model=model_name, stream=False)
135
+ return {
136
+ "output": {"text": result.text},
137
+ "meta": {
138
+ "model": result.model,
139
+ "tokens_in": result.tokens_in,
140
+ "tokens_out": result.tokens_out,
141
+ "ms": result.ms,
142
+ },
143
+ }
144
+ except Exception as exc:
145
+ return {"error": "internal_error", "message": str(exc)}
146
 
147
 
148
  class _UnavailableBackend:
 
263
 
264
 
265
  def _model_matches(offered: dict, requested: dict) -> bool:
266
+ req = requested.get("model")
267
+ if not req:
268
+ return True
269
+ if req == offered.get("model"):
270
+ return True
271
+ return req in (offered.get("models") or [])
tests/test_components_real.py CHANGED
@@ -291,7 +291,7 @@ class TestBusRouting:
291
  def test_unknown_capability_raises(self, mesh):
292
  """Calling a capability no node provides raises, not silently fails."""
293
  alice, _ = mesh
294
- with pytest.raises(Exception, match="not_found|not_implemented|partition"): # BusError
295
  _run(alice.bus.call(
296
  "nonexistent.capability", (1, 0), {},
297
  ))
 
291
  def test_unknown_capability_raises(self, mesh):
292
  """Calling a capability no node provides raises, not silently fails."""
293
  alice, _ = mesh
294
+ with pytest.raises(Exception, match="not_found|not_implemented|partition|no provider"): # BusError
295
  _run(alice.bus.call(
296
  "nonexistent.capability", (1, 0), {},
297
  ))
tests/test_sponsor_backends.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Sponsor LLM backends are wired when their env vars are set (prize tracks)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from hearthnet.node import HearthNode
6
+
7
+
8
+ def _llm_models(node: HearthNode) -> set[str]:
9
+ """Collect every model name served by registered llm.chat capabilities.
10
+
11
+ LlmService registers a single llm.chat that advertises its full model
12
+ catalogue in params["models"], dispatching to the owning backend by model.
13
+ """
14
+ models: set[str] = set()
15
+ for e in node.bus.registry.all_local():
16
+ if e.descriptor.name != "llm.chat":
17
+ continue
18
+ primary = e.descriptor.params.get("model")
19
+ if primary:
20
+ models.add(str(primary))
21
+ models.update(str(m) for m in e.descriptor.params.get("models", []))
22
+ return models
23
+
24
+
25
+ def _nemotron_models() -> set[str]:
26
+ from hearthnet.services.llm.backends.nemotron import NemotronBackend
27
+
28
+ backend = NemotronBackend(api_key_env="NVIDIA_API_KEY")
29
+ return {bm.name for bm in backend.models}
30
+
31
+
32
+ def test_nemotron_wired_when_key_set(monkeypatch) -> None:
33
+ monkeypatch.setenv("NVIDIA_API_KEY", "test-key-not-real")
34
+ monkeypatch.delenv("MODAL_ENDPOINT", raising=False)
35
+ node = HearthNode("ed25519:nv", "NV", "ed25519:test-community")
36
+ node.install_services(corpus="t")
37
+ # At least one of Nemotron's models must now be served via llm.chat.
38
+ assert _llm_models(node) & _nemotron_models()
39
+
40
+
41
+ def test_no_sponsor_backend_without_env(monkeypatch) -> None:
42
+ monkeypatch.delenv("NVIDIA_API_KEY", raising=False)
43
+ monkeypatch.delenv("NEMOTRON_URL", raising=False)
44
+ monkeypatch.delenv("MODAL_ENDPOINT", raising=False)
45
+ monkeypatch.delenv("MINICPM_URL", raising=False)
46
+ node = HearthNode("ed25519:none", "None", "ed25519:test-community")
47
+ node.install_services(corpus="t")
48
+ # Without the key, none of Nemotron's models should be registered.
49
+ assert not (_llm_models(node) & _nemotron_models())
50
+
51
+
52
+ def test_nemotron_backend_constructs() -> None:
53
+ from hearthnet.services.llm.backends.nemotron import NemotronBackend
54
+
55
+ backend = NemotronBackend(api_key_env="NVIDIA_API_KEY")
56
+ assert backend.name == "nemotron"