Cyril Dupland commited on
Commit ·
49420f1
1
Parent(s): 767c8ff
feat voice: update documentation for `trigger_on_push` mode to include JSON message format for triggering flush. Simplify response handling in LangGraphProcessor by sending complete responses in a single block instead of segments, enhancing efficiency in transcript delivery.
Browse files
docs/VOICE_CLIENT_INTEGRATION.md
CHANGED
|
@@ -464,6 +464,22 @@ callObject.on("left-meeting", () => stopTranscriptPolling());
|
|
| 464 |
|
| 465 |
En mode `trigger_on_push`, le serveur bufferise les segments STT jusqu'au `flush`, mais en parallèle il renvoie chaque segment **dès qu'il est reconnu** via le canal applicatif du transport.
|
| 466 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 467 |
- Côté SmallWebRTC : les segments sont envoyés sur le **data channel** `pipecat-app`.
|
| 468 |
- Côté Daily : les segments arrivent sous forme d'**app-messages** (`callObject.on("app-message")`).
|
| 469 |
|
|
|
|
| 464 |
|
| 465 |
En mode `trigger_on_push`, le serveur bufferise les segments STT jusqu'au `flush`, mais en parallèle il renvoie chaque segment **dès qu'il est reconnu** via le canal applicatif du transport.
|
| 466 |
|
| 467 |
+
Pour déclencher ce `flush`, le client doit envoyer un **message applicatif JSON avec `type` égal à `"flush"`** sur le même canal que les autres messages applicatifs :
|
| 468 |
+
|
| 469 |
+
- Côté SmallWebRTC : via le data channel `pipecat-app` :
|
| 470 |
+
|
| 471 |
+
```javascript
|
| 472 |
+
dc.send(JSON.stringify({ type: "flush" }));
|
| 473 |
+
```
|
| 474 |
+
|
| 475 |
+
- Côté Daily : via un app-message :
|
| 476 |
+
|
| 477 |
+
```javascript
|
| 478 |
+
callObject.sendAppMessage({ type: "flush" }, "*");
|
| 479 |
+
```
|
| 480 |
+
|
| 481 |
+
> ⚠️ D'autres valeurs comme `"trigger_flush"` ne sont **pas** interprétées par le serveur et ne déclenchent pas le flush.
|
| 482 |
+
|
| 483 |
- Côté SmallWebRTC : les segments sont envoyés sur le **data channel** `pipecat-app`.
|
| 484 |
- Côté Daily : les segments arrivent sous forme d'**app-messages** (`callObject.on("app-message")`).
|
| 485 |
|
services/voice/langgraph_processor.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
"""Pipecat FrameProcessor that routes transcribed text through the LangGraph agent."""
|
| 2 |
import re
|
| 3 |
import logging
|
| 4 |
-
from typing import Optional
|
| 5 |
|
| 6 |
from pipecat.processors.frame_processor import FrameProcessor
|
| 7 |
from pipecat.frames.frames import (
|
|
@@ -56,19 +56,18 @@ class LangGraphProcessor(FrameProcessor):
|
|
| 56 |
clean = self._clean_response_for_tts(response)
|
| 57 |
logger.info("Sending to TTS (cleaned): %s", clean)
|
| 58 |
|
| 59 |
-
segments = self._split_into_segments(clean)
|
| 60 |
-
logger.info("Split response into %d segment(s)", len(segments))
|
| 61 |
-
|
| 62 |
await self.push_frame(LLMFullResponseStartFrame())
|
| 63 |
-
|
| 64 |
-
|
|
|
|
|
|
|
| 65 |
await self.push_frame(
|
| 66 |
OutputTransportMessageFrame(
|
| 67 |
{
|
| 68 |
"type": "assistant_segment",
|
| 69 |
-
"text":
|
| 70 |
-
"segment_index":
|
| 71 |
-
"total_segments":
|
| 72 |
"conversation_id": self.conversation_id,
|
| 73 |
}
|
| 74 |
)
|
|
@@ -120,57 +119,3 @@ class LangGraphProcessor(FrameProcessor):
|
|
| 120 |
clean = re.sub(r"\s+", " ", clean)
|
| 121 |
|
| 122 |
return clean.strip()
|
| 123 |
-
|
| 124 |
-
# ------------------------------------------------------------------
|
| 125 |
-
# Segmentation helpers
|
| 126 |
-
# ------------------------------------------------------------------
|
| 127 |
-
|
| 128 |
-
@staticmethod
|
| 129 |
-
def _split_into_segments(text: str) -> List[str]:
|
| 130 |
-
"""Split cleaned assistant text into sentence-like segments.
|
| 131 |
-
|
| 132 |
-
Uses simple punctuation-based splitting on `.`, `?`, `!` and
|
| 133 |
-
falls back to a single-segment list if no punctuation is found.
|
| 134 |
-
Very short segments are merged back into neighbours.
|
| 135 |
-
"""
|
| 136 |
-
if not text:
|
| 137 |
-
return []
|
| 138 |
-
|
| 139 |
-
# Split on end-of-sentence punctuation followed by whitespace.
|
| 140 |
-
parts = re.split(r"(?<=[.?!])\s+", text)
|
| 141 |
-
parts = [p.strip() for p in parts if p and p.strip()]
|
| 142 |
-
|
| 143 |
-
if not parts:
|
| 144 |
-
return []
|
| 145 |
-
if len(parts) == 1:
|
| 146 |
-
return parts
|
| 147 |
-
|
| 148 |
-
# Merge very short trailing segments into the previous one to avoid noise.
|
| 149 |
-
merged: List[str] = []
|
| 150 |
-
buffer = ""
|
| 151 |
-
for idx, part in enumerate(parts):
|
| 152 |
-
if buffer:
|
| 153 |
-
candidate = buffer + " " + part
|
| 154 |
-
else:
|
| 155 |
-
candidate = part
|
| 156 |
-
|
| 157 |
-
# Heuristic: keep segments with at least ~10 characters,
|
| 158 |
-
# otherwise merge with the next piece.
|
| 159 |
-
if len(candidate) < 10 and idx < len(parts) - 1:
|
| 160 |
-
buffer = candidate
|
| 161 |
-
continue
|
| 162 |
-
|
| 163 |
-
if buffer and candidate is not buffer:
|
| 164 |
-
merged.append(candidate)
|
| 165 |
-
buffer = ""
|
| 166 |
-
else:
|
| 167 |
-
merged.append(part)
|
| 168 |
-
buffer = ""
|
| 169 |
-
|
| 170 |
-
if buffer:
|
| 171 |
-
if merged:
|
| 172 |
-
merged[-1] = merged[-1] + " " + buffer
|
| 173 |
-
else:
|
| 174 |
-
merged.append(buffer)
|
| 175 |
-
|
| 176 |
-
return merged
|
|
|
|
| 1 |
"""Pipecat FrameProcessor that routes transcribed text through the LangGraph agent."""
|
| 2 |
import re
|
| 3 |
import logging
|
| 4 |
+
from typing import Optional
|
| 5 |
|
| 6 |
from pipecat.processors.frame_processor import FrameProcessor
|
| 7 |
from pipecat.frames.frames import (
|
|
|
|
| 56 |
clean = self._clean_response_for_tts(response)
|
| 57 |
logger.info("Sending to TTS (cleaned): %s", clean)
|
| 58 |
|
|
|
|
|
|
|
|
|
|
| 59 |
await self.push_frame(LLMFullResponseStartFrame())
|
| 60 |
+
if clean:
|
| 61 |
+
# Envoyer la réponse complète en un seul bloc vers le TTS…
|
| 62 |
+
await self.push_frame(TextFrame(clean))
|
| 63 |
+
# …et en parallèle vers le client via un seul message transport.
|
| 64 |
await self.push_frame(
|
| 65 |
OutputTransportMessageFrame(
|
| 66 |
{
|
| 67 |
"type": "assistant_segment",
|
| 68 |
+
"text": clean,
|
| 69 |
+
"segment_index": 1,
|
| 70 |
+
"total_segments": 1,
|
| 71 |
"conversation_id": self.conversation_id,
|
| 72 |
}
|
| 73 |
)
|
|
|
|
| 119 |
clean = re.sub(r"\s+", " ", clean)
|
| 120 |
|
| 121 |
return clean.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|