driopi committed on
Commit
a7e7f41
·
verified ·
1 Parent(s): 56652bc

Upload folder using huggingface_hub

Browse files
.env.example CHANGED
@@ -11,9 +11,14 @@ ANTHROPIC_API_KEY=
11
  # Whisper
12
  WHISPER_MODEL=openai/whisper-small
13
  WHISPER_MODE=local
 
14
  PRELOAD_WHISPER_ON_STARTUP=false
15
  MAX_AUDIO_DURATION_SECONDS=120
16
 
 
 
 
 
17
  # Optional MCP bridge for LangGraph tools
18
  ENABLE_MCP_TOOLS=false
19
 
 
11
  # Whisper
12
  WHISPER_MODEL=openai/whisper-small
13
  WHISPER_MODE=local
14
+ WHISPER_LANGUAGE=ru
15
  PRELOAD_WHISPER_ON_STARTUP=false
16
  MAX_AUDIO_DURATION_SECONDS=120
17
 
18
+ # Emotion portrait (HF model + heuristic hybrid)
19
+ EMOTION_MODE=local
20
+ EMOTION_MODEL=seara/rubert-tiny2-russian-emotion-detection-ru-go-emotions
21
+
22
  # Optional MCP bridge for LangGraph tools
23
  ENABLE_MCP_TOOLS=false
24
 
README.md CHANGED
@@ -16,6 +16,8 @@ app_port: 7860
16
  - Локальная транскрипция через `openai/whisper-small`
17
  - Озвучка итогов с легкой TTS-моделью (`facebook/mms-tts-rus`) через HF Inference API + mock fallback
18
  - Карточка речевого портрета (стабильность, скрытое напряжение, триггеры, рекомендация) по 9 транскриптам
 
 
19
  - Превью транскрипции и финальная генерация Markdown
20
  - Опциональный MCP bridge для Tavily/Hugging Face tools
21
  - Preflight тесты без запуска реальной Whisper (`mock` режим)
 
16
  - Локальная транскрипция через `openai/whisper-small`
17
  - Озвучка итогов с легкой TTS-моделью (`facebook/mms-tts-rus`) через HF Inference API + mock fallback
18
  - Карточка речевого портрета (стабильность, скрытое напряжение, триггеры, рекомендация) по 9 транскриптам
19
+ - Использует HF emotion model: `seara/rubert-tiny2-russian-emotion-detection-ru-go-emotions`
20
+ - Работает в гибриде: HF-модель + эвристики (устойчиво на CPU free tier)
21
  - Превью транскрипции и финальная генерация Markdown
22
  - Опциональный MCP bridge для Tavily/Hugging Face tools
23
  - Preflight тесты без запуска реальной Whisper (`mock` режим)
app/config.py CHANGED
@@ -35,6 +35,7 @@ class Settings:
35
 
36
  whisper_model: str
37
  whisper_mode: str
 
38
  preload_whisper_on_startup: bool
39
  max_audio_duration_seconds: int
40
 
@@ -49,6 +50,8 @@ class Settings:
49
  tts_model: str
50
  tts_timeout_seconds: int
51
  tts_max_chars: int
 
 
52
 
53
  @classmethod
54
  def from_env(cls) -> "Settings":
@@ -61,6 +64,7 @@ class Settings:
61
  llm_model=os.getenv("LLM_MODEL", "gemini-2.5-flash"),
62
  whisper_model=os.getenv("WHISPER_MODEL", "openai/whisper-small"),
63
  whisper_mode=os.getenv("WHISPER_MODE", "local"),
 
64
  preload_whisper_on_startup=_env_bool("PRELOAD_WHISPER_ON_STARTUP", False),
65
  max_audio_duration_seconds=int(os.getenv("MAX_AUDIO_DURATION_SECONDS", "120")),
66
  enable_mcp_tools=_env_bool("ENABLE_MCP_TOOLS", False),
@@ -74,6 +78,8 @@ class Settings:
74
  tts_model=os.getenv("TTS_MODEL", "facebook/mms-tts-rus"),
75
  tts_timeout_seconds=int(os.getenv("TTS_TIMEOUT_SECONDS", "45")),
76
  tts_max_chars=int(os.getenv("TTS_MAX_CHARS", "900")),
 
 
77
  )
78
 
79
  @staticmethod
 
35
 
36
  whisper_model: str
37
  whisper_mode: str
38
+ whisper_language: Optional[str]
39
  preload_whisper_on_startup: bool
40
  max_audio_duration_seconds: int
41
 
 
50
  tts_model: str
51
  tts_timeout_seconds: int
52
  tts_max_chars: int
53
+ emotion_mode: str
54
+ emotion_model: str
55
 
56
  @classmethod
57
  def from_env(cls) -> "Settings":
 
64
  llm_model=os.getenv("LLM_MODEL", "gemini-2.5-flash"),
65
  whisper_model=os.getenv("WHISPER_MODEL", "openai/whisper-small"),
66
  whisper_mode=os.getenv("WHISPER_MODE", "local"),
67
+ whisper_language=os.getenv("WHISPER_LANGUAGE", "ru"),
68
  preload_whisper_on_startup=_env_bool("PRELOAD_WHISPER_ON_STARTUP", False),
69
  max_audio_duration_seconds=int(os.getenv("MAX_AUDIO_DURATION_SECONDS", "120")),
70
  enable_mcp_tools=_env_bool("ENABLE_MCP_TOOLS", False),
 
78
  tts_model=os.getenv("TTS_MODEL", "facebook/mms-tts-rus"),
79
  tts_timeout_seconds=int(os.getenv("TTS_TIMEOUT_SECONDS", "45")),
80
  tts_max_chars=int(os.getenv("TTS_MAX_CHARS", "900")),
81
+ emotion_mode=os.getenv("EMOTION_MODE", "local"),
82
+ emotion_model=os.getenv("EMOTION_MODEL", "seara/rubert-tiny2-russian-emotion-detection-ru-go-emotions"),
83
  )
84
 
85
  @staticmethod
app/main.py CHANGED
@@ -25,7 +25,7 @@ async def lifespan(app: FastAPI):
25
 
26
  mcp_provider = MCPToolProvider(settings)
27
  llm_service = LLMService(settings, mcp_provider=mcp_provider)
28
- portrait_service = PortraitService()
29
  tts_service = TTSService(settings)
30
 
31
  app.state.settings = settings
@@ -49,6 +49,7 @@ app.add_middleware(
49
  allow_credentials=True,
50
  allow_methods=["*"],
51
  allow_headers=["*"],
 
52
  )
53
 
54
  app.include_router(health_router)
 
25
 
26
  mcp_provider = MCPToolProvider(settings)
27
  llm_service = LLMService(settings, mcp_provider=mcp_provider)
28
+ portrait_service = PortraitService(settings)
29
  tts_service = TTSService(settings)
30
 
31
  app.state.settings = settings
 
49
  allow_credentials=True,
50
  allow_methods=["*"],
51
  allow_headers=["*"],
52
+ expose_headers=["X-TTS-Source"],
53
  )
54
 
55
  app.include_router(health_router)
app/models/portrait.py CHANGED
@@ -5,16 +5,29 @@ from pydantic import BaseModel, Field
5
 
6
  class PortraitSignal(BaseModel):
7
  question_number: int
 
 
 
8
  tension: float
9
  uncertainty: float
10
  valence: float
11
 
12
 
 
 
 
 
 
 
 
 
 
13
  class PortraitCard(BaseModel):
14
  emotional_stability: int = Field(ge=1, le=10)
15
  hidden_tension: int = Field(ge=1, le=10)
16
  confidence_proxy: int = Field(ge=1, le=10)
17
  dominant_emotions: List[str] = Field(default_factory=list)
18
  trigger_questions: List[int] = Field(default_factory=list)
 
19
  recommendation: str
20
  signals: List[PortraitSignal] = Field(default_factory=list)
 
5
 
6
  class PortraitSignal(BaseModel):
7
  question_number: int
8
+ round_number: int
9
+ question_in_round: int
10
+ question_text: str
11
  tension: float
12
  uncertainty: float
13
  valence: float
14
 
15
 
16
+ class PortraitTrigger(BaseModel):
17
+ question_number: int
18
+ round_number: int
19
+ question_in_round: int
20
+ question_text: str
21
+ reason: str
22
+ score: float
23
+
24
+
25
  class PortraitCard(BaseModel):
26
  emotional_stability: int = Field(ge=1, le=10)
27
  hidden_tension: int = Field(ge=1, le=10)
28
  confidence_proxy: int = Field(ge=1, le=10)
29
  dominant_emotions: List[str] = Field(default_factory=list)
30
  trigger_questions: List[int] = Field(default_factory=list)
31
+ triggers: List[PortraitTrigger] = Field(default_factory=list)
32
  recommendation: str
33
  signals: List[PortraitSignal] = Field(default_factory=list)
app/routers/health.py CHANGED
@@ -5,9 +5,12 @@ router = APIRouter(tags=["health"])
5
 
6
  @router.get("/health")
7
  async def health(request: Request):
 
8
  return {
9
  "status": "healthy",
10
  "whisper_loaded": request.app.state.transcription_service.is_loaded,
11
  "tts_provider": request.app.state.tts_service.provider,
12
  "tts_available": request.app.state.tts_service.is_available,
 
 
13
  }
 
5
 
6
  @router.get("/health")
7
  async def health(request: Request):
8
+ settings = request.app.state.settings
9
  return {
10
  "status": "healthy",
11
  "whisper_loaded": request.app.state.transcription_service.is_loaded,
12
  "tts_provider": request.app.state.tts_service.provider,
13
  "tts_available": request.app.state.tts_service.is_available,
14
+ "emotion_mode": settings.emotion_mode,
15
+ "emotion_model": settings.emotion_model,
16
  }
app/services/portrait.py CHANGED
@@ -3,9 +3,11 @@ from __future__ import annotations
3
  import math
4
  import re
5
  from collections import Counter
6
- from typing import List
 
7
 
8
- from app.models.portrait import PortraitCard, PortraitSignal
 
9
  from app.models.session import Answer
10
 
11
  _WORD_RE = re.compile(r"[a-zA-Zа-яА-ЯёЁ]+")
@@ -65,6 +67,11 @@ NEGATIVE_MARKERS = (
65
  "дорог",
66
  )
67
 
 
 
 
 
 
68
 
69
  def _clamp(value: float, lower: float, upper: float) -> float:
70
  return max(lower, min(upper, value))
@@ -75,7 +82,74 @@ def _count_markers(text: str, markers: tuple[str, ...]) -> int:
75
  return sum(lowered.count(marker) for marker in markers)
76
 
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  class PortraitService:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  def _signal_for_answer(self, answer: Answer, index: int) -> tuple[PortraitSignal, list[str]]:
80
  text = answer.audio_transcript or ""
81
  tokens = _WORD_RE.findall(text.lower())
@@ -87,25 +161,51 @@ class PortraitService:
87
  positive_count = _count_markers(text, POSITIVE_MARKERS)
88
  negative_count = _count_markers(text, NEGATIVE_MARKERS)
89
 
90
- valence = _clamp((positive_count - negative_count) / max(1, positive_count + negative_count), -1.0, 1.0)
91
- uncertainty = _clamp((hedge_count - 0.5 * confidence_count) / (token_count / 12 + 1), 0.0, 1.0)
92
- tension = _clamp((stress_count + negative_count + 0.7 * hedge_count) / (token_count / 8 + 1), 0.0, 1.0)
93
-
94
- emotions: list[str] = []
95
- if tension >= 0.62:
96
- emotions.append("напряжение")
97
- if uncertainty >= 0.56:
98
- emotions.append("осторожность")
99
- if valence >= 0.2:
100
- emotions.append("оптимизм")
101
- elif valence <= -0.2:
102
- emotions.append("фрустрация")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  if not emotions:
104
- emotions.append("спокойствие")
105
 
106
  return (
107
  PortraitSignal(
108
  question_number=index + 1,
 
 
 
109
  tension=round(tension, 3),
110
  uncertainty=round(uncertainty, 3),
111
  valence=round(valence, 3),
@@ -121,6 +221,7 @@ class PortraitService:
121
  confidence_proxy=6,
122
  dominant_emotions=["спокойствие"],
123
  trigger_questions=[],
 
124
  recommendation="Недостаточно данных для точного портрета. Нужны ответы на все вопросы.",
125
  signals=[],
126
  )
@@ -130,6 +231,9 @@ class PortraitService:
130
  tensions: list[float] = []
131
  uncertainties: list[float] = []
132
 
 
 
 
133
  for idx, answer in enumerate(answers):
134
  signal, emotions = self._signal_for_answer(answer, idx)
135
  signals.append(signal)
@@ -146,25 +250,50 @@ class PortraitService:
146
  variance = sum((value - mean_tension) ** 2 for value in tensions) / len(tensions)
147
  stdev = math.sqrt(variance)
148
 
149
- emotional_stability = round(_clamp(10 - (stdev * 14 + avg_jump * 10), 1, 10))
150
- hidden_tension = round(_clamp(mean_tension * 10 + max(0.0, stdev - 0.12) * 10, 1, 10))
151
- confidence_proxy = round(_clamp((1 - mean_uncertainty) * 10, 1, 10))
152
-
153
- top_tension = sorted(signals, key=lambda item: item.tension, reverse=True)
154
- trigger_questions = [item.question_number for item in top_tension if item.tension >= 0.35][:2]
155
- if len(trigger_questions) < 2:
156
- for item in top_tension:
157
- if item.question_number not in trigger_questions:
158
- trigger_questions.append(item.question_number)
159
- if len(trigger_questions) == 2:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  break
161
 
 
 
162
  if hidden_tension >= 7:
163
  recommendation = "Говорить медленнее в стресс-темах: там растут маркеры напряжения и неопределенности."
164
  elif confidence_proxy <= 4:
165
  recommendation = "Уточнять формулировки короче и конкретнее: сейчас много осторожных оговорок."
 
 
166
  else:
167
- recommendation = "Сохранять текущий темп и добавить больше конкретных цифр в ключевых ответах."
168
 
169
  dominant_emotions = [label for label, _ in emotion_counter.most_common(3)]
170
 
@@ -174,6 +303,7 @@ class PortraitService:
174
  confidence_proxy=confidence_proxy,
175
  dominant_emotions=dominant_emotions,
176
  trigger_questions=trigger_questions,
 
177
  recommendation=recommendation,
178
  signals=signals,
179
  )
 
3
  import math
4
  import re
5
  from collections import Counter
6
+ from functools import lru_cache
7
+ from typing import Any, List
8
 
9
+ from app.config import Settings
10
+ from app.models.portrait import PortraitCard, PortraitSignal, PortraitTrigger
11
  from app.models.session import Answer
12
 
13
  _WORD_RE = re.compile(r"[a-zA-Zа-яА-ЯёЁ]+")
 
67
  "дорог",
68
  )
69
 
70
+ TENSION_LABELS = ("anger", "annoyance", "fear", "nervousness", "sadness", "disappointment", "grief", "remorse")
71
+ NEGATIVE_LABELS = ("anger", "annoyance", "fear", "sadness", "disappointment", "grief", "remorse")
72
+ POSITIVE_LABELS = ("joy", "optimism", "gratitude", "love", "approval", "relief", "pride", "admiration")
73
+ UNCERTAINTY_LABELS = ("nervousness", "fear", "confusion", "realization")
74
+
75
 
76
  def _clamp(value: float, lower: float, upper: float) -> float:
77
  return max(lower, min(upper, value))
 
82
  return sum(lowered.count(marker) for marker in markers)
83
 
84
 
85
+ def _label_sum(scores: dict[str, float], labels: tuple[str, ...]) -> float:
86
+ total = 0.0
87
+ for label, score in scores.items():
88
+ if any(token in label for token in labels):
89
+ total += score
90
+ return _clamp(total, 0.0, 1.0)
91
+
92
+
93
+ def _friendly_label(label: str) -> str:
94
+ mapping = {
95
+ "joy": "радость",
96
+ "optimism": "оптимизм",
97
+ "gratitude": "благодарность",
98
+ "approval": "уверенность",
99
+ "nervousness": "волнение",
100
+ "fear": "тревога",
101
+ "sadness": "грусть",
102
+ "anger": "раздражение",
103
+ "annoyance": "раздражение",
104
+ "confusion": "сомнение",
105
+ "neutral": "спокойствие",
106
+ }
107
+ return mapping.get(label, label)
108
+
109
+
110
  class PortraitService:
111
+ def __init__(self, settings: Settings) -> None:
112
+ self.settings = settings
113
+ self.mode = settings.emotion_mode.lower().strip()
114
+
115
@staticmethod
@lru_cache(maxsize=1)
def _get_classifier(model_name: str):
    """Build the CPU text-classification pipeline for ``model_name``.

    ``transformers`` is imported inside the function, so importing this module
    does not import it. The ``lru_cache(maxsize=1)`` wrapper memoizes the
    pipeline for the most recently requested model name.
    """
    from transformers import pipeline as build_pipeline

    classifier = build_pipeline("text-classification", model=model_name, device="cpu")
    return classifier
121
+
122
+ def _model_scores(self, text: str) -> dict[str, float]:
123
+ if self.mode != "local":
124
+ return {}
125
+ if not text.strip():
126
+ return {}
127
+
128
+ try:
129
+ classifier = self._get_classifier(self.settings.emotion_model)
130
+ raw: Any = classifier(text[:1200], top_k=None, truncation=True, max_length=512)
131
+ except Exception:
132
+ return {}
133
+
134
+ rows: list[dict[str, Any]] = []
135
+ if isinstance(raw, list) and raw:
136
+ if isinstance(raw[0], dict):
137
+ rows = [item for item in raw if isinstance(item, dict)]
138
+ elif isinstance(raw[0], list):
139
+ nested = raw[0]
140
+ rows = [item for item in nested if isinstance(item, dict)]
141
+
142
+ parsed: dict[str, float] = {}
143
+ for row in rows:
144
+ label = str(row.get("label", "")).strip().lower()
145
+ if not label or label.startswith("label_"):
146
+ continue
147
+ score = float(row.get("score", 0.0))
148
+ if score <= 0:
149
+ continue
150
+ parsed[label] = max(parsed.get(label, 0.0), score)
151
+ return parsed
152
+
153
  def _signal_for_answer(self, answer: Answer, index: int) -> tuple[PortraitSignal, list[str]]:
154
  text = answer.audio_transcript or ""
155
  tokens = _WORD_RE.findall(text.lower())
 
161
  positive_count = _count_markers(text, POSITIVE_MARKERS)
162
  negative_count = _count_markers(text, NEGATIVE_MARKERS)
163
 
164
+ heuristic_valence = _clamp(
165
+ (positive_count - negative_count) / max(1, positive_count + negative_count),
166
+ -1.0,
167
+ 1.0,
168
+ )
169
+ heuristic_uncertainty = _clamp((hedge_count - 0.5 * confidence_count) / (token_count / 12 + 1), 0.0, 1.0)
170
+ heuristic_tension = _clamp((stress_count + negative_count + 0.7 * hedge_count) / (token_count / 8 + 1), 0.0, 1.0)
171
+
172
+ scores = self._model_scores(text)
173
+ has_model_scores = bool(scores)
174
+
175
+ if has_model_scores:
176
+ model_tension = _label_sum(scores, TENSION_LABELS)
177
+ model_uncertainty = _label_sum(scores, UNCERTAINTY_LABELS)
178
+ model_positive = _label_sum(scores, POSITIVE_LABELS)
179
+ model_negative = _label_sum(scores, NEGATIVE_LABELS)
180
+ model_valence = _clamp(model_positive - model_negative, -1.0, 1.0)
181
+ tension = _clamp(0.55 * model_tension + 0.45 * heuristic_tension, 0.0, 1.0)
182
+ uncertainty = _clamp(0.55 * model_uncertainty + 0.45 * heuristic_uncertainty, 0.0, 1.0)
183
+ valence = _clamp(0.55 * model_valence + 0.45 * heuristic_valence, -1.0, 1.0)
184
+ top_labels = sorted(scores.items(), key=lambda item: item[1], reverse=True)[:3]
185
+ emotions = [_friendly_label(label) for label, score in top_labels if score >= 0.14]
186
+ else:
187
+ tension = heuristic_tension
188
+ uncertainty = heuristic_uncertainty
189
+ valence = heuristic_valence
190
+ emotions: list[str] = []
191
+ if tension >= 0.5:
192
+ emotions.append("напряжение")
193
+ if uncertainty >= 0.5:
194
+ emotions.append("осторожность")
195
+ if valence >= 0.2:
196
+ emotions.append("оптимизм")
197
+ elif valence <= -0.2:
198
+ emotions.append("фрустрация")
199
+
200
  if not emotions:
201
+ emotions = ["спокойствие"]
202
 
203
  return (
204
  PortraitSignal(
205
  question_number=index + 1,
206
+ round_number=answer.round_number,
207
+ question_in_round=((index % 3) + 1),
208
+ question_text=answer.question_text,
209
  tension=round(tension, 3),
210
  uncertainty=round(uncertainty, 3),
211
  valence=round(valence, 3),
 
221
  confidence_proxy=6,
222
  dominant_emotions=["спокойствие"],
223
  trigger_questions=[],
224
+ triggers=[],
225
  recommendation="Недостаточно данных для точного портрета. Нужны ответы на все вопросы.",
226
  signals=[],
227
  )
 
231
  tensions: list[float] = []
232
  uncertainties: list[float] = []
233
 
234
+ token_counts = [len(_WORD_RE.findall((answer.audio_transcript or "").lower())) for answer in answers]
235
+ avg_token_count = sum(token_counts) / len(token_counts)
236
+
237
  for idx, answer in enumerate(answers):
238
  signal, emotions = self._signal_for_answer(answer, idx)
239
  signals.append(signal)
 
250
  variance = sum((value - mean_tension) ** 2 for value in tensions) / len(tensions)
251
  stdev = math.sqrt(variance)
252
 
253
+ stability_raw = _clamp(1 - (stdev * 1.7 + avg_jump * 1.15), 0.0, 1.0)
254
+ tension_raw = _clamp(mean_tension * 0.9 + max(0.0, stdev - 0.11) * 1.05, 0.0, 1.0)
255
+ short_answer_penalty = 0.22 if avg_token_count < 5 else 0.0
256
+ confidence_raw = _clamp((1 - mean_uncertainty) - short_answer_penalty, 0.0, 1.0)
257
+
258
+ emotional_stability = round(_clamp(2.5 + stability_raw * 6.0, 1, 10))
259
+ hidden_tension = round(_clamp(2.0 + tension_raw * 6.0, 1, 10))
260
+ confidence_proxy = round(_clamp(2.0 + confidence_raw * 6.0, 1, 10))
261
+
262
+ peak_tension = max(tensions) if tensions else 0.0
263
+ peak_uncertainty = max(uncertainties) if uncertainties else 0.0
264
+ triggers: list[PortraitTrigger] = []
265
+
266
+ if peak_tension >= 0.3 or peak_uncertainty >= 0.34:
267
+ top_signals = sorted(signals, key=lambda item: max(item.tension, item.uncertainty), reverse=True)
268
+ trigger_threshold = max(0.32, mean_tension + 0.08)
269
+ for signal in top_signals:
270
+ peak_score = max(signal.tension, signal.uncertainty)
271
+ if peak_score < trigger_threshold:
272
+ continue
273
+ reason = "рост напряжения" if signal.tension >= signal.uncertainty else "рост неопределенности"
274
+ triggers.append(
275
+ PortraitTrigger(
276
+ question_number=signal.question_number,
277
+ round_number=signal.round_number,
278
+ question_in_round=signal.question_in_round,
279
+ question_text=signal.question_text,
280
+ reason=reason,
281
+ score=round(peak_score, 3),
282
+ )
283
+ )
284
+ if len(triggers) == 2:
285
  break
286
 
287
+ trigger_questions = [item.question_number for item in triggers]
288
+
289
  if hidden_tension >= 7:
290
  recommendation = "Говорить медленнее в стресс-темах: там растут маркеры напряжения и неопределенности."
291
  elif confidence_proxy <= 4:
292
  recommendation = "Уточнять формулировки короче и конкретнее: сейчас много осторожных оговорок."
293
+ elif not triggers:
294
+ recommendation = "Сохранять текущий темп: резких эмоциональных триггеров по ответам не выявлено."
295
  else:
296
+ recommendation = "Добавить больше конкретики в триггерных вопросах и фиксировать метрики сразу в ответе."
297
 
298
  dominant_emotions = [label for label, _ in emotion_counter.most_common(3)]
299
 
 
303
  confidence_proxy=confidence_proxy,
304
  dominant_emotions=dominant_emotions,
305
  trigger_questions=trigger_questions,
306
+ triggers=triggers,
307
  recommendation=recommendation,
308
  signals=signals,
309
  )
app/services/transcription.py CHANGED
@@ -54,7 +54,11 @@ class TranscriptionService:
54
  self._pipeline = self._get_pipeline(self.settings.whisper_model)
55
  self._loaded = True
56
 
57
- result = self._pipeline(str(wav_path))
 
 
 
 
58
  return result["text"].strip()
59
  finally:
60
  if wav_path.exists():
 
54
  self._pipeline = self._get_pipeline(self.settings.whisper_model)
55
  self._loaded = True
56
 
57
+ generate_kwargs = {"task": "transcribe"}
58
+ if self.settings.whisper_language:
59
+ generate_kwargs["language"] = self.settings.whisper_language
60
+
61
+ result = self._pipeline(str(wav_path), generate_kwargs=generate_kwargs)
62
  return result["text"].strip()
63
  finally:
64
  if wav_path.exists():
scripts/preflight.sh CHANGED
@@ -7,6 +7,7 @@ export WHISPER_MODE=mock
7
  export PRELOAD_WHISPER_ON_STARTUP=false
8
  export TTS_PROVIDER=mock
9
  export LLM_PROVIDER=mock
 
10
  unset GEMINI_API_KEY
11
  unset ANTHROPIC_API_KEY
12
  unset HUGGINGFACE_API_KEY
 
7
  export PRELOAD_WHISPER_ON_STARTUP=false
8
  export TTS_PROVIDER=mock
9
  export LLM_PROVIDER=mock
10
+ export EMOTION_MODE=mock
11
  unset GEMINI_API_KEY
12
  unset ANTHROPIC_API_KEY
13
  unset HUGGINGFACE_API_KEY
tests/conftest.py CHANGED
@@ -8,6 +8,7 @@ os.environ["PRELOAD_WHISPER_ON_STARTUP"] = "false"
8
  os.environ["TTS_PROVIDER"] = "mock"
9
  os.environ["TTS_MODEL"] = "facebook/mms-tts-rus"
10
  os.environ["LLM_PROVIDER"] = "mock"
 
11
  os.environ.pop("ANTHROPIC_API_KEY", None)
12
  os.environ.pop("HUGGINGFACE_API_KEY", None)
13
  os.environ.pop("GEMINI_API_KEY", None)
 
8
  os.environ["TTS_PROVIDER"] = "mock"
9
  os.environ["TTS_MODEL"] = "facebook/mms-tts-rus"
10
  os.environ["LLM_PROVIDER"] = "mock"
11
+ os.environ["EMOTION_MODE"] = "mock"
12
  os.environ.pop("ANTHROPIC_API_KEY", None)
13
  os.environ.pop("HUGGINGFACE_API_KEY", None)
14
  os.environ.pop("GEMINI_API_KEY", None)
tests/test_api_flow.py CHANGED
@@ -66,6 +66,7 @@ def test_full_9_question_flow_and_results(client):
66
  assert 1 <= results_payload["portrait"]["emotional_stability"] <= 10
67
  assert 1 <= results_payload["portrait"]["hidden_tension"] <= 10
68
  assert isinstance(results_payload["portrait"]["trigger_questions"], list)
 
69
 
70
  download = client.get(f"/api/session/{session_id}/download")
71
  assert download.status_code == 200
 
66
  assert 1 <= results_payload["portrait"]["emotional_stability"] <= 10
67
  assert 1 <= results_payload["portrait"]["hidden_tension"] <= 10
68
  assert isinstance(results_payload["portrait"]["trigger_questions"], list)
69
+ assert isinstance(results_payload["portrait"]["triggers"], list)
70
 
71
  download = client.get(f"/api/session/{session_id}/download")
72
  assert download.status_code == 200
tests/test_preflight_unittest.py CHANGED
@@ -9,6 +9,7 @@ os.environ["PRELOAD_WHISPER_ON_STARTUP"] = "false"
9
  os.environ["TTS_PROVIDER"] = "mock"
10
  os.environ["TTS_MODEL"] = "facebook/mms-tts-rus"
11
  os.environ["LLM_PROVIDER"] = "mock"
 
12
  os.environ.pop("ANTHROPIC_API_KEY", None)
13
  os.environ.pop("HUGGINGFACE_API_KEY", None)
14
  os.environ.pop("GEMINI_API_KEY", None)
 
9
  os.environ["TTS_PROVIDER"] = "mock"
10
  os.environ["TTS_MODEL"] = "facebook/mms-tts-rus"
11
  os.environ["LLM_PROVIDER"] = "mock"
12
+ os.environ["EMOTION_MODE"] = "mock"
13
  os.environ.pop("ANTHROPIC_API_KEY", None)
14
  os.environ.pop("HUGGINGFACE_API_KEY", None)
15
  os.environ.pop("GEMINI_API_KEY", None)