Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- .env.example +5 -0
- README.md +2 -0
- app/config.py +6 -0
- app/main.py +2 -1
- app/models/portrait.py +13 -0
- app/routers/health.py +3 -0
- app/services/portrait.py +158 -28
- app/services/transcription.py +5 -1
- scripts/preflight.sh +1 -0
- tests/conftest.py +1 -0
- tests/test_api_flow.py +1 -0
- tests/test_preflight_unittest.py +1 -0
.env.example
CHANGED
|
@@ -11,9 +11,14 @@ ANTHROPIC_API_KEY=
|
|
| 11 |
# Whisper
|
| 12 |
WHISPER_MODEL=openai/whisper-small
|
| 13 |
WHISPER_MODE=local
|
|
|
|
| 14 |
PRELOAD_WHISPER_ON_STARTUP=false
|
| 15 |
MAX_AUDIO_DURATION_SECONDS=120
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
# Optional MCP bridge for LangGraph tools
|
| 18 |
ENABLE_MCP_TOOLS=false
|
| 19 |
|
|
|
|
| 11 |
# Whisper
|
| 12 |
WHISPER_MODEL=openai/whisper-small
|
| 13 |
WHISPER_MODE=local
|
| 14 |
+
WHISPER_LANGUAGE=ru
|
| 15 |
PRELOAD_WHISPER_ON_STARTUP=false
|
| 16 |
MAX_AUDIO_DURATION_SECONDS=120
|
| 17 |
|
| 18 |
+
# Emotion portrait (HF model + heuristic hybrid)
|
| 19 |
+
EMOTION_MODE=local
|
| 20 |
+
EMOTION_MODEL=seara/rubert-tiny2-russian-emotion-detection-ru-go-emotions
|
| 21 |
+
|
| 22 |
# Optional MCP bridge for LangGraph tools
|
| 23 |
ENABLE_MCP_TOOLS=false
|
| 24 |
|
README.md
CHANGED
|
@@ -16,6 +16,8 @@ app_port: 7860
|
|
| 16 |
- Локальная транскрипция через `openai/whisper-small`
|
| 17 |
- Озвучка итогов с легкой TTS-моделью (`facebook/mms-tts-rus`) через HF Inference API + mock fallback
|
| 18 |
- Карточка речевого портрета (стабильность, скрытое напряжение, триггеры, рекомендация) по 9 транскриптам
|
|
|
|
|
|
|
| 19 |
- Превью транскрипции и финальная генерация Markdown
|
| 20 |
- Опциональный MCP bridge для Tavily/Hugging Face tools
|
| 21 |
- Preflight тесты без запуска реальной Whisper (`mock` режим)
|
|
|
|
| 16 |
- Локальная транскрипция через `openai/whisper-small`
|
| 17 |
- Озвучка итогов с легкой TTS-моделью (`facebook/mms-tts-rus`) через HF Inference API + mock fallback
|
| 18 |
- Карточка речевого портрета (стабильность, скрытое напряжение, триггеры, рекомендация) по 9 транскриптам
|
| 19 |
+
- Использует HF emotion model: `seara/rubert-tiny2-russian-emotion-detection-ru-go-emotions`
|
| 20 |
+
- Работает в гибриде: HF-модель + эвристики (устойчиво на CPU free tier)
|
| 21 |
- Превью транскрипции и финальная генерация Markdown
|
| 22 |
- Опциональный MCP bridge для Tavily/Hugging Face tools
|
| 23 |
- Preflight тесты без запуска реальной Whisper (`mock` режим)
|
app/config.py
CHANGED
|
@@ -35,6 +35,7 @@ class Settings:
|
|
| 35 |
|
| 36 |
whisper_model: str
|
| 37 |
whisper_mode: str
|
|
|
|
| 38 |
preload_whisper_on_startup: bool
|
| 39 |
max_audio_duration_seconds: int
|
| 40 |
|
|
@@ -49,6 +50,8 @@ class Settings:
|
|
| 49 |
tts_model: str
|
| 50 |
tts_timeout_seconds: int
|
| 51 |
tts_max_chars: int
|
|
|
|
|
|
|
| 52 |
|
| 53 |
@classmethod
|
| 54 |
def from_env(cls) -> "Settings":
|
|
@@ -61,6 +64,7 @@ class Settings:
|
|
| 61 |
llm_model=os.getenv("LLM_MODEL", "gemini-2.5-flash"),
|
| 62 |
whisper_model=os.getenv("WHISPER_MODEL", "openai/whisper-small"),
|
| 63 |
whisper_mode=os.getenv("WHISPER_MODE", "local"),
|
|
|
|
| 64 |
preload_whisper_on_startup=_env_bool("PRELOAD_WHISPER_ON_STARTUP", False),
|
| 65 |
max_audio_duration_seconds=int(os.getenv("MAX_AUDIO_DURATION_SECONDS", "120")),
|
| 66 |
enable_mcp_tools=_env_bool("ENABLE_MCP_TOOLS", False),
|
|
@@ -74,6 +78,8 @@ class Settings:
|
|
| 74 |
tts_model=os.getenv("TTS_MODEL", "facebook/mms-tts-rus"),
|
| 75 |
tts_timeout_seconds=int(os.getenv("TTS_TIMEOUT_SECONDS", "45")),
|
| 76 |
tts_max_chars=int(os.getenv("TTS_MAX_CHARS", "900")),
|
|
|
|
|
|
|
| 77 |
)
|
| 78 |
|
| 79 |
@staticmethod
|
|
|
|
| 35 |
|
| 36 |
whisper_model: str
|
| 37 |
whisper_mode: str
|
| 38 |
+
whisper_language: Optional[str]
|
| 39 |
preload_whisper_on_startup: bool
|
| 40 |
max_audio_duration_seconds: int
|
| 41 |
|
|
|
|
| 50 |
tts_model: str
|
| 51 |
tts_timeout_seconds: int
|
| 52 |
tts_max_chars: int
|
| 53 |
+
emotion_mode: str
|
| 54 |
+
emotion_model: str
|
| 55 |
|
| 56 |
@classmethod
|
| 57 |
def from_env(cls) -> "Settings":
|
|
|
|
| 64 |
llm_model=os.getenv("LLM_MODEL", "gemini-2.5-flash"),
|
| 65 |
whisper_model=os.getenv("WHISPER_MODEL", "openai/whisper-small"),
|
| 66 |
whisper_mode=os.getenv("WHISPER_MODE", "local"),
|
| 67 |
+
whisper_language=os.getenv("WHISPER_LANGUAGE", "ru"),
|
| 68 |
preload_whisper_on_startup=_env_bool("PRELOAD_WHISPER_ON_STARTUP", False),
|
| 69 |
max_audio_duration_seconds=int(os.getenv("MAX_AUDIO_DURATION_SECONDS", "120")),
|
| 70 |
enable_mcp_tools=_env_bool("ENABLE_MCP_TOOLS", False),
|
|
|
|
| 78 |
tts_model=os.getenv("TTS_MODEL", "facebook/mms-tts-rus"),
|
| 79 |
tts_timeout_seconds=int(os.getenv("TTS_TIMEOUT_SECONDS", "45")),
|
| 80 |
tts_max_chars=int(os.getenv("TTS_MAX_CHARS", "900")),
|
| 81 |
+
emotion_mode=os.getenv("EMOTION_MODE", "local"),
|
| 82 |
+
emotion_model=os.getenv("EMOTION_MODEL", "seara/rubert-tiny2-russian-emotion-detection-ru-go-emotions"),
|
| 83 |
)
|
| 84 |
|
| 85 |
@staticmethod
|
app/main.py
CHANGED
|
@@ -25,7 +25,7 @@ async def lifespan(app: FastAPI):
|
|
| 25 |
|
| 26 |
mcp_provider = MCPToolProvider(settings)
|
| 27 |
llm_service = LLMService(settings, mcp_provider=mcp_provider)
|
| 28 |
-
portrait_service = PortraitService()
|
| 29 |
tts_service = TTSService(settings)
|
| 30 |
|
| 31 |
app.state.settings = settings
|
|
@@ -49,6 +49,7 @@ app.add_middleware(
|
|
| 49 |
allow_credentials=True,
|
| 50 |
allow_methods=["*"],
|
| 51 |
allow_headers=["*"],
|
|
|
|
| 52 |
)
|
| 53 |
|
| 54 |
app.include_router(health_router)
|
|
|
|
| 25 |
|
| 26 |
mcp_provider = MCPToolProvider(settings)
|
| 27 |
llm_service = LLMService(settings, mcp_provider=mcp_provider)
|
| 28 |
+
portrait_service = PortraitService(settings)
|
| 29 |
tts_service = TTSService(settings)
|
| 30 |
|
| 31 |
app.state.settings = settings
|
|
|
|
| 49 |
allow_credentials=True,
|
| 50 |
allow_methods=["*"],
|
| 51 |
allow_headers=["*"],
|
| 52 |
+
expose_headers=["X-TTS-Source"],
|
| 53 |
)
|
| 54 |
|
| 55 |
app.include_router(health_router)
|
app/models/portrait.py
CHANGED
|
@@ -5,16 +5,29 @@ from pydantic import BaseModel, Field
|
|
| 5 |
|
| 6 |
class PortraitSignal(BaseModel):
|
| 7 |
question_number: int
|
|
|
|
|
|
|
|
|
|
| 8 |
tension: float
|
| 9 |
uncertainty: float
|
| 10 |
valence: float
|
| 11 |
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
class PortraitCard(BaseModel):
|
| 14 |
emotional_stability: int = Field(ge=1, le=10)
|
| 15 |
hidden_tension: int = Field(ge=1, le=10)
|
| 16 |
confidence_proxy: int = Field(ge=1, le=10)
|
| 17 |
dominant_emotions: List[str] = Field(default_factory=list)
|
| 18 |
trigger_questions: List[int] = Field(default_factory=list)
|
|
|
|
| 19 |
recommendation: str
|
| 20 |
signals: List[PortraitSignal] = Field(default_factory=list)
|
|
|
|
| 5 |
|
| 6 |
class PortraitSignal(BaseModel):
    """Per-answer emotion measurements used to build the portrait card."""

    # Sequential number of the answered question.
    question_number: int
    # Round the question belongs to.
    round_number: int
    # Position of the question inside its round.
    question_in_round: int
    # Text of the question that was answered.
    question_text: str
    # Tension estimate for the answer.
    tension: float
    # Uncertainty estimate for the answer.
    uncertainty: float
    # Emotional polarity estimate for the answer.
    valence: float
|
| 14 |
|
| 15 |
|
| 16 |
+
class PortraitTrigger(BaseModel):
    """A question flagged as an emotional trigger, with the reason and score."""

    # Sequential number of the flagged question.
    question_number: int
    # Round the flagged question belongs to.
    round_number: int
    # Position of the flagged question inside its round.
    question_in_round: int
    # Text of the flagged question.
    question_text: str
    # Short human-readable explanation of why the question was flagged.
    reason: str
    # Strength of the signal that caused the flag.
    score: float
|
| 23 |
+
|
| 24 |
+
|
| 25 |
class PortraitCard(BaseModel):
    """Aggregated speech portrait returned for a session."""

    # The three headline metrics are validated to the 1-10 scale.
    emotional_stability: int = Field(ge=1, le=10)
    hidden_tension: int = Field(ge=1, le=10)
    confidence_proxy: int = Field(ge=1, le=10)
    # Most frequent emotion labels across the answers.
    dominant_emotions: List[str] = Field(default_factory=list)
    # Question numbers of the flagged questions.
    trigger_questions: List[int] = Field(default_factory=list)
    # Detailed entries for the flagged questions.
    triggers: List[PortraitTrigger] = Field(default_factory=list)
    # Advice text shown to the user.
    recommendation: str
    # Per-answer measurements the card was derived from.
    signals: List[PortraitSignal] = Field(default_factory=list)
|
app/routers/health.py
CHANGED
|
@@ -5,9 +5,12 @@ router = APIRouter(tags=["health"])
|
|
| 5 |
|
| 6 |
@router.get("/health")
|
| 7 |
async def health(request: Request):
|
|
|
|
| 8 |
return {
|
| 9 |
"status": "healthy",
|
| 10 |
"whisper_loaded": request.app.state.transcription_service.is_loaded,
|
| 11 |
"tts_provider": request.app.state.tts_service.provider,
|
| 12 |
"tts_available": request.app.state.tts_service.is_available,
|
|
|
|
|
|
|
| 13 |
}
|
|
|
|
| 5 |
|
| 6 |
@router.get("/health")
async def health(request: Request):
    """Report service status together with the active model configuration."""
    state = request.app.state
    settings = state.settings

    # Assemble the payload key by key, in the order it is returned.
    payload = {"status": "healthy"}
    payload["whisper_loaded"] = state.transcription_service.is_loaded
    tts = state.tts_service
    payload["tts_provider"] = tts.provider
    payload["tts_available"] = tts.is_available
    payload["emotion_mode"] = settings.emotion_mode
    payload["emotion_model"] = settings.emotion_model
    return payload
|
app/services/portrait.py
CHANGED
|
@@ -3,9 +3,11 @@ from __future__ import annotations
|
|
| 3 |
import math
|
| 4 |
import re
|
| 5 |
from collections import Counter
|
| 6 |
-
from
|
|
|
|
| 7 |
|
| 8 |
-
from app.
|
|
|
|
| 9 |
from app.models.session import Answer
|
| 10 |
|
| 11 |
_WORD_RE = re.compile(r"[a-zA-Zа-яА-ЯёЁ]+")
|
|
@@ -65,6 +67,11 @@ NEGATIVE_MARKERS = (
|
|
| 65 |
"дорог",
|
| 66 |
)
|
| 67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
def _clamp(value: float, lower: float, upper: float) -> float:
|
| 70 |
return max(lower, min(upper, value))
|
|
@@ -75,7 +82,74 @@ def _count_markers(text: str, markers: tuple[str, ...]) -> int:
|
|
| 75 |
return sum(lowered.count(marker) for marker in markers)
|
| 76 |
|
| 77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
class PortraitService:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
def _signal_for_answer(self, answer: Answer, index: int) -> tuple[PortraitSignal, list[str]]:
|
| 80 |
text = answer.audio_transcript or ""
|
| 81 |
tokens = _WORD_RE.findall(text.lower())
|
|
@@ -87,25 +161,51 @@ class PortraitService:
|
|
| 87 |
positive_count = _count_markers(text, POSITIVE_MARKERS)
|
| 88 |
negative_count = _count_markers(text, NEGATIVE_MARKERS)
|
| 89 |
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
if not emotions:
|
| 104 |
-
emotions
|
| 105 |
|
| 106 |
return (
|
| 107 |
PortraitSignal(
|
| 108 |
question_number=index + 1,
|
|
|
|
|
|
|
|
|
|
| 109 |
tension=round(tension, 3),
|
| 110 |
uncertainty=round(uncertainty, 3),
|
| 111 |
valence=round(valence, 3),
|
|
@@ -121,6 +221,7 @@ class PortraitService:
|
|
| 121 |
confidence_proxy=6,
|
| 122 |
dominant_emotions=["спокойствие"],
|
| 123 |
trigger_questions=[],
|
|
|
|
| 124 |
recommendation="Недостаточно данных для точного портрета. Нужны ответы на все вопросы.",
|
| 125 |
signals=[],
|
| 126 |
)
|
|
@@ -130,6 +231,9 @@ class PortraitService:
|
|
| 130 |
tensions: list[float] = []
|
| 131 |
uncertainties: list[float] = []
|
| 132 |
|
|
|
|
|
|
|
|
|
|
| 133 |
for idx, answer in enumerate(answers):
|
| 134 |
signal, emotions = self._signal_for_answer(answer, idx)
|
| 135 |
signals.append(signal)
|
|
@@ -146,25 +250,50 @@ class PortraitService:
|
|
| 146 |
variance = sum((value - mean_tension) ** 2 for value in tensions) / len(tensions)
|
| 147 |
stdev = math.sqrt(variance)
|
| 148 |
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
break
|
| 161 |
|
|
|
|
|
|
|
| 162 |
if hidden_tension >= 7:
|
| 163 |
recommendation = "Говорить медленнее в стресс-темах: там растут маркеры напряжения и неопределенности."
|
| 164 |
elif confidence_proxy <= 4:
|
| 165 |
recommendation = "Уточнять формулировки короче и конкретнее: сейчас много осторожных оговорок."
|
|
|
|
|
|
|
| 166 |
else:
|
| 167 |
-
recommendation = "
|
| 168 |
|
| 169 |
dominant_emotions = [label for label, _ in emotion_counter.most_common(3)]
|
| 170 |
|
|
@@ -174,6 +303,7 @@ class PortraitService:
|
|
| 174 |
confidence_proxy=confidence_proxy,
|
| 175 |
dominant_emotions=dominant_emotions,
|
| 176 |
trigger_questions=trigger_questions,
|
|
|
|
| 177 |
recommendation=recommendation,
|
| 178 |
signals=signals,
|
| 179 |
)
|
|
|
|
| 3 |
import math
|
| 4 |
import re
|
| 5 |
from collections import Counter
|
| 6 |
+
from functools import lru_cache
|
| 7 |
+
from typing import Any, List
|
| 8 |
|
| 9 |
+
from app.config import Settings
|
| 10 |
+
from app.models.portrait import PortraitCard, PortraitSignal, PortraitTrigger
|
| 11 |
from app.models.session import Answer
|
| 12 |
|
| 13 |
_WORD_RE = re.compile(r"[a-zA-Zа-яА-ЯёЁ]+")
|
|
|
|
| 67 |
"дорог",
|
| 68 |
)
|
| 69 |
|
| 70 |
+
TENSION_LABELS = ("anger", "annoyance", "fear", "nervousness", "sadness", "disappointment", "grief", "remorse")
|
| 71 |
+
NEGATIVE_LABELS = ("anger", "annoyance", "fear", "sadness", "disappointment", "grief", "remorse")
|
| 72 |
+
POSITIVE_LABELS = ("joy", "optimism", "gratitude", "love", "approval", "relief", "pride", "admiration")
|
| 73 |
+
UNCERTAINTY_LABELS = ("nervousness", "fear", "confusion", "realization")
|
| 74 |
+
|
| 75 |
|
| 76 |
def _clamp(value: float, lower: float, upper: float) -> float:
|
| 77 |
return max(lower, min(upper, value))
|
|
|
|
| 82 |
return sum(lowered.count(marker) for marker in markers)
|
| 83 |
|
| 84 |
|
| 85 |
+
def _label_sum(scores: dict[str, float], labels: tuple[str, ...]) -> float:
|
| 86 |
+
total = 0.0
|
| 87 |
+
for label, score in scores.items():
|
| 88 |
+
if any(token in label for token in labels):
|
| 89 |
+
total += score
|
| 90 |
+
return _clamp(total, 0.0, 1.0)
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def _friendly_label(label: str) -> str:
|
| 94 |
+
mapping = {
|
| 95 |
+
"joy": "радость",
|
| 96 |
+
"optimism": "оптимизм",
|
| 97 |
+
"gratitude": "благодарность",
|
| 98 |
+
"approval": "уверенность",
|
| 99 |
+
"nervousness": "волнение",
|
| 100 |
+
"fear": "тревога",
|
| 101 |
+
"sadness": "грусть",
|
| 102 |
+
"anger": "раздражение",
|
| 103 |
+
"annoyance": "раздражение",
|
| 104 |
+
"confusion": "сомнение",
|
| 105 |
+
"neutral": "спокойствие",
|
| 106 |
+
}
|
| 107 |
+
return mapping.get(label, label)
|
| 108 |
+
|
| 109 |
+
|
| 110 |
class PortraitService:
|
| 111 |
+
def __init__(self, settings: Settings) -> None:
|
| 112 |
+
self.settings = settings
|
| 113 |
+
self.mode = settings.emotion_mode.lower().strip()
|
| 114 |
+
|
| 115 |
+
@staticmethod
@lru_cache(maxsize=1)
def _get_classifier(model_name: str):
    """Build the text-classification pipeline for ``model_name`` on CPU.

    The result is memoised with ``lru_cache(maxsize=1)``, so repeated calls
    with the same model name reuse the pipeline that was already built.
    """
    # Imported at call time rather than at module import.
    from transformers import pipeline as build_pipeline

    classifier = build_pipeline("text-classification", model=model_name, device="cpu")
    return classifier
|
| 121 |
+
|
| 122 |
+
def _model_scores(self, text: str) -> dict[str, float]:
|
| 123 |
+
if self.mode != "local":
|
| 124 |
+
return {}
|
| 125 |
+
if not text.strip():
|
| 126 |
+
return {}
|
| 127 |
+
|
| 128 |
+
try:
|
| 129 |
+
classifier = self._get_classifier(self.settings.emotion_model)
|
| 130 |
+
raw: Any = classifier(text[:1200], top_k=None, truncation=True, max_length=512)
|
| 131 |
+
except Exception:
|
| 132 |
+
return {}
|
| 133 |
+
|
| 134 |
+
rows: list[dict[str, Any]] = []
|
| 135 |
+
if isinstance(raw, list) and raw:
|
| 136 |
+
if isinstance(raw[0], dict):
|
| 137 |
+
rows = [item for item in raw if isinstance(item, dict)]
|
| 138 |
+
elif isinstance(raw[0], list):
|
| 139 |
+
nested = raw[0]
|
| 140 |
+
rows = [item for item in nested if isinstance(item, dict)]
|
| 141 |
+
|
| 142 |
+
parsed: dict[str, float] = {}
|
| 143 |
+
for row in rows:
|
| 144 |
+
label = str(row.get("label", "")).strip().lower()
|
| 145 |
+
if not label or label.startswith("label_"):
|
| 146 |
+
continue
|
| 147 |
+
score = float(row.get("score", 0.0))
|
| 148 |
+
if score <= 0:
|
| 149 |
+
continue
|
| 150 |
+
parsed[label] = max(parsed.get(label, 0.0), score)
|
| 151 |
+
return parsed
|
| 152 |
+
|
| 153 |
def _signal_for_answer(self, answer: Answer, index: int) -> tuple[PortraitSignal, list[str]]:
|
| 154 |
text = answer.audio_transcript or ""
|
| 155 |
tokens = _WORD_RE.findall(text.lower())
|
|
|
|
| 161 |
positive_count = _count_markers(text, POSITIVE_MARKERS)
|
| 162 |
negative_count = _count_markers(text, NEGATIVE_MARKERS)
|
| 163 |
|
| 164 |
+
heuristic_valence = _clamp(
|
| 165 |
+
(positive_count - negative_count) / max(1, positive_count + negative_count),
|
| 166 |
+
-1.0,
|
| 167 |
+
1.0,
|
| 168 |
+
)
|
| 169 |
+
heuristic_uncertainty = _clamp((hedge_count - 0.5 * confidence_count) / (token_count / 12 + 1), 0.0, 1.0)
|
| 170 |
+
heuristic_tension = _clamp((stress_count + negative_count + 0.7 * hedge_count) / (token_count / 8 + 1), 0.0, 1.0)
|
| 171 |
+
|
| 172 |
+
scores = self._model_scores(text)
|
| 173 |
+
has_model_scores = bool(scores)
|
| 174 |
+
|
| 175 |
+
if has_model_scores:
|
| 176 |
+
model_tension = _label_sum(scores, TENSION_LABELS)
|
| 177 |
+
model_uncertainty = _label_sum(scores, UNCERTAINTY_LABELS)
|
| 178 |
+
model_positive = _label_sum(scores, POSITIVE_LABELS)
|
| 179 |
+
model_negative = _label_sum(scores, NEGATIVE_LABELS)
|
| 180 |
+
model_valence = _clamp(model_positive - model_negative, -1.0, 1.0)
|
| 181 |
+
tension = _clamp(0.55 * model_tension + 0.45 * heuristic_tension, 0.0, 1.0)
|
| 182 |
+
uncertainty = _clamp(0.55 * model_uncertainty + 0.45 * heuristic_uncertainty, 0.0, 1.0)
|
| 183 |
+
valence = _clamp(0.55 * model_valence + 0.45 * heuristic_valence, -1.0, 1.0)
|
| 184 |
+
top_labels = sorted(scores.items(), key=lambda item: item[1], reverse=True)[:3]
|
| 185 |
+
emotions = [_friendly_label(label) for label, score in top_labels if score >= 0.14]
|
| 186 |
+
else:
|
| 187 |
+
tension = heuristic_tension
|
| 188 |
+
uncertainty = heuristic_uncertainty
|
| 189 |
+
valence = heuristic_valence
|
| 190 |
+
emotions: list[str] = []
|
| 191 |
+
if tension >= 0.5:
|
| 192 |
+
emotions.append("напряжение")
|
| 193 |
+
if uncertainty >= 0.5:
|
| 194 |
+
emotions.append("осторожность")
|
| 195 |
+
if valence >= 0.2:
|
| 196 |
+
emotions.append("оптимизм")
|
| 197 |
+
elif valence <= -0.2:
|
| 198 |
+
emotions.append("фрустрация")
|
| 199 |
+
|
| 200 |
if not emotions:
|
| 201 |
+
emotions = ["спокойствие"]
|
| 202 |
|
| 203 |
return (
|
| 204 |
PortraitSignal(
|
| 205 |
question_number=index + 1,
|
| 206 |
+
round_number=answer.round_number,
|
| 207 |
+
question_in_round=((index % 3) + 1),
|
| 208 |
+
question_text=answer.question_text,
|
| 209 |
tension=round(tension, 3),
|
| 210 |
uncertainty=round(uncertainty, 3),
|
| 211 |
valence=round(valence, 3),
|
|
|
|
| 221 |
confidence_proxy=6,
|
| 222 |
dominant_emotions=["спокойствие"],
|
| 223 |
trigger_questions=[],
|
| 224 |
+
triggers=[],
|
| 225 |
recommendation="Недостаточно данных для точного портрета. Нужны ответы на все вопросы.",
|
| 226 |
signals=[],
|
| 227 |
)
|
|
|
|
| 231 |
tensions: list[float] = []
|
| 232 |
uncertainties: list[float] = []
|
| 233 |
|
| 234 |
+
token_counts = [len(_WORD_RE.findall((answer.audio_transcript or "").lower())) for answer in answers]
|
| 235 |
+
avg_token_count = sum(token_counts) / len(token_counts)
|
| 236 |
+
|
| 237 |
for idx, answer in enumerate(answers):
|
| 238 |
signal, emotions = self._signal_for_answer(answer, idx)
|
| 239 |
signals.append(signal)
|
|
|
|
| 250 |
variance = sum((value - mean_tension) ** 2 for value in tensions) / len(tensions)
|
| 251 |
stdev = math.sqrt(variance)
|
| 252 |
|
| 253 |
+
stability_raw = _clamp(1 - (stdev * 1.7 + avg_jump * 1.15), 0.0, 1.0)
|
| 254 |
+
tension_raw = _clamp(mean_tension * 0.9 + max(0.0, stdev - 0.11) * 1.05, 0.0, 1.0)
|
| 255 |
+
short_answer_penalty = 0.22 if avg_token_count < 5 else 0.0
|
| 256 |
+
confidence_raw = _clamp((1 - mean_uncertainty) - short_answer_penalty, 0.0, 1.0)
|
| 257 |
+
|
| 258 |
+
emotional_stability = round(_clamp(2.5 + stability_raw * 6.0, 1, 10))
|
| 259 |
+
hidden_tension = round(_clamp(2.0 + tension_raw * 6.0, 1, 10))
|
| 260 |
+
confidence_proxy = round(_clamp(2.0 + confidence_raw * 6.0, 1, 10))
|
| 261 |
+
|
| 262 |
+
peak_tension = max(tensions) if tensions else 0.0
|
| 263 |
+
peak_uncertainty = max(uncertainties) if uncertainties else 0.0
|
| 264 |
+
triggers: list[PortraitTrigger] = []
|
| 265 |
+
|
| 266 |
+
if peak_tension >= 0.3 or peak_uncertainty >= 0.34:
|
| 267 |
+
top_signals = sorted(signals, key=lambda item: max(item.tension, item.uncertainty), reverse=True)
|
| 268 |
+
trigger_threshold = max(0.32, mean_tension + 0.08)
|
| 269 |
+
for signal in top_signals:
|
| 270 |
+
peak_score = max(signal.tension, signal.uncertainty)
|
| 271 |
+
if peak_score < trigger_threshold:
|
| 272 |
+
continue
|
| 273 |
+
reason = "рост напряжения" if signal.tension >= signal.uncertainty else "рост неопределенности"
|
| 274 |
+
triggers.append(
|
| 275 |
+
PortraitTrigger(
|
| 276 |
+
question_number=signal.question_number,
|
| 277 |
+
round_number=signal.round_number,
|
| 278 |
+
question_in_round=signal.question_in_round,
|
| 279 |
+
question_text=signal.question_text,
|
| 280 |
+
reason=reason,
|
| 281 |
+
score=round(peak_score, 3),
|
| 282 |
+
)
|
| 283 |
+
)
|
| 284 |
+
if len(triggers) == 2:
|
| 285 |
break
|
| 286 |
|
| 287 |
+
trigger_questions = [item.question_number for item in triggers]
|
| 288 |
+
|
| 289 |
if hidden_tension >= 7:
|
| 290 |
recommendation = "Говорить медленнее в стресс-темах: там растут маркеры напряжения и неопределенности."
|
| 291 |
elif confidence_proxy <= 4:
|
| 292 |
recommendation = "Уточнять формулировки короче и конкретнее: сейчас много осторожных оговорок."
|
| 293 |
+
elif not triggers:
|
| 294 |
+
recommendation = "Сохранять текущий темп: резких эмоциональных триггеров по ответам не выявлено."
|
| 295 |
else:
|
| 296 |
+
recommendation = "Добавить больше конкретики в триггерных вопросах и фиксировать метрики сразу в ответе."
|
| 297 |
|
| 298 |
dominant_emotions = [label for label, _ in emotion_counter.most_common(3)]
|
| 299 |
|
|
|
|
| 303 |
confidence_proxy=confidence_proxy,
|
| 304 |
dominant_emotions=dominant_emotions,
|
| 305 |
trigger_questions=trigger_questions,
|
| 306 |
+
triggers=triggers,
|
| 307 |
recommendation=recommendation,
|
| 308 |
signals=signals,
|
| 309 |
)
|
app/services/transcription.py
CHANGED
|
@@ -54,7 +54,11 @@ class TranscriptionService:
|
|
| 54 |
self._pipeline = self._get_pipeline(self.settings.whisper_model)
|
| 55 |
self._loaded = True
|
| 56 |
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
return result["text"].strip()
|
| 59 |
finally:
|
| 60 |
if wav_path.exists():
|
|
|
|
| 54 |
self._pipeline = self._get_pipeline(self.settings.whisper_model)
|
| 55 |
self._loaded = True
|
| 56 |
|
| 57 |
+
generate_kwargs = {"task": "transcribe"}
|
| 58 |
+
if self.settings.whisper_language:
|
| 59 |
+
generate_kwargs["language"] = self.settings.whisper_language
|
| 60 |
+
|
| 61 |
+
result = self._pipeline(str(wav_path), generate_kwargs=generate_kwargs)
|
| 62 |
return result["text"].strip()
|
| 63 |
finally:
|
| 64 |
if wav_path.exists():
|
scripts/preflight.sh
CHANGED
|
@@ -7,6 +7,7 @@ export WHISPER_MODE=mock
|
|
| 7 |
export PRELOAD_WHISPER_ON_STARTUP=false
|
| 8 |
export TTS_PROVIDER=mock
|
| 9 |
export LLM_PROVIDER=mock
|
|
|
|
| 10 |
unset GEMINI_API_KEY
|
| 11 |
unset ANTHROPIC_API_KEY
|
| 12 |
unset HUGGINGFACE_API_KEY
|
|
|
|
| 7 |
export PRELOAD_WHISPER_ON_STARTUP=false
|
| 8 |
export TTS_PROVIDER=mock
|
| 9 |
export LLM_PROVIDER=mock
|
| 10 |
+
export EMOTION_MODE=mock
|
| 11 |
unset GEMINI_API_KEY
|
| 12 |
unset ANTHROPIC_API_KEY
|
| 13 |
unset HUGGINGFACE_API_KEY
|
tests/conftest.py
CHANGED
|
@@ -8,6 +8,7 @@ os.environ["PRELOAD_WHISPER_ON_STARTUP"] = "false"
|
|
| 8 |
os.environ["TTS_PROVIDER"] = "mock"
|
| 9 |
os.environ["TTS_MODEL"] = "facebook/mms-tts-rus"
|
| 10 |
os.environ["LLM_PROVIDER"] = "mock"
|
|
|
|
| 11 |
os.environ.pop("ANTHROPIC_API_KEY", None)
|
| 12 |
os.environ.pop("HUGGINGFACE_API_KEY", None)
|
| 13 |
os.environ.pop("GEMINI_API_KEY", None)
|
|
|
|
| 8 |
os.environ["TTS_PROVIDER"] = "mock"
|
| 9 |
os.environ["TTS_MODEL"] = "facebook/mms-tts-rus"
|
| 10 |
os.environ["LLM_PROVIDER"] = "mock"
|
| 11 |
+
os.environ["EMOTION_MODE"] = "mock"
|
| 12 |
os.environ.pop("ANTHROPIC_API_KEY", None)
|
| 13 |
os.environ.pop("HUGGINGFACE_API_KEY", None)
|
| 14 |
os.environ.pop("GEMINI_API_KEY", None)
|
tests/test_api_flow.py
CHANGED
|
@@ -66,6 +66,7 @@ def test_full_9_question_flow_and_results(client):
|
|
| 66 |
assert 1 <= results_payload["portrait"]["emotional_stability"] <= 10
|
| 67 |
assert 1 <= results_payload["portrait"]["hidden_tension"] <= 10
|
| 68 |
assert isinstance(results_payload["portrait"]["trigger_questions"], list)
|
|
|
|
| 69 |
|
| 70 |
download = client.get(f"/api/session/{session_id}/download")
|
| 71 |
assert download.status_code == 200
|
|
|
|
| 66 |
assert 1 <= results_payload["portrait"]["emotional_stability"] <= 10
|
| 67 |
assert 1 <= results_payload["portrait"]["hidden_tension"] <= 10
|
| 68 |
assert isinstance(results_payload["portrait"]["trigger_questions"], list)
|
| 69 |
+
assert isinstance(results_payload["portrait"]["triggers"], list)
|
| 70 |
|
| 71 |
download = client.get(f"/api/session/{session_id}/download")
|
| 72 |
assert download.status_code == 200
|
tests/test_preflight_unittest.py
CHANGED
|
@@ -9,6 +9,7 @@ os.environ["PRELOAD_WHISPER_ON_STARTUP"] = "false"
|
|
| 9 |
os.environ["TTS_PROVIDER"] = "mock"
|
| 10 |
os.environ["TTS_MODEL"] = "facebook/mms-tts-rus"
|
| 11 |
os.environ["LLM_PROVIDER"] = "mock"
|
|
|
|
| 12 |
os.environ.pop("ANTHROPIC_API_KEY", None)
|
| 13 |
os.environ.pop("HUGGINGFACE_API_KEY", None)
|
| 14 |
os.environ.pop("GEMINI_API_KEY", None)
|
|
|
|
| 9 |
os.environ["TTS_PROVIDER"] = "mock"
|
| 10 |
os.environ["TTS_MODEL"] = "facebook/mms-tts-rus"
|
| 11 |
os.environ["LLM_PROVIDER"] = "mock"
|
| 12 |
+
os.environ["EMOTION_MODE"] = "mock"
|
| 13 |
os.environ.pop("ANTHROPIC_API_KEY", None)
|
| 14 |
os.environ.pop("HUGGINGFACE_API_KEY", None)
|
| 15 |
os.environ.pop("GEMINI_API_KEY", None)
|