"""FastAPI service exposing Svara TTS inference over HTTP.

On startup the service builds a ``SvaraEngine`` from the base model and the
adapter directories discovered under ``ADAPTERS_DIR``. The ``/Get_Inference``
endpoint accepts text, a language name and a reference speaker WAV upload,
and responds with the synthesized audio as a WAV stream.
"""

import os
import io
import shutil
import tempfile

import soundfile as sf
from fastapi import FastAPI, HTTPException, Query, File, UploadFile
from fastapi.responses import StreamingResponse

from inference import SvaraEngine

# Base model + adapters
BASE_MODEL = "kenpath/svara-tts-v1"
ADAPTERS_DIR = "./adapters"

# Language name → code mapping
LANG_MAP = {
    "hindi": "hi",
    "bengali": "bn",
    "telugu": "te",
    "kannada": "kn",
    "bhojpuri": "bhj",
    "chhattisgarhi": "ch",
    "english": "en",
    "maithili": "mth",
    "marathi": "mar",
    "gujarati": "guj",
    "magahi": "mgh",
}

# Discover adapters in folder: every sub-directory of ADAPTERS_DIR is
# registered under its directory name. ``isdir`` (rather than ``exists``)
# keeps ``os.listdir`` from raising when the path is a regular file.
adapter_map = {}
if os.path.isdir(ADAPTERS_DIR):
    for name in os.listdir(ADAPTERS_DIR):
        path = os.path.join(ADAPTERS_DIR, name)
        if os.path.isdir(path):
            adapter_map[name] = path

app = FastAPI(title="Team Submission - Svara TTS API")

# Set by ``startup_event``; stays ``None`` until the app has started.
engine = None


@app.on_event("startup")
async def startup_event():
    """Create the global ``SvaraEngine`` once the application starts.

    Raises:
        RuntimeError: If no adapter directories were discovered.
    """
    global engine
    if not adapter_map:
        raise RuntimeError("No adapters found in ./adapters folder!")
    engine = SvaraEngine(BASE_MODEL, adapter_map)
    print("✅ SvaraEngine initialized with adapters:", list(adapter_map.keys()))


@app.get("/")
def home():
    """Health check: report status and the names of the discovered adapters."""
    return {"status": "healthy", "languages_loaded": list(adapter_map.keys())}


@app.api_route("/Get_Inference", methods=["GET", "POST"])
async def get_inference(
    text: str = Query(..., description="Text to synthesize"),
    lang: str = Query(..., description="Language name (e.g. 'hindi')"),
    speaker_wav: UploadFile = File(...),
):
    """Synthesize ``text`` in ``lang`` using the uploaded reference audio.

    Args:
        text: Text to synthesize.
        lang: Language name. It is lower-cased and stripped, then looked up
            in ``LANG_MAP``; if that fails, it is accepted as-is when it
            matches a key of ``adapter_map``.
        speaker_wav: Uploaded reference audio; it is copied to a temporary
            ``.wav`` file whose path is handed to the engine.

    Returns:
        A ``StreamingResponse`` with media type ``audio/wav`` containing the
        generated audio.

    Raises:
        HTTPException: 400 when the language cannot be resolved; 500 when the
            engine is not initialized, the engine returns no audio, or any
            other exception occurs while handling the request.
    """
    temp_wav_path = None
    try:
        if engine is None:
            raise HTTPException(status_code=500, detail="Engine not initialized.")

        # Normalize and resolve language
        lang_key = lang.lower().strip()
        target_lang_code = LANG_MAP.get(lang_key)
        if not target_lang_code and lang_key in adapter_map:
            target_lang_code = lang_key
        if not target_lang_code:
            raise HTTPException(
                status_code=400,
                detail=f"Language '{lang}' not supported or mapped.",
            )

        # Save uploaded reference audio to temp file
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav:
            # Record the path before copying so the ``finally`` block can
            # remove the file even if the copy itself fails.
            temp_wav_path = temp_wav.name
            shutil.copyfileobj(speaker_wav.file, temp_wav)

        # Printed after the file is closed so the size reflects flushed data.
        print(f"[DEBUG] Saved uploaded audio to: {temp_wav_path}")
        print(f"[DEBUG] File size on disk: {os.path.getsize(temp_wav_path)} bytes")

        # Run TTS
        # NOTE(review): this call runs directly inside the coroutine; if
        # ``synthesize`` blocks, the event loop is stalled for its duration.
        # Consider offloading it once the engine's thread safety is confirmed.
        audio, sr = engine.synthesize(text, target_lang_code, temp_wav_path)
        if audio is None:
            raise HTTPException(
                status_code=500, detail="Model failed to generate audio."
            )

        # Stream WAV back
        buf = io.BytesIO()
        sf.write(buf, audio, sr, format="WAV")
        buf.seek(0)
        return StreamingResponse(buf, media_type="audio/wav")
    except HTTPException:
        # Already carries the intended status code; pass it through untouched.
        raise
    except Exception as e:
        # Chain the cause so the original traceback is kept in server logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Best-effort cleanup: never let a failed delete mask the response
        # or the exception that is already propagating.
        if temp_wav_path is not None:
            try:
                os.remove(temp_wav_path)
            except FileNotFoundError:
                pass
            except OSError as cleanup_error:
                print(
                    f"[WARN] Could not remove temp file {temp_wav_path}: "
                    f"{cleanup_error}"
                )


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)