Spaces:

comodoro
/

Coqui-STT-transcription

Runtime error

897e744 about 4 years ago

1.38 kB

	from stt import Model
	import gradio as gr
	import numpy as np

	# Decoder tuning values handed to the acoustic model below.
	beam_width = 512
	lm_alpha = 0.94
	lm_beta = 2.52

	# External scorer (language model) file attached to the acoustic model.
	scorer = 'czech-large-vocab.scorer'

	# Load the TFLite acoustic model once at import time; `transcribe` reuses
	# this single instance for every request.
	model = Model('stt-comodoro-czech-2022-05-31.tflite')
	model.enableExternalScorer(scorer)
	model.setScorerAlphaBeta(lm_alpha, lm_beta)
	model.setBeamWidth(beam_width)

	def reformat_freq(sr, y):
	    """Return ``(sample_rate, samples)`` with 48 kHz audio reduced to 16 kHz.

	    The recogniser only supports 16 kHz input, so 48 kHz audio is
	    peak-normalised to the int16 range and decimated by averaging each
	    group of three consecutive samples. Audio that is already 16 kHz is
	    returned unchanged.

	    Args:
	        sr: Sample rate of ``y`` in Hz; must be 48000 or 16000.
	        y: NumPy array of audio samples.
	            NOTE(review): assumed to be mono (1-D) -- confirm with caller.

	    Returns:
	        A ``(16000, samples)`` tuple. For 48 kHz input ``samples`` is a new
	        int16 array; for 16 kHz input it is ``y`` itself.

	    Raises:
	        ValueError: If ``sr`` is neither 48000 nor 16000.
	    """
	    if sr not in (48000, 16000):
	        raise ValueError("Unsupported rate", sr)
	    if sr == 16000:
	        return sr, y

	    # Work in float64 so taking the absolute value of the most negative
	    # int16 sample cannot overflow.
	    samples = np.asarray(y, dtype=np.float64)

	    # Drop the 1-2 trailing samples that do not fill a complete group of
	    # three; otherwise the reshape below raises for most recordings.
	    usable = len(samples) - len(samples) % 3
	    samples = samples[:usable]
	    if usable == 0:
	        return 16000, np.zeros(0, dtype="int16")

	    # Normalise by the absolute peak (not the signed maximum) so a dominant
	    # negative peak cannot scale past the int16 range and wrap around.
	    peak = max(np.max(np.abs(samples)), 1)
	    scaled = (samples / peak) * 32767
	    downsampled = scaled.reshape((-1, 3)).mean(axis=1).astype("int16")
	    return 16000, downsampled

	def transcribe(speech):
	    """Transcribe one recording and return the recognised text.

	    Args:
	        speech: ``(sample_rate, samples)`` pair as unpacked into
	            ``reformat_freq``, or ``None`` when nothing was recorded.

	    Returns:
	        The decoded text, or an empty string when ``speech`` is ``None``.

	    Raises:
	        ValueError: Propagated from ``reformat_freq`` for an unsupported
	            sample rate.
	    """
	    # Pressing the button before recording anything yields no audio;
	    # return an empty transcript instead of failing on the unpack below.
	    if speech is None:
	        return ""

	    _, y = reformat_freq(*speech)
	    stream = model.createStream()
	    stream.feedAudioContent(y)
	    # finishStream() produces the final decoding and releases the stream;
	    # intermediateDecode() would leave the stream open after every request.
	    return stream.finishStream()

	# Build the page: a microphone recorder, a button that triggers recognition,
	# and an unlabeled text box that shows the transcript.
	# NOTE(review): the `source=` and `enable_queue=` keywords depend on the
	# installed Gradio release -- confirm against the pinned version.
	with gr.Blocks() as blocks:
	    audio = gr.Audio(
	        source="microphone",
	        type="numpy",
	        streaming=False,
	        # Adjacent literals instead of a backslash continuation inside the
	        # string, so source indentation can never leak into the label.
	        label=(
	            'Pokud je to třeba, povolte mikrofon pro tuto stránku, '
	            'klikněte na Record from microphone, po dokončení nahrávání na Stop recording a poté na Rozpoznat'
	        ),
	    )
	    btn = gr.Button('Rozpoznat')
	    output = gr.Textbox(show_label=False)
	    # Send the recorded audio through `transcribe` and show the result.
	    btn.click(fn=transcribe, inputs=[audio], outputs=[output])

	blocks.launch(enable_queue=True, debug=True, share=True)