I’m record and save it to an audio variable and got this error:
return _impl.SpeechToText(aCtx, aBuffer)
ValueError: invalid literal for int() with base 10: b'#\x06\x0e\x0b\x92\n0\x11\xac\x18\xc2\x1b\x05\x1c\x87\x1d\xd1\x18\xd0\x12v\x07\xc6\xf9\xf1\xf0\x8c\xe7[\xe5\x12\xe7$\xe5\x90\xe3@\xe7\x14\xef\xdf\xf2\xbe\xf0\xbb\xefy\xeb\x93\xe49\xe0\x05\xde\xd6\xd9
What do it means and how can i fix it?
def run(self, audio):
# audio = BytesIO(audio)
result = deepspeechModel.stt(audio_buffer=audio)
return result
othiele
(Olaf Thiele)
December 8, 2020, 2:06pm
2
It is hard to help with just 2 lines of code, please give more information .
Recorded with Pyaudio:
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
CHUNK = 1024
rec = []
pyAudio.open(rate=RATE,
channels=CHANNELS,
format=FORMAT,
input=True,
output=True,
frames_per_buffer=CHUNK)
data = self._stream.read(CHUNK)
rec.append(data)
run(b’’.join(rec))
The error is in line:
result = deepspeechModel.stt(audio_buffer=audio)
othiele
(Olaf Thiele)
December 8, 2020, 2:24pm
4
Did you read what’s in the link?
Now yes,
It running it on Ubuntu 20.04
Python version 3.7
I Think its a running iterference
How I said I use PyAudio to record my mic to a variable (audio)
my goal ist go geht the output (Text) to a variable in return.
the Errorcodes you can see in my first Post
I hope thats besser to understand
othiele
(Olaf Thiele)
December 8, 2020, 4:44pm
6
Why don’t you do like in the Python example ?
Okay not the error is away but deepspeech return always None.
My code:
recorder.py
class Recorder:
def init (self):
_pyAudio = pyaudio.PyAudio()
self._stream = _pyAudio.open(rate=RATE,
channels=CHANNELS,
format=FORMAT,
input=True,
output=True,
input_device_index=None,
output_device_index=0,
frames_per_buffer=CHUNK)
self.stt = speechtotextengines.DeepSpeech('./models/deepspeech-0.9.2-models.tflite',
'./models/deepspeech-0.9.2-models.scorer')
@staticmethod
def rms(frame):
shorts = array.array(FRAME_FORMAT, frame)
sum_squares = sum(
(sample * NORMALIZE) ** 2
for sample in shorts
)
return (sum_squares / len(shorts)) ** 0.5 * 1000
def record(self):
print('Noise detected')
rec = []
current = time.time()
end = time.time() + TIMEOUT_LENGTH
while current <= end:
data = self._stream.read(CHUNK)
if self.rms(data) >= THRESHOLD:
end = time.time() + TIMEOUT_LENGTH
current = time.time()
rec.append(data)
# self.stt.run(b''.join(rec))
"""Play Stream"""
# self._stream.write(b''.join(rec))
text = self.stt.run(b''.join(rec))
print(text)
print('Listening...')
def listen(self):
print('Listening...')
while True:
mic_input = self._stream.read(CHUNK)
rms_val = self.rms(mic_input)
if rms_val >= THRESHOLD:
self.record()
if __name__ == '__main__':
recorder = Recorder()
recorder.listen()
speechtotextengines.py
import numpy
import deepspeech
import wave
class DeepSpeech:
"""Class to perform speech-to-text transcription and related functionality"""
def __init__(self, model_path, scorer_path):
self.model = deepspeech.Model(model_path)
self.model.enableExternalScorer(scorer_path)
def run(self, audio):
print('start')
self.model.stt(numpy.frombuffer(audio, numpy.int16))
print('end')