[Python] ValueError: invalid literal for int() with base 10

I record audio, save it to an `audio` variable, and got this error:

return _impl.SpeechToText(aCtx, aBuffer)
ValueError: invalid literal for int() with base 10: b'#\x06\x0e\x0b\x92\n0\x11\xac\x18\xc2\x1b\x05\x1c\x87\x1d\xd1\x18\xd0\x12v\x07\xc6\xf9\xf1\xf0\x8c\xe7[\xe5\x12\xe7$\xe5\x90\xe3@\xe7\x14\xef\xdf\xf2\xbe\xf0\xbb\xefy\xeb\x93\xe49\xe0\x05\xde\xd6\xd9

What does it mean and how can I fix it?

def run(self, audio):
    """Transcribe ``audio`` with ``deepspeechModel`` and return the result."""
    # audio = BytesIO(audio)
    # NOTE(review): passing raw ``bytes`` here is what raises the ValueError
    # quoted above; converting the buffer first with
    # ``numpy.frombuffer(audio, numpy.int16)`` makes that error go away.
    result = deepspeechModel.stt(audio_buffer=audio)
    return result

It is hard to help with just 2 lines of code, please give more information.

Recorded with Pyaudio:

# Capture settings: 16-bit samples, one channel, 16 kHz, 1024 frames per read.
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
CHUNK = 1024
rec = []  # recorded chunks, joined into one buffer before transcription

pyAudio.open(rate=RATE,
             channels=CHANNELS,
             format=FORMAT,
             input=True,
             output=True,
             frames_per_buffer=CHUNK)

data = self._stream.read(CHUNK)
rec.append(data)
# Plain ASCII quotes are required here; typographic quotes are a syntax error.
run(b''.join(rec))

The error is raised on this line:
# NOTE(review): this is the call that raises the ValueError from the first post
# when ``audio`` is a raw bytes buffer.
result = deepspeechModel.stt(audio_buffer=audio)

Did you read what’s in the link?

Now yes,

I'm running it on Ubuntu 20.04
Python version 3.7
I think it's a problem with running inference

As I said, I use PyAudio to record my mic to a variable (audio).
My goal is to get the output (text) back in a variable.

You can see the error messages in my first post.

I hope that's easier to understand.

Why don’t you do like in the Python example?

Okay, now the error is gone, but DeepSpeech always returns None.

My code:

recorder.py
class Recorder:
    """Voice-activated microphone recorder that transcribes what it captures.

    Relies on module-level names that are not defined in this class
    (``RATE``, ``CHANNELS``, ``FORMAT``, ``CHUNK``, ``FRAME_FORMAT``,
    ``NORMALIZE``, ``THRESHOLD``, ``TIMEOUT_LENGTH``) and on the ``pyaudio``,
    ``array``, ``time`` and ``speechtotextengines`` modules.
    """

    def __init__(self):
        """Open the audio stream and load the speech-to-text engine."""
        # The constructor has to be spelled ``__init__``: a method named
        # ``init`` is never called on instantiation, which would leave
        # ``_stream`` and ``stt`` undefined.
        audio_interface = pyaudio.PyAudio()
        self._stream = audio_interface.open(rate=RATE,
                                            channels=CHANNELS,
                                            format=FORMAT,
                                            input=True,
                                            output=True,
                                            input_device_index=None,
                                            output_device_index=0,
                                            frames_per_buffer=CHUNK)

        self.stt = speechtotextengines.DeepSpeech('./models/deepspeech-0.9.2-models.tflite',
                                                  './models/deepspeech-0.9.2-models.scorer')

    @staticmethod
    def rms(frame):
        """Return the root-mean-square level of ``frame``, scaled by 1000.

        Args:
            frame: Raw audio buffer, decoded with ``FRAME_FORMAT``.

        Returns:
            The RMS of the samples after multiplying each by ``NORMALIZE``,
            times 1000; ``0.0`` for an empty frame.
        """
        shorts = array.array(FRAME_FORMAT, frame)
        if not shorts:
            # Nothing captured: avoid dividing by zero below.
            return 0.0
        sum_squares = sum((sample * NORMALIZE) ** 2 for sample in shorts)
        return (sum_squares / len(shorts)) ** 0.5 * 1000

    def record(self):
        """Record until the input stays quiet, then transcribe and print it.

        Reading continues until ``TIMEOUT_LENGTH`` seconds pass without a
        chunk whose level reaches ``THRESHOLD``; every loud chunk pushes the
        deadline back.
        """
        print('Noise detected')
        rec = []
        current = time.time()
        end = current + TIMEOUT_LENGTH

        while current <= end:
            data = self._stream.read(CHUNK)
            if self.rms(data) >= THRESHOLD:
                # Still hearing sound: extend the recording window.
                end = time.time() + TIMEOUT_LENGTH

            current = time.time()
            rec.append(data)

        # For debugging, the capture can be played back with
        # ``self._stream.write(...)`` instead of being transcribed.
        text = self.stt.run(b''.join(rec))
        print(text)
        print('Listening...')

    def listen(self):
        """Poll the microphone forever and start recording on loud input."""
        print('Listening...')
        while True:
            mic_input = self._stream.read(CHUNK)
            if self.rms(mic_input) >= THRESHOLD:
                self.record()


if __name__ == '__main__':
    # Script entry point: build a recorder and listen until interrupted.
    Recorder().listen()

speechtotextengines.py
import numpy
import deepspeech
import wave

class DeepSpeech:
    """Class to perform speech-to-text transcription and related functionality"""

    def __init__(self, model_path, scorer_path):
        """Load the model and enable its external scorer.

        Args:
            model_path: Path passed to ``deepspeech.Model``.
            scorer_path: Path passed to ``enableExternalScorer``.
        """
        self.model = deepspeech.Model(model_path)
        self.model.enableExternalScorer(scorer_path)

    def run(self, audio):
        """Transcribe a raw audio buffer and return the model's output.

        Args:
            audio: Bytes-like buffer, reinterpreted as ``int16`` samples.
                NOTE(review): presumably mono PCM at the model's sample
                rate -- confirm against the recorder settings.

        Returns:
            Whatever ``self.model.stt`` returns for the decoded samples.
        """
        print('start')
        samples = numpy.frombuffer(audio, numpy.int16)
        # Hand the transcription back to the caller; without the return the
        # method implicitly yields None and the result is lost.
        text = self.model.stt(samples)
        print('end')
        return text