Hi, I am trying to transcribe audio in chunks using DeepSpeech's Stream API on an NVIDIA GeForce GTX 1060 OC, and I get this error:
OP_REQUIRES failed at constant_op.cc:82 : Resource exhausted: OOM when allocating tensor of shape [4096,8192] and type float
Error running session: Resource exhausted: OOM when allocating tensor of shape [4096,8192] and type float
[[{{node cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/kernel}}]]
2021-02-04 09:00:34.158856: W tensorflow/core/common_runtime/bfc_allocator.cc:431] Allocator (GPU_0_bfc) ran out of memory trying to allocate 128.00MiB (rounded to 134217728)requested by op cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/kernel
Here is my server.py:
```python
engine = SpeechToText()
@app.route("/", methods=["GET"])
async def healthcheck(_):
return response.text("Welcome to DeepSpeech Server!")
@app.websocket("/api/v1/STT")
async def stt(request, ws):
global inference_start, inference_end
logger.debug(f"Received {request.method} request at {request.path}")
stream = engine.model.createStream()
try:
while True:
byte_audio = await ws.recv()
if byte_audio == b'close':
break
elif byte_audio != b'and':
inference_start = perf_counter()
audio = engine.convert(byte_audio)
stream.feedAudioContent(audio)
elif byte_audio == b'and':
text = stream.intermediateDecode()
print('TEXT: ', text)
inference_end = perf_counter() - inference_start
await ws.send(response.json_dumps(text))
logger.debug(f"Completed {request.method} request at {request.path} in {inference_end} seconds")
except Exception as e:
logger.debug(f"Failed to process {request.method} request at {request.path}. The exception is: {str(e)}.")
await ws.send(json.dumps(HTTPError("Something went wrong").__dict__))
finally:
text = stream.finishStream()
print('Sentences: ', text)
await ws.send(response.json_dumps(text))
await ws.close()
if __name__ == "__main__":
app.run(
host='192.168.88.192',
port=8003,
access_log=True,
debug=True
)```
And here is my SpeechToText class:
```class SpeechToText:
def __init__(self):
self.model = Model('/deepspeech-api/AM/output_graph_v0.7.pb')
self.model.enableExternalScorer('/deepspeech-api/LM/kenlm_optimize.scorer')
self.model.setScorerAlphaBeta(0.8197203731899586, 2.6569898782681394)
self.model.setBeamWidth(1000)
def convert(self, audio):
out, err = ffmpeg.input('pipe:0').output('pipe:1', f='WAV', acodec='pcm_s16le', ac=1, ar='16k',
loglevel='error', hide_banner=None).run(input=audio,
capture_stdout=True,
capture_stderr=True)
if err:
raise Exception(err)
out = BytesIO(out)
with wave.Wave_read(out) as wav:
pcm_data = np.frombuffer(wav.readframes(wav.getnframes()), dtype=np.int16)
return pcm_data```