I have downloaded the 4 GB German language dataset and unzipped it into the deepspeech-data
directory, and it is visible inside the Docker container:
root@294bb9e108fd:/DeepSpeech# ls -la deepspeech-data/
total 72144
drwxr-xr-x 3 root root 36864 Feb 24 15:31 .
drwxr-xr-x 1 root root 4096 Feb 24 15:04 ..
drwxrwxr-x 2 1000 1000 27156480 Feb 24 14:38 clips
-rw-rw-r-- 1 1000 1000 735847 Feb 25 2019 dev.tsv
-rw-rw-r-- 1 1000 1000 1837461 Feb 25 2019 invalidated.tsv
-rw-rw-r-- 1 1000 1000 62 Feb 25 2019 other.tsv
-rw-rw-r-- 1 1000 1000 725953 Feb 25 2019 test.tsv
-rw-rw-r-- 1 1000 1000 868206 Feb 25 2019 train.tsv
-rw-rw-r-- 1 1000 1000 42490887 Feb 25 2019 validated.tsv
root@294bb9e108fd:/DeepSpeech#
When I run the importer [import_cv2.py], I run into this issue:
root@294bb9e108fd:/DeepSpeech# bin/import_cv2.py deepspeech-data/
/bin/sh: 1: sox: not found
SoX could not be found!
If you do not have SoX, proceed here:
- - - http://sox.sourceforge.net/ - - -
If you do (or think that you should) have SoX, double-check your
path variables.
Loading TSV file: /DeepSpeech/deepspeech-data/test.tsv
Importing mp3 files...
WARNING: No --validate_label_locale specified, your might end with inconsistent dataset.
WARNING: No --validate_label_locale specified, your might end with inconsistent dataset.
This install of SoX cannot process .mp3 files.
This install of SoX cannot process .mp3 files.
This install of SoX cannot process .mp3 files.
This install of SoX cannot process .mp3 files.
This install of SoX cannot process .mp3 files.
This install of SoX cannot process .mp3 files.
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/pool.py", line 119, in worker
result = (True, func(*args, **kwds))
File "bin/import_cv2.py", line 65, in one_sample
_maybe_convert_wav(mp3_filename, wav_filename)
File "bin/import_cv2.py", line 185, in _maybe_convert_wav
transformer.build(mp3_filename, wav_filename)
File "/usr/local/lib/python3.6/dist-packages/sox/transform.py", line 594, in build
input_filepath, input_array, sample_rate_in
File "/usr/local/lib/python3.6/dist-packages/sox/transform.py", line 496, in _parse_inputs
input_format['channels'] = file_info.channels(input_filepath)
File "/usr/local/lib/python3.6/dist-packages/sox/file_info.py", line 82, in channels
output = soxi(input_filepath, 'c')
File "/usr/local/lib/python3.6/dist-packages/sox/core.py", line 149, in soxi
stderr=subprocess.PIPE
File "/usr/lib/python3.6/subprocess.py", line 356, in check_output
**kwargs).stdout
File "/usr/lib/python3.6/subprocess.py", line 423, in run
with Popen(*popenargs, **kwargs) as process:
File "/usr/lib/python3.6/subprocess.py", line 729, in __init__
restore_signals, start_new_session)
File "/usr/lib/python3.6/subprocess.py", line 1364, in _execute_child
raise child_exception_type(errno_num, err_msg, err_filename)
FileNotFoundError: [Errno 2] No such file or directory: 'sox': 'sox'
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "bin/import_cv2.py", line 221, in <module>
main()
File "bin/import_cv2.py", line 216, in main
_preprocess_data(PARAMS.tsv_dir, audio_dir, PARAMS.space_after_every_character)
File "bin/import_cv2.py", line 172, in _preprocess_data
set_samples = _maybe_convert_set(dataset, tsv_dir, audio_dir, space_after_every_character)
File "bin/import_cv2.py", line 127, in _maybe_convert_set
for i, processed in enumerate(pool.imap_unordered(one_sample, samples), start=1):
File "/usr/lib/python3.6/multiprocessing/pool.py", line 735, in next
raise value
FileNotFoundError: [Errno 2] No such file or directory: 'sox': 'sox'
root@294bb9e108fd:/DeepSpeech#