Hey @lissyx,
Here is my alphabet.txt file:
# Each line in this file represents the Unicode codepoint (UTF-8 encoded)
# associated with a numeric label.
# A line that starts with # is a comment. You can escape it with \# if you wish
# to use '#' as a label.
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
# The last (non-comment) line needs to end with a newline.
After checking my transcripts, I found these characters:
### The following unique characters were found in your transcripts: ###
[' ', 'b', 'c', 'j', 'h', 't', 'x', 's', 'o', 'r', 'f', 'n', 'm', 'q', 'k', 'g', 'u', 'w', 'p', 'e', 'y', 'z', 'a', 'i', 'l', 'v', 'd']
I got the error below, but there are no extra spaces:
+ '[' '!' -f DeepSpeech.py ']'
+ python3 -u DeepSpeech.py --train_files minigir/train/train.csv --dev_files minigir/train/train.csv --test_files minigir/train/train.csv --train_batch_size 48 --dev_batch_size 40 --test_batch_size 40 --n_hidden 1024 --epochs 64 --early_stop True --es_steps 6 --es_mean_th 0.1 --es_std_th 0.1 --dropout_rate 0.30 --learning_rate 0.0005 --report_count 100 --export_dir metlife-models/ --checkpoint_dir metlife-models/check_point --alphabet_config_path metlife-models/alphabet.txt --lm_binary_path metlife-models/lm.binary --lm_trie_path metlife-models/trie
Traceback (most recent call last):
File "/home/metlife-vad/DeepSpeech/util/text.py", line 33, in _label_from_string
return self._str_to_label[string]
KeyError: ' '
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/metlife-vad/DeepSpeech/util/text.py", line 85, in text_to_char_array
transcript = np.asarray(alphabet.encode(series['transcript']))
File "/home/metlife-vad/DeepSpeech/util/text.py", line 47, in encode
res.append(self._label_from_string(char))
File "/home/metlife-vad/DeepSpeech/util/text.py", line 39, in _label_from_string
).with_traceback(e.__traceback__)
File "/home/metlife-vad/DeepSpeech/util/text.py", line 33, in _label_from_string
return self._str_to_label[string]
KeyError: "ERROR: Your transcripts contain characters (e.g. ' ') which do not occur in data/alphabet.txt! Use util/check_characters.py to see what characters are in your [train,dev,test].csv transcripts, and then add all these to data/alphabet.txt."
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "DeepSpeech.py", line 931, in <module>
absl.app.run(main)
File "/home/metlife-vad/.local/lib/python3.7/site-packages/absl/app.py", line 299, in run
_run_main(main, args)
File "/home/metlife-vad/.local/lib/python3.7/site-packages/absl/app.py", line 250, in _run_main
sys.exit(main(argv))
File "DeepSpeech.py", line 915, in main
train()
File "DeepSpeech.py", line 435, in train
train_phase=True)
File "/home/metlife-vad/DeepSpeech/util/feeding.py", line 101, in create_dataset
df['transcript'] = df.apply(text_to_char_array, alphabet=Config.alphabet, result_type='reduce', axis=1)
File "/home/metlife-vad/.local/lib/python3.7/site-packages/pandas/core/frame.py", line 6928, in apply
return op.get_result()
File "/home/metlife-vad/.local/lib/python3.7/site-packages/pandas/core/apply.py", line 186, in get_result
return self.apply_standard()
File "/home/metlife-vad/.local/lib/python3.7/site-packages/pandas/core/apply.py", line 292, in apply_standard
self.apply_series_generator()
File "/home/metlife-vad/.local/lib/python3.7/site-packages/pandas/core/apply.py", line 321, in apply_series_generator
results[i] = self.f(v)
File "/home/metlife-vad/.local/lib/python3.7/site-packages/pandas/core/apply.py", line 112, in f
return func(x, *args, **kwds)
File "/home/metlife-vad/DeepSpeech/util/text.py", line 91, in text_to_char_array
raise ValueError('While processing: {}\n{}'.format(series['wav_filename'], e))
ValueError: ('While processing: /home/metlife-vad/DeepSpeech/minigir/wav/tmp.wav\n"ERROR: Your transcripts contain characters (e.g. \' \') which do not occur in data/alphabet.txt! Use util/check_characters.py to see what characters are in your [train,dev,test].csv transcripts, and then add all these to data/alphabet.txt."', 'occurred at index 0')