How to find which file is causing an inf loss

I want to print, or save to a text log, which files are being trained on in which batch, because I have a huge dataset and some audio file in it is producing an inf loss.
Any suggestions?

If possible, give me a hint about where to put a print statement in DeepSpeech.py.
Thanks

Code for doing this has been merged into master: https://github.com/mozilla/DeepSpeech/pull/2289
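For readers on a version without that change, the idea behind it can be sketched as follows (a minimal, self-contained TF 1.x example with fake data and hypothetical filenames, not the actual PR code): carry the wav filenames through the tf.data pipeline so the training loop can log which files were in a batch whose loss came out non-finite.

import math
import tensorflow as tf

# Fake stand-ins for the real pipeline: two batches of filenames plus a
# per-batch loss, with the second batch's loss forced to inf.
filenames = tf.constant([['a.wav', 'b.wav'], ['c.wav', 'd.wav']])
losses = tf.constant([1.5, float('inf')])
dataset = tf.data.Dataset.from_tensor_slices((filenames, losses))
iterator = dataset.make_one_shot_iterator()
batch_filenames, batch_loss = iterator.get_next()

with tf.Session() as session:
    while True:
        try:
            names, loss_value = session.run([batch_filenames, batch_loss])
        except tf.errors.OutOfRangeError:
            break
        if math.isinf(loss_value):
            with open('inf_batches.log', 'a') as log:
                log.write('inf loss in batch: %s\n'
                          % ', '.join(n.decode('utf-8') for n in names))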


Hey, it's an alpha version; it doesn't support either the 0.5.1 or the 0.5.0 checkpoints, and it shows an error.
Thanks

Also, it takes much longer for the initial transcript and alphabet processing compared to the 0.5.1 version.

How can I add this code to 0.5.1? I just copied the alpha version's DeepSpeech.py into the 0.5.1 folder and tried to run training, but it showed an error.
alpha version

# Obtain the next batch of data

_, (batch_x, batch_seq_len), batch_y = iterator.get_next()
batch_filenames, (batch_x, batch_seq_len), batch_y = iterator.get_next()

0.5.1
(batch_x, batch_seq_len), batch_y = iterator.get_next()

So where in the code did you produce batch_filenames?

Any suggestions?
Thanks

Using this with 0.5.1 will be harder: you’ll have to rebase that PR as well as the previous one that added the filenames to the dataset. I wouldn’t recommend it if you’re not familiar with git and our codebase.

Another alternative is to do a binary search on the data by splitting the CSV in half and checking which half produces inf loss.
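If you go the binary-search route, a rough helper could look like this (the file names are hypothetical; it only does the split, and you then retrain on each half and recurse into whichever half still produces the inf loss):

import pandas

def split_csv(csv_path):
    # Split a training CSV into two halves for a binary search over the data.
    df = pandas.read_csv(csv_path)
    half = len(df) // 2
    df.iloc[:half].to_csv('first_half.csv', index=False)
    df.iloc[half:].to_csv('second_half.csv', index=False)

split_csv('train.csv')
# Train on first_half.csv and second_half.csv separately, then repeat the
# split on whichever half still yields an inf loss until the file is isolated.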

Thanks. I'm trying to merge the alpha loss-inf check code into 0.5.1; if I succeed, I'll let you know.

Hey, I tried it out and it's working: I modified the 0.5.1 code according to the alpha code and copied the alpha feeding.py and text.py into the 0.5.1 util folder, and now it's working fine.
But when I instead tried to change the 0.5.1 feeding.py and text.py myself, referring to the alpha feeding.py and text.py, I got an error: batch_fn expected three arguments but got four, the extra one being wav_filenames.

alpha code

# -*- coding: utf-8 -*-

from __future__ import absolute_import, division, print_function

import os

from functools import partial

import numpy as np
import pandas
import tensorflow as tf
import datetime

from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio

from util.config import Config
from util.logging import log_error
from util.text import text_to_char_array

def read_csvs(csv_files):
    source_data = None
    for csv in csv_files:
        file = pandas.read_csv(csv, encoding='utf-8', na_filter=False)
        #FIXME: not cross-platform
        csv_dir = os.path.dirname(os.path.abspath(csv))
        file['wav_filename'] = file['wav_filename'].str.replace(r'(^[^/])', lambda m: os.path.join(csv_dir, m.group(1))) # pylint: disable=cell-var-from-loop
        if source_data is None:
            source_data = file
        else:
            source_data = source_data.append(file)
    return source_data

def samples_to_mfccs(samples, sample_rate):
    spectrogram = contrib_audio.audio_spectrogram(samples,
                                                  window_size=Config.audio_window_samples,
                                                  stride=Config.audio_step_samples,
                                                  magnitude_squared=True)
    mfccs = contrib_audio.mfcc(spectrogram, sample_rate, dct_coefficient_count=Config.n_input)
    mfccs = tf.reshape(mfccs, [-1, Config.n_input])

    return mfccs, tf.shape(mfccs)[0]

def audiofile_to_features(wav_filename):
    samples = tf.io.read_file(wav_filename)
    decoded = contrib_audio.decode_wav(samples, desired_channels=1)
    features, features_len = samples_to_mfccs(decoded.audio, decoded.sample_rate)

    return features, features_len

def entry_to_features(wav_filename, transcript):
    # https://bugs.python.org/issue32117
    features, features_len = audiofile_to_features(wav_filename)
    return wav_filename, features, features_len, tf.SparseTensor(*transcript)

def to_sparse_tuple(sequence):
    r"""Creates a sparse representation of sequence.
    Returns a tuple with (indices, values, shape)
    """
    indices = np.asarray(list(zip([0]*len(sequence), range(len(sequence)))), dtype=np.int64)
    shape = np.asarray([1, len(sequence)], dtype=np.int64)
    return indices, sequence, shape

def create_dataset(csvs, batch_size, cache_path=''):
    df = read_csvs(csvs)
    df.sort_values(by='wav_filesize', inplace=True)

    try:
        # Convert to character index arrays
        df = df.apply(partial(text_to_char_array, alphabet=Config.alphabet), result_type='broadcast', axis=1)
    except ValueError as e:
        error_message, series, *_ = e.args
        log_error('While processing {}:\n  {}'.format(series['wav_filename'], error_message))
        exit(1)

    def generate_values():
        for _, row in df.iterrows():
            yield row.wav_filename, to_sparse_tuple(row.transcript)

    # Batching a dataset of 2D SparseTensors creates 3D batches, which fail
    # when passed to tf.nn.ctc_loss, so we reshape them to remove the extra
    # dimension here.
    def sparse_reshape(sparse):
        shape = sparse.dense_shape
        return tf.sparse.reshape(sparse, [shape[0], shape[2]])

    def batch_fn(wav_filenames, features, features_len, transcripts):
        features = tf.data.Dataset.zip((features, features_len))
        features = features.padded_batch(batch_size,
                                         padded_shapes=([None, Config.n_input], []))
        transcripts = transcripts.batch(batch_size).map(sparse_reshape)
        wav_filenames = wav_filenames.batch(batch_size)
        return tf.data.Dataset.zip((wav_filenames, features, transcripts))

    num_gpus = len(Config.available_devices)

    dataset = (tf.data.Dataset.from_generator(generate_values,
                                              output_types=(tf.string, (tf.int64, tf.int32, tf.int64)))
                              .map(entry_to_features, num_parallel_calls=tf.data.experimental.AUTOTUNE)
                              .cache(cache_path)
                              .window(batch_size, drop_remainder=True).flat_map(batch_fn)
                              .prefetch(num_gpus))

    return dataset

def secs_to_hours(secs):
    hours, remainder = divmod(secs, 3600)
    minutes, seconds = divmod(remainder, 60)
    return '%d:%02d:%02d' % (hours, minutes, seconds)
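(A quick way to confirm what this alpha pipeline yields — a hedged sketch: the CSV path is hypothetical, and it assumes the flags/Config have been initialized the way DeepSpeech.py does before the pipeline is built.)

import tensorflow as tf
from util.feeding import create_dataset

# The alpha iterator yields (filenames, (features, lengths), transcripts).
dataset = create_dataset(['train.csv'], batch_size=2)
iterator = dataset.make_one_shot_iterator()
batch_filenames, (batch_x, batch_seq_len), batch_y = iterator.get_next()

with tf.Session() as session:
    print(session.run(batch_filenames))  # the wav files in the first batch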

0.5.1 (my modified version)

# -*- coding: utf-8 -*-

from __future__ import absolute_import, division, print_function

import os

from functools import partial

import numpy as np
import pandas
import tensorflow as tf
import datetime

from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio

from util.config import Config
from util.logging import log_error
from util.text import text_to_char_array

def read_csvs(csv_files):
    source_data = None
    for csv in csv_files:
        file = pandas.read_csv(csv, encoding='utf-8', na_filter=False)
        #FIXME: not cross-platform
        csv_dir = os.path.dirname(os.path.abspath(csv))
        file['wav_filename'] = file['wav_filename'].str.replace(r'(^[^/])', lambda m: os.path.join(csv_dir, m.group(1))) # pylint: disable=cell-var-from-loop
        if source_data is None:
            source_data = file
        else:
            source_data = source_data.append(file)
    return source_data

def samples_to_mfccs(samples, sample_rate):
    spectrogram = contrib_audio.audio_spectrogram(samples,
                                                  window_size=Config.audio_window_samples,
                                                  stride=Config.audio_step_samples,
                                                  magnitude_squared=True)
    mfccs = contrib_audio.mfcc(spectrogram, sample_rate, dct_coefficient_count=Config.n_input)
    mfccs = tf.reshape(mfccs, [-1, Config.n_input])

    return mfccs, tf.shape(mfccs)[0]

def audiofile_to_features(wav_filename):
    samples = tf.read_file(wav_filename)
    decoded = contrib_audio.decode_wav(samples, desired_channels=1)
    features, features_len = samples_to_mfccs(decoded.audio, decoded.sample_rate)

    return features, features_len

def entry_to_features(wav_filename, transcript):
    # https://bugs.python.org/issue32117
    features, features_len = audiofile_to_features(wav_filename)
    return wav_filename, features, features_len, tf.SparseTensor(*transcript)

def to_sparse_tuple(sequence):
    r"""Creates a sparse representation of sequence.
    Returns a tuple with (indices, values, shape)
    """
    indices = np.asarray(list(zip([0]*len(sequence), range(len(sequence)))), dtype=np.int64)
    shape = np.asarray([1, len(sequence)], dtype=np.int64)
    return indices, sequence, shape

def create_dataset(csvs, batch_size, cache_path=''):
    df = read_csvs(csvs)
    df.sort_values(by='wav_filesize', inplace=True)
    print(';;;;;;;;;;;;;;;;;;;;;;;;;')

    # Convert to character index arrays
    df['transcript'] = df['transcript'].apply(partial(text_to_char_array, alphabet=Config.alphabet))

    def generate_values():
        for _, row in df.iterrows():
            yield row.wav_filename, to_sparse_tuple(row.transcript)

    # Batching a dataset of 2D SparseTensors creates 3D batches, which fail
    # when passed to tf.nn.ctc_loss, so we reshape them to remove the extra
    # dimension here.
    def sparse_reshape(sparse):
        shape = sparse.dense_shape
        return tf.sparse.reshape(sparse, [shape[0], shape[2]])

    def batch_fn(wav_filenames, features, features_len, transcripts):
        print('.....................................')
        features = tf.data.Dataset.zip((features, features_len))
        features = features.padded_batch(batch_size,
                                         padded_shapes=([None, Config.n_input], []))
        transcripts = transcripts.batch(batch_size).map(sparse_reshape)
        wav_filenames = wav_filenames.batch(batch_size)
        return tf.data.Dataset.zip((wav_filenames, features, transcripts))

    num_gpus = len(Config.available_devices)

    dataset = (tf.data.Dataset.from_generator(generate_values,
                                              output_types=(tf.string, (tf.int64, tf.int32, tf.int64)))
                              .map(entry_to_features, num_parallel_calls=tf.data.experimental.AUTOTUNE)
                              .cache(cache_path)
                              .window(batch_size, drop_remainder=True).flat_map(batch_fn)
                              .prefetch(num_gpus))

    return dataset

def secs_to_hours(secs):
    hours, remainder = divmod(secs, 3600)
    minutes, seconds = divmod(remainder, 60)
    return '%d:%02d:%02d' % (hours, minutes, seconds)

0.5.1 original code

# -*- coding: utf-8 -*-

from __future__ import absolute_import, division, print_function

import os

from functools import partial

import numpy as np
import pandas
import tensorflow as tf
import datetime

from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio

from util.config import Config
from util.text import text_to_char_array

def read_csvs(csv_files):
    source_data = None
    for csv in csv_files:
        file = pandas.read_csv(csv, encoding='utf-8', na_filter=False)
        #FIXME: not cross-platform
        csv_dir = os.path.dirname(os.path.abspath(csv))
        file['wav_filename'] = file['wav_filename'].str.replace(r'(^[^/])', lambda m: os.path.join(csv_dir, m.group(1))) # pylint: disable=cell-var-from-loop
        if source_data is None:
            source_data = file
        else:
            source_data = source_data.append(file)
    return source_data

def samples_to_mfccs(samples, sample_rate):
    spectrogram = contrib_audio.audio_spectrogram(samples,
                                                  window_size=Config.audio_window_samples,
                                                  stride=Config.audio_step_samples,
                                                  magnitude_squared=True)
    mfccs = contrib_audio.mfcc(spectrogram, sample_rate, dct_coefficient_count=Config.n_input)
    mfccs = tf.reshape(mfccs, [-1, Config.n_input])

    return mfccs, tf.shape(mfccs)[0]

def audiofile_to_features(wav_filename):
    samples = tf.read_file(wav_filename)
    decoded = contrib_audio.decode_wav(samples, desired_channels=1)
    features, features_len = samples_to_mfccs(decoded.audio, decoded.sample_rate)

    return features, features_len

def entry_to_features(wav_filename, transcript):
    # https://bugs.python.org/issue32117
    features, features_len = audiofile_to_features(wav_filename)
    return features, features_len, tf.SparseTensor(*transcript)

def to_sparse_tuple(sequence):
    r"""Creates a sparse representation of sequence.
    Returns a tuple with (indices, values, shape)
    """
    indices = np.asarray(list(zip([0]*len(sequence), range(len(sequence)))), dtype=np.int64)
    shape = np.asarray([1, len(sequence)], dtype=np.int64)
    return indices, sequence, shape

def create_dataset(csvs, batch_size, cache_path=''):
    df = read_csvs(csvs)
    df.sort_values(by='wav_filesize', inplace=True)
    print(';;;;;;;;;;;;;;;;;;;;;;;;;')

    # Convert to character index arrays
    df['transcript'] = df['transcript'].apply(partial(text_to_char_array, alphabet=Config.alphabet))

    def generate_values():
        for _, row in df.iterrows():
            yield row.wav_filename, to_sparse_tuple(row.transcript)

    # Batching a dataset of 2D SparseTensors creates 3D batches, which fail
    # when passed to tf.nn.ctc_loss, so we reshape them to remove the extra
    # dimension here.
    def sparse_reshape(sparse):
        shape = sparse.dense_shape
        return tf.sparse.reshape(sparse, [shape[0], shape[2]])

    def batch_fn(features, features_len, transcripts):
        print('.....................................')
        features = tf.data.Dataset.zip((features, features_len))
        features = features.padded_batch(batch_size,
                                         padded_shapes=([None, Config.n_input], []))
        transcripts = transcripts.batch(batch_size).map(sparse_reshape)
        return tf.data.Dataset.zip((features, transcripts))

    num_gpus = len(Config.available_devices)

    dataset = (tf.data.Dataset.from_generator(generate_values,
                                              output_types=(tf.string, (tf.int64, tf.int32, tf.int64)))
                              .map(entry_to_features, num_parallel_calls=tf.data.experimental.AUTOTUNE)
                              .cache(cache_path)
                              .window(batch_size, drop_remainder=True).flat_map(batch_fn)
                              .prefetch(num_gpus))

    return dataset

def secs_to_hours(secs):
    hours, remainder = divmod(secs, 3600)
    minutes, seconds = divmod(remainder, 60)
    return '%d:%02d:%02d' % (hours, minutes, seconds)

So I overwrote feeding.py and text.py, and now it's working.

thanks

If possible, please tell me why I was getting the error.

Thanks
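A likely cause of that error: window(batch_size, ...).flat_map(batch_fn) calls batch_fn with one argument per component of the dataset's elements, so entry_to_features and batch_fn have to be changed together. If entry_to_features already returns wav_filename but batch_fn still has the three-parameter 0.5.1 signature, you get exactly the "expected three, got four" failure. A toy reproduction, independent of DeepSpeech:

import tensorflow as tf

def batch_fn(features, features_len, transcripts):  # three parameters
    return tf.data.Dataset.zip((features, features_len, transcripts))

# Elements with four components, as after the alpha entry_to_features change:
dataset = tf.data.Dataset.from_tensors(('a.wav', 1.0, 2, 'hi'))
# Raises TypeError: batch_fn() takes 3 positional arguments but 4 were given
dataset.window(1).flat_map(batch_fn)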

On v0.5.1, do:

git cherry-pick 007e512

then, update DeepSpeech.py as attached: DeepSpeech.py.zip (9.8 KB)

This should work, if I didn’t miss anything.
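Spelled out as a full sequence (the clone URL is the one linked above; the v0.5.1 tag name is assumed), that would be roughly:

git clone https://github.com/mozilla/DeepSpeech
cd DeepSpeech
git checkout v0.5.1        # start from the 0.5.1 release
git cherry-pick 007e512    # the commit referenced above
# then replace DeepSpeech.py with the attached version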