How to find the which file is making loss inf

vigneshgig · August 20, 2019, 5:29am

I want to print or save it as a text log , which files is training on which batch.
because i have huge data in that in some audio file making loss has inf .
any suggestion

if any give me a hint about where i code print statement in the Deepspeech.py
thanks,

reuben · August 20, 2019, 7:32am

Code for doing this has been merged on master: https://github.com/mozilla/DeepSpeech/pull/2289

vigneshgig · August 20, 2019, 10:27am

hey it is alpha version it does not support both the checkpoint 0.5.1 and 0.5.0
it showing error.
thanks

vigneshgig · August 20, 2019, 10:30am

and it taking so much time for initial transcript and alphabet process compare to 0.5.1 version

vigneshgig · August 20, 2019, 11:35am

how can I add this code in 0.5.1 I just copied the alpha version Deepspeech.py and pasted in 0.5.1 folder and i tried to run the train but shown a error
alpha version

Obtain the next batch of data

_, (batch_x, batch_seq_len), batch_y = iterator.get_next()
batch_filenames, (batch_x, batch_seq_len), batch_y = iterator.get_next()

0.5.1
(batch_x, batch_seq_len), batch_y = iterator.get_next()

so where you coded to get the batch_filenames

any suggestion
thanks

reuben · August 20, 2019, 11:55am

Using this with 0.5.1 will be harder, you’ll have to rebase that PR as well as the previous one that added the filenames to the dataset. I wouldn’t recommend it if you’re not familiar with git and our codebase.

Another alternative is to do a binary search on the data by splitting the CSV in half and checking which half produces inf loss.

vigneshgig · August 20, 2019, 2:03pm

thanks but I trying to merge the alpha code of loss:inf check and 0.5.1 if succeed i let you know.

vigneshgig · August 20, 2019, 2:50pm

hey I tried it out it’s working I modified the 0.5.1 code according to the alpha code and I copied the alpha feeding.py and text.py to the 0.5.1 util folder .Now it’s working fine.
but I tried to change the 0.5.1 feeding.py and text.py by refering the alpha code feeding.py and text.py .but I got error which is batch_fn expected three but get four argument. which is wav_filenames.

alpha code

-- coding: utf-8 --

from future import absolute_import, division, print_function

import os

from functools import partial

import numpy as np
import pandas
import tensorflow as tf
import datetime

from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio

from util.config import Config
from util.logging import log_error
from util.text import text_to_char_array

def read_csvs(csv_files):
source_data = None
for csv in csv_files:
file = pandas.read_csv(csv, encoding=‘utf-8’, na_filter=False)
#FIXME: not cross-platform
csv_dir = os.path.dirname(os.path.abspath(csv))
file[‘wav_filename’] = file[‘wav_filename’].str.replace(r’(^[^/])’, lambda m: os.path.join(csv_dir, m.group(1))) # pylint: disable=cell-var-from-loop
if source_data is None:
source_data = file
else:
source_data = source_data.append(file)
return source_data

def samples_to_mfccs(samples, sample_rate):
spectrogram = contrib_audio.audio_spectrogram(samples,
window_size=Config.audio_window_samples,
stride=Config.audio_step_samples,
magnitude_squared=True)
mfccs = contrib_audio.mfcc(spectrogram, sample_rate, dct_coefficient_count=Config.n_input)
mfccs = tf.reshape(mfccs, [-1, Config.n_input])

return mfccs, tf.shape(mfccs)[0]

def audiofile_to_features(wav_filename):
samples = tf.io.read_file(wav_filename)
decoded = contrib_audio.decode_wav(samples, desired_channels=1)
features, features_len = samples_to_mfccs(decoded.audio, decoded.sample_rate)

return features, features_len

def entry_to_features(wav_filename, transcript):
# https://bugs.python.org/issue32117
features, features_len = audiofile_to_features(wav_filename)
return wav_filename, features, features_len, tf.SparseTensor(*transcript)

def to_sparse_tuple(sequence):
r""“Creates a sparse representention of sequence.
Returns a tuple with (indices, values, shape)
“””
indices = np.asarray(list(zip([0]*len(sequence), range(len(sequence)))), dtype=np.int64)
shape = np.asarray([1, len(sequence)], dtype=np.int64)
return indices, sequence, shape

def create_dataset(csvs, batch_size, cache_path=’’):
df = read_csvs(csvs)
df.sort_values(by=‘wav_filesize’, inplace=True)

try:
    # Convert to character index arrays
    df = df.apply(partial(text_to_char_array, alphabet=Config.alphabet), result_type='broadcast', axis=1)
except ValueError as e:
    error_message, series, *_ = e.args
    log_error('While processing {}:\n  {}'.format(series['wav_filename'], error_message))
    exit(1)

def generate_values():
    for _, row in df.iterrows():
        yield row.wav_filename, to_sparse_tuple(row.transcript)

# Batching a dataset of 2D SparseTensors creates 3D batches, which fail
# when passed to tf.nn.ctc_loss, so we reshape them to remove the extra
# dimension here.
def sparse_reshape(sparse):
    shape = sparse.dense_shape
    return tf.sparse.reshape(sparse, [shape[0], shape[2]])

def batch_fn(wav_filenames, features, features_len, transcripts):
    features = tf.data.Dataset.zip((features, features_len))
    features = features.padded_batch(batch_size,
                                     padded_shapes=([None, Config.n_input], []))
    transcripts = transcripts.batch(batch_size).map(sparse_reshape)
    wav_filenames = wav_filenames.batch(batch_size)
    return tf.data.Dataset.zip((wav_filenames, features, transcripts))

num_gpus = len(Config.available_devices)

dataset = (tf.data.Dataset.from_generator(generate_values,
                                          output_types=(tf.string, (tf.int64, tf.int32, tf.int64)))
                          .map(entry_to_features, num_parallel_calls=tf.data.experimental.AUTOTUNE)
                          .cache(cache_path)
                          .window(batch_size, drop_remainder=True).flat_map(batch_fn)
                          .prefetch(num_gpus))

return dataset

def secs_to_hours(secs):
hours, remainder = divmod(secs, 3600)
minutes, seconds = divmod(remainder, 60)
return ‘%d:%02d:%02d’ % (hours, minutes, seconds)

0.5.1

-- coding: utf-8 --

from future import absolute_import, division, print_function

import os

from functools import partial

import numpy as np
import pandas
import tensorflow as tf
import datetime

from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio

from util.config import Config
from util.logging import log_error
from util.text import text_to_char_array

def read_csvs(csv_files):
source_data = None
for csv in csv_files:
file = pandas.read_csv(csv, encoding=‘utf-8’, na_filter=False)
#FIXME: not cross-platform
csv_dir = os.path.dirname(os.path.abspath(csv))
file[‘wav_filename’] = file[‘wav_filename’].str.replace(r’(^[^/])’, lambda m: os.path.join(csv_dir, m.group(1))) # pylint: disable=cell-var-from-loop
if source_data is None:
source_data = file
else:
source_data = source_data.append(file)
return source_data

def samples_to_mfccs(samples, sample_rate):
spectrogram = contrib_audio.audio_spectrogram(samples,
window_size=Config.audio_window_samples,
stride=Config.audio_step_samples,
magnitude_squared=True)
mfccs = contrib_audio.mfcc(spectrogram, sample_rate, dct_coefficient_count=Config.n_input)
mfccs = tf.reshape(mfccs, [-1, Config.n_input])

return mfccs, tf.shape(mfccs)[0]

def audiofile_to_features(wav_filename):
samples = tf.read_file(wav_filename)
decoded = contrib_audio.decode_wav(samples, desired_channels=1)
features, features_len = samples_to_mfccs(decoded.audio, decoded.sample_rate)

return features, features_len

def entry_to_features(wav_filename, transcript):
# https://bugs.python.org/issue32117
features, features_len = audiofile_to_features(wav_filename)
return wav_filename, features, features_len, tf.SparseTensor(*transcript)

def to_sparse_tuple(sequence):
r""“Creates a sparse representention of sequence.
Returns a tuple with (indices, values, shape)
“””
indices = np.asarray(list(zip([0]*len(sequence), range(len(sequence)))), dtype=np.int64)
shape = np.asarray([1, len(sequence)], dtype=np.int64)
return indices, sequence, shape

def create_dataset(csvs, batch_size, cache_path=’’):
df = read_csvs(csvs)
df.sort_values(by=‘wav_filesize’, inplace=True)
print(’;;;;;;;;;;;;;;;;;;;;;;;;;’)

# Convert to character index arrays
df['transcript'] = df['transcript'].apply(partial(text_to_char_array, alphabet=Config.alphabet))

def generate_values():
    for _, row in df.iterrows():
        yield row.wav_filename, to_sparse_tuple(row.transcript)

# Batching a dataset of 2D SparseTensors creates 3D batches, which fail
# when passed to tf.nn.ctc_loss, so we reshape them to remove the extra
# dimension here.
def sparse_reshape(sparse):
    shape = sparse.dense_shape
    return tf.sparse.reshape(sparse, [shape[0], shape[2]])

def batch_fn(wav_filenames,features, features_len, transcripts):
    print('.....................................')
    features = tf.data.Dataset.zip((features, features_len))
    features = features.padded_batch(batch_size,
                                     padded_shapes=([None, Config.n_input], []))
    transcripts = transcripts.batch(batch_size).map(sparse_reshape)
    wav_filenames = wav_filenames.batch(batch_size)
    return tf.data.Dataset.zip((wav_filenames,features, transcripts))

num_gpus = len(Config.available_devices)

dataset = (tf.data.Dataset.from_generator(generate_values,
                                          output_types=(tf.string, (tf.int64, tf.int32, tf.int64)))
                          .map(entry_to_features, num_parallel_calls=tf.data.experimental.AUTOTUNE)
                          .cache(cache_path)
                          .window(batch_size, drop_remainder=True).flat_map(batch_fn)
                          .prefetch(num_gpus))

return dataset

def secs_to_hours(secs):
hours, remainder = divmod(secs, 3600)
minutes, seconds = divmod(remainder, 60)
return ‘%d:%02d:%02d’ % (hours, minutes, seconds)

0.5.1 original code

-- coding: utf-8 --

from future import absolute_import, division, print_function

import os

from functools import partial

import numpy as np
import pandas
import tensorflow as tf
import datetime

from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio

from util.config import Config
from util.text import text_to_char_array

def read_csvs(csv_files):
source_data = None
for csv in csv_files:
file = pandas.read_csv(csv, encoding=‘utf-8’, na_filter=False)
#FIXME: not cross-platform
csv_dir = os.path.dirname(os.path.abspath(csv))
file[‘wav_filename’] = file[‘wav_filename’].str.replace(r’(^[^/])’, lambda m: os.path.join(csv_dir, m.group(1))) # pylint: disable=cell-var-from-loop
if source_data is None:
source_data = file
else:
source_data = source_data.append(file)
return source_data

def samples_to_mfccs(samples, sample_rate):
spectrogram = contrib_audio.audio_spectrogram(samples,
window_size=Config.audio_window_samples,
stride=Config.audio_step_samples,
magnitude_squared=True)
mfccs = contrib_audio.mfcc(spectrogram, sample_rate, dct_coefficient_count=Config.n_input)
mfccs = tf.reshape(mfccs, [-1, Config.n_input])

return mfccs, tf.shape(mfccs)[0]

def audiofile_to_features(wav_filename):
samples = tf.read_file(wav_filename)
decoded = contrib_audio.decode_wav(samples, desired_channels=1)
features, features_len = samples_to_mfccs(decoded.audio, decoded.sample_rate)

return features, features_len

def entry_to_features(wav_filename, transcript):
# https://bugs.python.org/issue32117
features, features_len = audiofile_to_features(wav_filename)
return features, features_len, tf.SparseTensor(*transcript)

def to_sparse_tuple(sequence):
r""“Creates a sparse representention of sequence.
Returns a tuple with (indices, values, shape)
“””
indices = np.asarray(list(zip([0]*len(sequence), range(len(sequence)))), dtype=np.int64)
shape = np.asarray([1, len(sequence)], dtype=np.int64)
return indices, sequence, shape

def create_dataset(csvs, batch_size, cache_path=’’):
df = read_csvs(csvs)
df.sort_values(by=‘wav_filesize’, inplace=True)
print(’;;;;;;;;;;;;;;;;;;;;;;;;;’)

# Convert to character index arrays
df['transcript'] = df['transcript'].apply(partial(text_to_char_array, alphabet=Config.alphabet))

def generate_values():
    for _, row in df.iterrows():
        yield row.wav_filename, to_sparse_tuple(row.transcript)

# Batching a dataset of 2D SparseTensors creates 3D batches, which fail
# when passed to tf.nn.ctc_loss, so we reshape them to remove the extra
# dimension here.
def sparse_reshape(sparse):
    shape = sparse.dense_shape
    return tf.sparse.reshape(sparse, [shape[0], shape[2]])

def batch_fn(features, features_len, transcripts):
    print('.....................................')
    features = tf.data.Dataset.zip((features, features_len))
    features = features.padded_batch(batch_size,
                                     padded_shapes=([None, Config.n_input], []))
    transcripts = transcripts.batch(batch_size).map(sparse_reshape)
    return tf.data.Dataset.zip((features, transcripts))

num_gpus = len(Config.available_devices)

dataset = (tf.data.Dataset.from_generator(generate_values,
                                          output_types=(tf.string, (tf.int64, tf.int32, tf.int64)))
                          .map(entry_to_features, num_parallel_calls=tf.data.experimental.AUTOTUNE)
                          .cache(cache_path)
                          .window(batch_size, drop_remainder=True).flat_map(batch_fn)
                          .prefetch(num_gpus))

return dataset

def secs_to_hours(secs):
hours, remainder = divmod(secs, 3600)
minutes, seconds = divmod(remainder, 60)
return ‘%d:%02d:%02d’ % (hours, minutes, seconds)

So I overwrited the feeding.py and text.py.
Now Its working

thanks

If possible please tell me why I m getting the error.

Thanks

SamahZaro · August 24, 2019, 7:00am

On v.0.5.1, do:

git cherry-pick 007e512

then, update DeepSpeech.py as attached: DeepSpeech.py.zip (9.8 KB)

This should work, if I didn’t miss anything.