Finally, I’m trying my hand at 0.6.1.
I downloaded the latest 0.6.1 and cloned the latest master repo from GitHub.
I tested the requirements and all are fine, in the sense that I can use DeepSpeech and the existing 0.6.1 models for inference.
I’m using TensorFlow GPU 1.14.0 and DeepSpeech GPU 0.6.1.
However, while training I’m getting the following error:
0118 20:14:40.234090 139903553251136 saver.py:1280] Restoring parameters from /home/sayantan/Desktop/ai_learning/deepspeech_0_6/deepspeech-0.6.1-checkpoint/best_dev-233784
Traceback (most recent call last):
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1356, in _do_call
return fn(*args)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1341, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1429, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.NotFoundError: Key cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/bias/Adam not found in checkpoint
[[{{node save/RestoreV2}}]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1286, in restore
{self.saver_def.filename_tensor_name: save_path})
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 950, in run
run_metadata_ptr)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1173, in _run
feed_dict_tensor, options, run_metadata)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1350, in _do_run
run_metadata)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1370, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.NotFoundError: Key cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/bias/Adam not found in checkpoint
[[node save/RestoreV2 (defined at DeepSpeech.py:495) ]]
Original stack trace for 'save/RestoreV2':
File "DeepSpeech.py", line 974, in <module>
absl.app.run(main)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/absl/app.py", line 299, in run
_run_main(main, args)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/absl/app.py", line 250, in _run_main
sys.exit(main(argv))
File "DeepSpeech.py", line 947, in main
train()
File "DeepSpeech.py", line 495, in train
checkpoint_saver = tfv1.train.Saver(max_to_keep=FLAGS.max_to_keep)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 825, in __init__
self.build()
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 837, in build
self._build(self._filename, build_save=True, build_restore=True)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 875, in _build
build_restore=build_restore)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 508, in _build_internal
restore_sequentially, reshape)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 328, in _AddRestoreOps
restore_sequentially)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 575, in bulk_restore
return io_ops.restore_v2(filename_tensor, names, slices, dtypes)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/ops/gen_io_ops.py", line 1696, in restore_v2
name=name)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper
op_def=op_def)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3616, in create_op
op_def=op_def)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2005, in __init__
self._traceback = tf_stack.extract_stack()
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1296, in restore
names_to_keys = object_graph_key_mapping(save_path)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1614, in object_graph_key_mapping
object_graph_string = reader.get_tensor(trackable.OBJECT_GRAPH_PROTO_KEY)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/pywrap_tensorflow_internal.py", line 678, in get_tensor
return CheckpointReader_GetTensor(self, compat.as_bytes(tensor_str))
tensorflow.python.framework.errors_impl.NotFoundError: Key _CHECKPOINTABLE_OBJECT_GRAPH not found in checkpoint
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "DeepSpeech.py", line 974, in <module>
absl.app.run(main)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/absl/app.py", line 299, in run
_run_main(main, args)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/absl/app.py", line 250, in _run_main
sys.exit(main(argv))
File "DeepSpeech.py", line 947, in main
train()
File "DeepSpeech.py", line 555, in train
loaded = try_loading(session, checkpoint_saver, 'checkpoint', 'most recent')
File "DeepSpeech.py", line 405, in try_loading
saver.restore(session, checkpoint_path)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1302, in restore
err, "a Variable name or other graph key that is missing")
tensorflow.python.framework.errors_impl.NotFoundError: Restoring from checkpoint failed. This is most likely due to a Variable name or other graph key that is missing from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:
Key cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/bias/Adam not found in checkpoint
[[node save/RestoreV2 (defined at DeepSpeech.py:495) ]]
Original stack trace for 'save/RestoreV2':
File "DeepSpeech.py", line 974, in <module>
absl.app.run(main)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/absl/app.py", line 299, in run
_run_main(main, args)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/absl/app.py", line 250, in _run_main
sys.exit(main(argv))
File "DeepSpeech.py", line 947, in main
train()
File "DeepSpeech.py", line 495, in train
checkpoint_saver = tfv1.train.Saver(max_to_keep=FLAGS.max_to_keep)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 825, in __init__
self.build()
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 837, in build
self._build(self._filename, build_save=True, build_restore=True)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 875, in _build
build_restore=build_restore)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 508, in _build_internal
restore_sequentially, reshape)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 328, in _AddRestoreOps
restore_sequentially)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 575, in bulk_restore
return io_ops.restore_v2(filename_tensor, names, slices, dtypes)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/ops/gen_io_ops.py", line 1696, in restore_v2
name=name)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper
op_def=op_def)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3616, in create_op
op_def=op_def)
File "/home/sayantan/anaconda3/envs/deepspeech_6/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2005, in __init__
self._traceback = tf_stack.extract_stack()
Could you confirm why this is happening? I thought this was due to a difference between the checkpoint and code versions. However, in the new environment that I just created, the code was all freshly pulled from GitHub and it all relates to the 0.6.1 release.
Thanks a lot team…!!
EDIT
Further info:
Cuda version:
Cuda compilation tools, release 10.0, V10.0.130
Cudnn version:
7.4.2
#define CUDNN_MAJOR 7
#define CUDNN_MINOR 4
#define CUDNN_PATCHLEVEL 2