Yes, I did, and it is not working.
See below:
You see, one GPU card is busy.
When I run DeepSpeech, it tries to run on both of them, which causes an OOM error.
D Starting coordinator…
D Coordinator started.
0
0
0
D Starting queue runners…
D Queue runners started.
WARNING: libdeepspeech failed to load, resorting to deprecated code
Refer to README.md for instructions on installing libdeepspeech
E OOM when allocating tensor with shape[78,10,512]
E [[Node: tower_1/bidirectional_rnn/bw/bw/TensorArrayStack/TensorArrayGatherV3 = TensorArrayGatherV3[_class=[“loc:@tower_1/bidirectional_rnn/bw/bw/TensorArray”], dtype=DT_FLOAT, element_shape=[?,512], _device=“/job:localhost/replica:0/task:0/device:GPU:1”](tower_1/bidirectional_rnn/bw/bw/TensorArray, tower_1/bidirectional_rnn/bw/bw/TensorArrayStack/range, tower_1/bidirectional_rnn/bw/bw/while/Exit_1)]]
E [[Node: tower_1/gradients/tower_1/MatMul_grad/tuple/control_dependency_1/_697 = _Recvclient_terminated=false, recv_device=“/job:localhost/replica:0/task:0/device:CPU:0”, send_device=“/job:localhost/replica:0/task:0/device:GPU:1”, send_device_incarnation=1, tensor_name=“edge_4787_tower_1/gradients/tower_1/MatMul_grad/tuple/control_dependency_1”, tensor_type=DT_FLOAT, _device=“/job:localhost/replica:0/task:0/device:CPU:0”]]
E
E Caused by op u’tower_1/bidirectional_rnn/bw/bw/TensorArrayStack/TensorArrayGatherV3’, defined at:
E File “DeepSpeech.py”, line 1892, in
E tf.app.run()
E File “/opt/anaconda/lib/python2.7/site-packages/tensorflow/python/platform/app.py”, line 133, in run
E _sys.exit(main(argv))
E File “DeepSpeech.py”, line 1849, in main
E train()
E File “DeepSpeech.py”, line 1555, in train
E results_tuple, gradients, mean_edit_distance, loss = get_tower_results(model_feeder, optimizer)
E File “DeepSpeech.py”, line 642, in get_tower_results
E calculate_mean_edit_distance_and_loss(model_feeder, i, no_dropout if optimizer is None else dropout_rates)
E File “DeepSpeech.py”, line 523, in calculate_mean_edit_distance_and_loss
E logits = BiRNN(batch_x, tf.to_int64(batch_seq_len), dropout)
E File “DeepSpeech.py”, line 460, in BiRNN
E sequence_length=seq_length)
E File “/opt/anaconda/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py”, line 428, in bidirectional_dynamic_rnn
E time_major=time_major, scope=bw_scope)
E File “/opt/anaconda/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py”, line 616, in dynamic_rnn
E dtype=dtype)
E File “/opt/anaconda/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py”, line 795, in _dynamic_rnn_loop
E final_outputs = tuple(ta.stack() for ta in output_final_ta)
E File “/opt/anaconda/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py”, line 795, in
E final_outputs = tuple(ta.stack() for ta in output_final_ta)
E File “/opt/anaconda/lib/python2.7/site-packages/tensorflow/python/ops/tensor_array_ops.py”, line 889, in stack
E return self._implementation.stack(name=name)
E File “/opt/anaconda/lib/python2.7/site-packages/tensorflow/python/ops/tensor_array_ops.py”, line 288, in stack
E return self.gather(math_ops.range(0, self.size()), name=name)
E File “/opt/anaconda/lib/python2.7/site-packages/tensorflow/python/ops/tensor_array_ops.py”, line 302, in gather
E element_shape=element_shape)
E File “/opt/anaconda/lib/python2.7/site-packages/tensorflow/python/ops/gen_data_flow_ops.py”, line 4158, in _tensor_array_gather_v3
E flow_in=flow_in, dtype=dtype, element_shape=element_shape, name=name)
E File “/opt/anaconda/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py”, line 787, in _apply_op_helper
E op_def=op_def)
E File “/opt/anaconda/lib/python2.7/site-packages/tensorflow/python/framework/ops.py”, line 3081, in create_op
E op_def=op_def)
E File “/opt/anaconda/lib/python2.7/site-packages/tensorflow/python/framework/ops.py”, line 1528, in init
E self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
E
E ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[78,10,512]
E [[Node: tower_1/bidirectional_rnn/bw/bw/TensorArrayStack/TensorArrayGatherV3 = TensorArrayGatherV3[_class=[“loc:@tower_1/bidirectional_rnn/bw/bw/TensorArray”], dtype=DT_FLOAT, element_shape=[?,512], _device=“/job:localhost/replica:0/task:0/device:GPU:1”](tower_1/bidirectional_rnn/bw/bw/TensorArray, tower_1/bidirectional_rnn/bw/bw/TensorArrayStack/range, tower_1/bidirectional_rnn/bw/bw/while/Exit_1)]]
E [[Node: tower_1/gradients/tower_1/MatMul_grad/tuple/control_dependency_1/_697 = _Recvclient_terminated=false, recv_device=“/job:localhost/replica:0/task:0/device:CPU:0”, send_device=“/job:localhost/replica:0/task:0/device:GPU:1”, send_device_incarnation=1, tensor_name=“edge_4787_tower_1/gradients/tower_1/MatMul_grad/tuple/control_dependency_1”, tensor_type=DT_FLOAT, _device=“/job:localhost/replica:0/task:0/device:CPU:0”]]
E
Traceback (most recent call last):
File “DeepSpeech.py”, line 1649, in train
step = session.run(global_step, feed_dict=feed_dict)
File “/opt/anaconda/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py”, line 524, in run
run_metadata=run_metadata)
File “/opt/anaconda/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py”, line 996, in run
run_metadata=run_metadata)
File “/opt/anaconda/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py”, line 1087, in run
raise six.reraise(*original_exc_info)
File “/opt/anaconda/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py”, line 1072, in run
return self._sess.run(*args, **kwargs)
File “/opt/anaconda/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py”, line 1144, in run
run_metadata=run_metadata)
File “/opt/anaconda/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py”, line 924, in run
return self._sess.run(*args, **kwargs)
File “/opt/anaconda/lib/python2.7/site-packages/tensorflow/python/client/session.py”, line 889, in run
run_metadata_ptr)
File “/opt/anaconda/lib/python2.7/site-packages/tensorflow/python/client/session.py”, line 1120, in _run
feed_dict_tensor, options, run_metadata)
File “/opt/anaconda/lib/python2.7/site-packages/tensorflow/python/client/session.py”, line 1317, in _do_run
options, run_metadata)
File “/opt/anaconda/lib/python2.7/site-packages/tensorflow/python/client/session.py”, line 1336, in _do_call
raise type(e)(node_def, op, message)
ResourceExhaustedError: OOM when allocating tensor with shape[78,10,512]
[[Node: tower_1/bidirectional_rnn/bw/bw/TensorArrayStack/TensorArrayGatherV3 = TensorArrayGatherV3[_class=[“loc:@tower_1/bidirectional_rnn/bw/bw/TensorArray”], dtype=DT_FLOAT, element_shape=[?,512], _device=“/job:localhost/replica:0/task:0/device:GPU:1”](tower_1/bidirectional_rnn/bw/bw/TensorArray, tower_1/bidirectional_rnn/bw/bw/TensorArrayStack/range, tower_1/bidirectional_rnn/bw/bw/while/Exit_1)]]
[[Node: tower_1/gradients/tower_1/MatMul_grad/tuple/control_dependency_1/_697 = _Recvclient_terminated=false, recv_device=“/job:localhost/replica:0/task:0/device:CPU:0”, send_device=“/job:localhost/replica:0/task:0/device:GPU:1”, send_device_incarnation=1, tensor_name=“edge_4787_tower_1/gradients/tower_1/MatMul_grad/tuple/control_dependency_1”, tensor_type=DT_FLOAT, _device=“/job:localhost/replica:0/task:0/device:CPU:0”]]