I found this sample for a TensorFlow-based TTS that delegates work to the CPU and the GPU simultaneously.
How do I need to change the PyTorch setup of Mozilla TTS for this to work? Here is the sample:
import tensorflow as tf
import numpy as np
from time import time
from threading import Thread
# Side length of the square weight matrices; also the column count of both inputs.
n = 2 * 1024

# Random float32 inputs. The CPU batch has n // 16 rows, the GPU batch has n rows,
# so the CPU thread is handed 1/16 of the rows the GPU thread gets.
data_cpu = np.random.uniform(size=(n // 16, n)).astype(np.float32)
data_gpu = np.random.uniform(size=(n, n)).astype(np.float32)

# One placeholder, created under the CPU device scope, is shared by both graphs;
# each run feeds it with that run's own array.
with tf.device('/cpu:0'):
    x = tf.placeholder(name='x', dtype=tf.float32)
def get_var(name):
    """Return the TensorFlow variable called ``name`` with shape ``[n, n]``."""
    square_shape = [n, n]
    return tf.get_variable(name, shape=square_shape)
def op(name):
    """Build a chain of eight matmuls of placeholder ``x`` by one weight matrix.

    The weight variable is obtained from ``get_var(name)`` and reused for
    every multiplication. Returns the tensor produced by the last matmul.
    """
    weights = get_var(name)
    result = x
    for _ in range(8):
        result = tf.matmul(result, weights)
    return result
# Build the same matmul chain twice, once under each device scope, with
# separately named weight variables so the two graphs do not share weights.
with tf.device('/cpu:0'):
    cpu = op('w_cpu')

with tf.device('/gpu:0'):
    gpu = op('w_gpu')
def f(session, y, data):
    """Evaluate tensor ``y`` in ``session`` with ``data`` fed to placeholder ``x``.

    Returns whatever ``session.run`` returns for ``y``.
    """
    feeds = {x: data}
    return session.run(y, feed_dict=feeds)
# Run the CPU graph and the GPU graph from two Python threads sharing one
# session, and print the wall-clock time until both threads have finished.
session_config = tf.ConfigProto(
    log_device_placement=True,
    intra_op_parallelism_threads=8,
)
with tf.Session(config=session_config) as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = []
    # comment out 0 or 1 of the following 2 lines:
    threads += [Thread(target=f, args=(sess, cpu, data_cpu))]
    threads += [Thread(target=f, args=(sess, gpu, data_gpu))]
    t0 = time()
    for t in threads:
        t.start()
    # Block until every started thread has finished before stopping the clock.
    coord.join(threads)
    t1 = time()
    # Function-call form of print: valid on Python 3, and prints the same
    # single value on Python 2 (the original `print t1 - t0` is Python 2 only).
    print(t1 - t0)
Also, how does this influence the total length of the sentence and the size of the dataset?
Will this fill the GPU's 2 GB of memory and use the CPU and RAM for the rest, or will it just delegate evenly, as if I had two 2 GB GPUs?