Python tensorflow.random_normal() Examples
The following are 30
code examples of tensorflow.random_normal().
Example #1
Source File: From fine-lm with MIT License | 6 votes |
def vae(x, name, z_size):
    """Gaussian variational bottleneck (no discretization).

    Args:
      x: Input tensor to the bottleneck.
      name: Name of the variable scope the layers are created in.
      z_size: Width of the latent; also sets the free-bits allowance
        (``z_size // 4``) subtracted from the KL term.

    Returns:
      A tuple ``(z, kl_loss, mu, log_sigma)``: the sampled latent, the scalar
      KL penalty after the free-bits threshold, and the two dense projections
      of ``x``.
    """
    with tf.variable_scope(name):
        mu = tf.layers.dense(x, z_size, name="mu")
        log_sigma = tf.layers.dense(x, z_size, name="log_sigma")

        # Noise is drawn per (batch, position) with a singleton third axis.
        dims = common_layers.shape_list(x)
        noise = tf.random_normal([dims[0], dims[1], 1, z_size])

        # The standard deviation is exp(log_sigma / 2), i.e. log_sigma is
        # used as a log-variance.
        std = tf.exp(log_sigma / 2)
        z = mu + std * noise

        kl_terms = tf.exp(log_sigma) + tf.square(mu) - 1. - log_sigma
        kl = 0.5 * tf.reduce_mean(kl_terms, axis=-1)

        # Only KL above the free-bits allowance is penalised.
        free_bits = z_size // 4
        excess_kl = tf.maximum(kl - free_bits, 0.0)
        kl_loss = tf.reduce_mean(excess_kl)
    return z, kl_loss, mu, log_sigma
Example #2
Source File: From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License | 6 votes |
def set_input_shape(self, input_shape):
    """Create the kernel and bias variables and record the output shape.

    ``input_shape`` must unpack into four entries; the first is used as the
    batch size and the last as the input channel count.
    """
    batch_size, _, _, input_channels = input_shape

    kernel_shape = tuple(self.kernel_shape) + (input_channels,
                                               self.output_channels)
    assert len(kernel_shape) == 4
    assert all(isinstance(e, int) for e in kernel_shape), kernel_shape

    # Random normal kernels, scaled by the root sum of squares taken over
    # the first three kernel axes.
    raw = tf.random_normal(kernel_shape, dtype=tf.float32)
    norm = tf.sqrt(1e-7 + tf.reduce_sum(tf.square(raw), axis=(0, 1, 2)))
    self.kernels = tf.Variable(raw / norm)

    zero_bias = np.zeros((self.output_channels,)).astype('float32')
    self.b = tf.Variable(zero_bias)

    # Push a batch of one through fprop to discover the output dimensions,
    # then substitute the real batch size.
    probe_shape = [1] + list(input_shape)[1:]
    probe_output = self.fprop(tf.zeros(probe_shape))
    out_dims = [int(d) for d in probe_output.get_shape()]
    self.output_shape = (batch_size,) + tuple(out_dims[1:])
Example #3
Source File: From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License | 6 votes |
def set_input_shape(self, input_shape):
    """Record input/output shapes and create the weight (and bias) parameters.

    The weight initializer is selected by ``self.init_mode``; any value other
    than "norm" or "uniform_unit_scaling" raises ``ValueError``.
    """
    batch_size, dim = input_shape
    self.input_shape = [batch_size, dim]
    self.output_shape = [batch_size, self.num_hid]

    weight_shape = [dim, self.num_hid]
    mode = self.init_mode
    if mode == "norm":
        # Normal draw, divided by the per-column root sum of squares, then
        # scaled by init_scale.
        draw = tf.random_normal(weight_shape, dtype=tf.float32)
        col_norm = tf.sqrt(
            1e-7 + tf.reduce_sum(tf.square(draw), axis=0, keep_dims=True))
        init = (draw / col_norm) * self.init_scale
    elif mode == "uniform_unit_scaling":
        bound = np.sqrt(3. / dim)
        init = tf.random_uniform(weight_shape, dtype=tf.float32,
                                 minval=-bound, maxval=bound)
    else:
        raise ValueError(self.init_mode)

    self.W = PV(init)
    if self.use_bias:
        bias_init = np.zeros((self.num_hid,)) + self.init_b
        self.b = PV(bias_init.astype('float32'))
Example #4
Source File: From dnc with Apache License 2.0 | 6 votes |
def testBuildAndTrain(self):
    """Unroll the module with dynamic_rnn and run one gradient-descent step.

    The test passes if the graph builds and both the initializer and the
    training op execute without raising.
    """
    inputs = tf.random_normal([TIME_STEPS, BATCH_SIZE, INPUT_SIZE])

    output, _ = rnn.dynamic_rnn(
        cell=self.module,
        inputs=inputs,
        initial_state=self.initial_state,
        time_major=True)

    targets = np.random.rand(TIME_STEPS, BATCH_SIZE, NUM_READS, WORD_SIZE)
    loss = tf.reduce_mean(tf.square(output - targets))
    train_op = tf.train.GradientDescentOptimizer(1).minimize(loss)
    init = tf.global_variables_initializer()

    with self.test_session():
        # The visible snippet ended at the `with` header with no body; the
        # two ops built above are otherwise unused, so run them here.
        init.run()
        train_op.run()
Example #5
Source File: From DOTA_models with Apache License 2.0 | 6 votes |
def __init__(self, batch_size, z_size, mean, logvar):
    """Create a diagonal gaussian distribution.

    Args:
      batch_size: The size of the batch, i.e. 0th dim in 2D tensor of samples.
        (Not read by this constructor.)
      z_size: The dimension of the distribution, i.e. 1st dim in 2D tensor.
      mean: The N-D mean of the distribution.
      logvar: The N-D log variance of the diagonal distribution.
    """
    static_shape = [None, z_size]

    self.mean = mean  # bxn already
    self.logvar = logvar  # bxn already

    # Reparameterised draw: mean + exp(logvar / 2) * unit-normal noise.
    noise = tf.random_normal(tf.shape(logvar))
    self.noise = noise
    self.sample = mean + tf.exp(0.5 * logvar) * noise

    # Pin the static shape of all three tensors to [None, z_size].
    for tensor in (mean, logvar, self.sample):
        tensor.set_shape(static_shape)
Example #6
Source File: From tangent with Apache License 2.0 | 6 votes |
def _test_tf_hvp(func, optimized, tf):
    """Differentiate `func` twice with tangent and check the output shape.

    Every (first mode, second mode) pair is exercised except forward-over-
    forward. `tf` is the TensorFlow module, passed in by the caller.
    """
    a = tf.random_normal(shape=(300,))
    v = tf.reshape(a, shape=(-1,))

    modes = ['forward', 'reverse']
    mode_pairs = [(first, second)
                  for first in modes
                  for second in modes
                  if (first, second) != ('forward', 'forward')]

    for first, second in mode_pairs:
        df = tangent.autodiff(
            func, mode=first, motion='joint',
            optimized=optimized, check_dims=False)
        ddf = tangent.autodiff(
            df, mode=second, motion='joint',
            optimized=optimized, check_dims=False)
        dx = ddf(a, tf.constant(1.0), v)
        # We just ensure it computes something in this case.
        assert dx.shape == a.shape
Example #7
Source File: From DeepLab_v3 with MIT License | 6 votes |
def test_generator_graph(self):
    """Check DCGAN generator graph construction over several sizes.

    For each (log2 image size, batch size) pair the test verifies the output
    image shape, the set of end-point names, and the channel depth of every
    deconv layer.
    """
    tf.set_random_seed(1234)
    # Check graph construction for a number of image size/depths and batch
    # sizes. `range` replaces the Python-2-only `xrange`, matching the inner
    # loop below; zip stops at the shorter range, so four pairs are tested.
    for i, batch_size in zip(range(3, 7), range(3, 8)):
        tf.reset_default_graph()
        final_size = 2 ** i
        noise = tf.random_normal([batch_size, 64])
        image, end_points = dcgan.generator(
            noise,
            depth=32,
            final_size=final_size)

        self.assertAllEqual([batch_size, final_size, final_size, 3],
                            image.shape.as_list())

        expected_names = ['deconv%i' % j for j in range(1, i)] + ['logits']
        self.assertSetEqual(set(expected_names), set(end_points.keys()))

        # Check layer depths.
        for j in range(1, i):
            layer = end_points['deconv%i' % j]
            self.assertEqual(32 * 2**(i-j-1), layer.get_shape().as_list()[-1])
Example #8
Source File: From DOTA_models with Apache License 2.0 | 6 votes |
def testLinearShared(self):
    """Check that one Linear block applied to several inputs shares weights.

    The block is applied to x1, x2 and x1 + x2; the test asserts that the
    outputs for x1 and x2 sum to the output for x1 + x2.
    """
    # Create a linear map which is applied twice on different inputs
    # (i.e. the weights of the map are shared).
    linear_map = blocks_std.Linear(6)
    x1 = tf.random_normal(shape=[1, 5])
    x2 = tf.random_normal(shape=[1, 5])
    xs = x1 + x2

    # Apply the transform with the same weights.
    y1 = linear_map(x1)
    y2 = linear_map(x2)
    ys = linear_map(xs)

    with self.test_session() as sess:
        # Initialize all the variables of the graph.
        tf.global_variables_initializer().run()

        # Fetch all three outputs in a single run so they are computed from
        # the same random draws of x1 and x2. The `sess.run(` call was
        # missing from the garbled source.
        y1_res, y2_res, ys_res = sess.run([y1, y2, ys])
        self.assertAllClose(y1_res + y2_res, ys_res)
Example #9
Source File: From DOTA_models with Apache License 2.0 | 6 votes |
def __init__(self, n_input, n_hidden, transfer_function=tf.nn.softplus,
             optimizer=tf.train.AdamOptimizer(), scale=0.1):
    """Build a one-hidden-layer autoencoder with additive Gaussian input noise.

    Args:
      n_input: Width of the input (second dimension of the `x` placeholder).
      n_hidden: Stored on the instance; the weights themselves come from
        `self._initialize_weights()`.
      transfer_function: Activation applied to the hidden pre-activation.
      optimizer: Optimizer whose `minimize` is called on the cost.
      scale: Multiplier applied to the unit-normal noise added to the input.
    """
    self.n_input = n_input
    self.n_hidden = n_hidden
    self.transfer = transfer_function
    self.scale = tf.placeholder(tf.float32)
    self.training_scale = scale
    network_weights = self._initialize_weights()
    self.weights = network_weights

    # model
    self.x = tf.placeholder(tf.float32, [None, self.n_input])
    # NOTE(review): the noise is multiplied by the Python value `scale`, not
    # by the `self.scale` placeholder created above — confirm this is
    # intended before changing it, since callers may rely on either.
    noisy_x = self.x + scale * tf.random_normal((n_input,))
    pre_activation = tf.add(tf.matmul(noisy_x, self.weights['w1']),
                            self.weights['b1'])
    self.hidden = self.transfer(pre_activation)
    self.reconstruction = tf.add(tf.matmul(self.hidden, self.weights['w2']),
                                 self.weights['b2'])

    # cost
    self.cost = 0.5 * tf.reduce_sum(
        tf.pow(tf.subtract(self.reconstruction, self.x), 2.0))
    self.optimizer = optimizer.minimize(self.cost)

    init = tf.global_variables_initializer()
    self.sess = tf.Session()
    # The initializer op was built but never executed in the visible
    # snippet; without this the session's variables are uninitialized.
    self.sess.run(init)
Example #10
Source File: From DOTA_models with Apache License 2.0 | 6 votes |
def sample_action(self, logits, sampling_dim, act_dim, act_type, greedy=False):
    """Sample an action from a distribution.

    Args:
      logits: 2-D array/tensor of distribution parameters, one row per batch
        element.
      sampling_dim: Number of columns in `logits`; for box actions the first
        half holds the means and the second half the standard deviations.
      act_dim: Width of the noise drawn for box actions.
      act_type: Action-space descriptor understood by `self.env_spec`.
      greedy: If true, return the argmax (discrete) or the means (box)
        instead of sampling.

    Returns:
      The chosen action.
    """
    if self.env_spec.is_discrete(act_type):
        if greedy:
            act = tf.argmax(logits, 1)
        else:
            act = tf.reshape(tf.multinomial(logits, 1), [-1])
    elif self.env_spec.is_box(act_type):
        # Integer division: `/` yields a float on Python 3, which is not a
        # valid slice index.
        half = sampling_dim // 2
        means = logits[:, :half]
        std = logits[:, half:]
        if greedy:
            act = means
        else:
            batch_size = tf.shape(logits)[0]
            act = means + std * tf.random_normal([batch_size, act_dim])
    else:
        assert False

    return act
Example #11
Source File: From tensorflow_RL with MIT License | 6 votes |
def __init__(self, name, state_size, output_size):
    """Build a Gaussian policy network inside variable scope `name`.

    Three 128-unit ReLU layers feed a linear mean head; a trainable
    `log_std` vector (initialised to -0.5) supplies the standard deviation.

    The garbled source had dropped every reference to the mean tensor; it is
    restored here as `self.mu`.
    """
    self.state_size = state_size
    self.output_size = output_size

    with tf.variable_scope(name):
        self.input = tf.placeholder(tf.float32, shape=[None, self.state_size])
        self.action = tf.placeholder(tf.float32, shape=[None, self.output_size])

        self.l1 = tf.layers.dense(inputs=self.input, units=128, activation=tf.nn.relu)
        self.l2 = tf.layers.dense(inputs=self.l1, units=128, activation=tf.nn.relu)
        self.l3 = tf.layers.dense(inputs=self.l2, units=128, activation=tf.nn.relu)

        # Linear head producing the mean.
        self.mu = tf.layers.dense(inputs=self.l3, units=self.output_size, activation=None)
        self.log_std = tf.get_variable(
            name='log_std',
            initializer=-0.5 * np.ones(self.output_size, dtype=np.float32))
        self.std = tf.exp(self.log_std)

        # Sampled action: mean plus unit-normal noise scaled by std.
        self.pi = self.mu + tf.random_normal(tf.shape(self.mu)) * self.std

        # Log-likelihood of the fed action and of the sampled action.
        self.logp = gaussian_likelihood(self.action, self.mu, self.log_std)
        self.logp_pi = gaussian_likelihood(self.pi, self.mu, self.log_std)

        self.scope = tf.get_variable_scope().name
Example #12
Source File: From cs294-112_hws with MIT License | 6 votes |
def sample_action(self, policy_parameters):
    """
        Builds the symbolic op that draws a stochastic sample from the
        Gaussian policy distribution.

        arguments:
            policy_parameters: pair (mean, log_std) of a Gaussian
                distribution over actions
                sy_mean: (batch_size, self.ac_dim)
                sy_logstd: (batch_size, self.ac_dim)

        returns:
            sy_sampled_ac: (batch_size, self.ac_dim)
    """
    sy_mean, sy_logstd = policy_parameters
    # mean + std * unit-normal noise, with std = exp(log_std).
    std = tf.exp(sy_logstd)
    noise = tf.random_normal(tf.shape(sy_mean), mean=0, stddev=1)
    return sy_mean + std * noise
Example #13
Source File: From HardRLWithYoutube with MIT License | 6 votes |
def __init__(self, ob_dim, ac_dim): #pylint: disable=W0613
    """Build a two-layer ELU value network trained with a KFAC optimizer.

    Sets `self._predict` (value prediction), `self.do_update` (one optimizer
    step on the squared error to the fed targets) and `self.q_runner`.
    """
    X = tf.placeholder(tf.float32, shape=[None, ob_dim*2+ac_dim*2+2]) # batch of observations
    vtarg_n = tf.placeholder(tf.float32, shape=[None], name='vtarg')

    wd_dict = {}
    h1 = tf.nn.elu(dense(X, 64, "h1", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict))
    h2 = tf.nn.elu(dense(h1, 64, "h2", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict))
    vpred_n = dense(h2, 1, "hfinal", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict)[:,0]

    # Prediction plus unit-normal noise; used only for the sampled loss.
    sample_vpred_n = vpred_n + tf.random_normal(tf.shape(vpred_n))

    wd_loss = tf.get_collection("vf_losses", None)
    loss = tf.reduce_mean(tf.square(vpred_n - vtarg_n)) + tf.add_n(wd_loss)
    loss_sampled = tf.reduce_mean(tf.square(vpred_n - tf.stop_gradient(sample_vpred_n)))

    self._predict = U.function([X], vpred_n)

    # `async` has been a reserved word since Python 3.7, so writing
    # `async=1` directly is a syntax error there. Passing it through a dict
    # sends the same keyword to KfacOptimizer on every Python version.
    optim = kfac.KfacOptimizer(learning_rate=0.001, cold_lr=0.001*(1-0.9), momentum=0.9,
                               clip_kl=0.3, epsilon=0.1, stats_decay=0.95,
                               kfac_update=2, cold_iter=50,
                               weight_decay_dict=wd_dict, max_grad_norm=None,
                               **{"async": 1})

    # Restrict the update to variables whose name contains "vf"; the
    # `var.name:` part of this test was missing from the garbled source.
    vf_var_list = [var for var in tf.trainable_variables() if "vf" in var.name]

    update_op, self.q_runner = optim.minimize(loss, loss_sampled, var_list=vf_var_list)
    self.do_update = U.function([X, vtarg_n], update_op) #pylint: disable=E1101
    U.initialize() # Initialize uninitialized TF variables
Example #14
Source File: From tensorflow-alexnet with MIT License | 6 votes |
def fc(inputs, output_size, init_bias=0.0, activation_func=tf.nn.relu, stddev=0.01):
    """Fully connected layer with normal-initialised weights.

    Rank-4 inputs are flattened over their last three axes before the
    matmul; any other rank is multiplied as-is using its last axis as the
    input width. The activation is skipped when `activation_func` is falsy.
    """
    dims = inputs.get_shape().as_list()
    needs_flatten = len(dims) == 4
    fan_in = dims[1] * dims[2] * dims[3] if needs_flatten else dims[-1]

    weight_init = tf.random_normal([fan_in, output_size],
                                   dtype=tf.float32, stddev=stddev)
    fc_weights = tf.Variable(weight_init, name='weights')
    if needs_flatten:
        inputs = tf.reshape(inputs, [-1, fan_in])

    bias_init = tf.constant(init_bias, shape=[output_size], dtype=tf.float32)
    fc_biases = tf.Variable(bias_init, name='biases')

    fc_layer = tf.nn.bias_add(tf.matmul(inputs, fc_weights), fc_biases)
    if not activation_func:
        return fc_layer
    return activation_func(fc_layer)
Example #15
Source File: From fine-lm with MIT License | 6 votes |
def testDmlLoss(self, batch, height, width, num_mixtures, reduce_sum):
    """Check `dml_loss` against `discretized_mix_logistic_loss`.

    The numerator/denominator pair returned by `dml_loss` is compared with
    the reference loss divided by the channel count (and averaged when
    `reduce_sum` is set).
    """
    channels = 3
    pred = tf.random_normal([batch, height, width, num_mixtures * 10])
    labels = tf.random_uniform([batch, height, width, channels],
                               minval=0, maxval=256, dtype=tf.int32)

    actual_loss_num, actual_loss_den = common_layers.dml_loss(
        pred=pred, labels=labels, reduce_sum=reduce_sum)
    actual_loss = actual_loss_num / actual_loss_den

    real_labels = common_layers.convert_rgb_to_symmetric_real(labels)
    expected_loss = common_layers.discretized_mix_logistic_loss(
        pred=pred, labels=real_labels) / channels
    if reduce_sum:
        expected_loss = tf.reduce_mean(expected_loss)

    with self.test_session() as sess:
        # Evaluate both in one run so they see the same random inputs. The
        # `sess.run(` call was missing from the garbled source.
        actual_loss_val, expected_loss_val = sess.run(
            [actual_loss, expected_loss])
    self.assertAllClose(actual_loss_val, expected_loss_val)
Example #16
Source File: From fine-lm with MIT License | 6 votes |
def testCreateOutputTrainMode(self, likelihood, num_mixtures, depth):
    """Check the shape returned by `create_output` in TRAIN mode.

    For the CAT likelihood the decoder output has `channels * width` columns
    and the result keeps a channel axis; otherwise it has `width` columns
    and no channel axis.
    """
    batch = 1
    height = 8
    width = 8
    channels = 3
    rows = height
    if likelihood == common_image_attention.DistributionType.CAT:
        cols = channels * width
    else:
        cols = width

    # NOTE(review): the constructor name was lost in the garbled source
    # (`hparams = hidden_size=2, ...)`). `tf.contrib.training.HParams` is
    # assumed here — confirm against the project's hparams type.
    hparams = tf.contrib.training.HParams(
        hidden_size=2,
        likelihood=likelihood,
        mode=tf.estimator.ModeKeys.TRAIN,
        num_mixtures=num_mixtures,
    )

    decoder_output = tf.random_normal([batch, rows, cols, hparams.hidden_size])
    targets = tf.random_uniform([batch, height, width, channels],
                                minval=-1., maxval=1.)
    output = common_image_attention.create_output(
        decoder_output, rows, cols, targets, hparams)

    if hparams.likelihood == common_image_attention.DistributionType.CAT:
        self.assertEqual(output.shape, (batch, height, width, channels, depth))
    else:
        self.assertEqual(output.shape, (batch, height, width, depth))
Example #17
Source File: From lirpg with MIT License | 6 votes |
def __init__(self, ob_dim, ac_dim): #pylint: disable=W0613
    """Build a two-layer ELU value network trained with a KFAC optimizer.

    Sets `self._predict` (value prediction), `self.do_update` (one optimizer
    step on the squared error to the fed targets) and `self.q_runner`.
    """
    X = tf.placeholder(tf.float32, shape=[None, ob_dim*2+ac_dim*2+2]) # batch of observations
    vtarg_n = tf.placeholder(tf.float32, shape=[None], name='vtarg')

    wd_dict = {}
    h1 = tf.nn.elu(dense(X, 64, "h1", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict))
    h2 = tf.nn.elu(dense(h1, 64, "h2", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict))
    vpred_n = dense(h2, 1, "hfinal", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict)[:,0]

    # Prediction plus unit-normal noise; used only for the sampled loss.
    sample_vpred_n = vpred_n + tf.random_normal(tf.shape(vpred_n))

    wd_loss = tf.get_collection("vf_losses", None)
    loss = tf.reduce_mean(tf.square(vpred_n - vtarg_n)) + tf.add_n(wd_loss)
    loss_sampled = tf.reduce_mean(tf.square(vpred_n - tf.stop_gradient(sample_vpred_n)))

    self._predict = U.function([X], vpred_n)

    # `async` has been a reserved word since Python 3.7, so writing
    # `async=1` directly is a syntax error there. Passing it through a dict
    # sends the same keyword to KfacOptimizer on every Python version.
    optim = kfac.KfacOptimizer(learning_rate=0.001, cold_lr=0.001*(1-0.9), momentum=0.9,
                               clip_kl=0.3, epsilon=0.1, stats_decay=0.95,
                               kfac_update=2, cold_iter=50,
                               weight_decay_dict=wd_dict, max_grad_norm=None,
                               **{"async": 1})

    # Restrict the update to variables whose name contains "vf"; the
    # `var.name:` part of this test was missing from the garbled source.
    vf_var_list = [var for var in tf.trainable_variables() if "vf" in var.name]

    update_op, self.q_runner = optim.minimize(loss, loss_sampled, var_list=vf_var_list)
    self.do_update = U.function([X, vtarg_n], update_op) #pylint: disable=E1101
    U.initialize() # Initialize uninitialized TF variables
Example #18
Source File: From deep_architect with MIT License | 5 votes |
def kaiming2015delving_initializer_conv(gain=1.0):
    """Return an initializer function with stddev ``gain * sqrt(2 / n)``.

    Args:
      gain: Multiplier applied to the standard deviation.

    Returns:
      A function ``init_fn(shape)`` that draws a zero-mean normal tensor of
      the given shape, where ``n`` is the product of all entries of
      ``shape``.
    """
    def init_fn(shape):
        # np.prod replaces np.product, a deprecated alias that was removed
        # in NumPy 2.0.
        # NOTE(review): n covers every axis of `shape`, not just the
        # fan-in axes — confirm this is the intended scaling.
        n = np.prod(shape)
        stddev = gain * np.sqrt(2.0 / n)
        init_vals = tf.random_normal(shape, 0.0, stddev)
        return init_vals

    return init_fn
Example #19
Source File: From HardRLWithYoutube with MIT License | 5 votes |
def get_perturbed_actor_updates(actor, perturbed_actor, param_noise_stddev):
    """Build one op that copies `actor`'s variables into `perturbed_actor`.

    Variables listed in `actor.perturbable_vars` receive additive zero-mean
    normal noise with standard deviation `param_noise_stddev`; all others
    are copied unchanged.

    Returns:
      A grouped op that performs every assignment.
    """
    assert len(actor.vars) == len(perturbed_actor.vars)
    assert len(actor.perturbable_vars) == len(perturbed_actor.perturbable_vars)

    # NOTE(review): the logging call target and the `.format` arguments were
    # lost in the garbled source. A module-level `logger` with an `info`
    # method and the two variable names are assumed — confirm.
    updates = []
    for var, perturbed_var in zip(actor.vars, perturbed_actor.vars):
        if var in actor.perturbable_vars:
            logger.info(' {} <- {} + noise'.format(var.name, perturbed_var.name))
            noise = tf.random_normal(tf.shape(var), mean=0., stddev=param_noise_stddev)
            updates.append(tf.assign(perturbed_var, var + noise))
        else:
            logger.info(' {} <- {}'.format(var.name, perturbed_var.name))
            updates.append(tf.assign(perturbed_var, var))
    assert len(updates) == len(actor.vars)
    return tf.group(*updates)
Example #20
Source File: From lirpg with MIT License | 5 votes |
def __init__(self, ob_dim, ac_dim):
    """Build the policy graph: placeholders, a two-layer tanh MLP, and losses.

    Sets `self._act` (sample an action), `self.compute_kl`,
    `self.update_info`, `self.wd_dict` and `self.logstd_1a`.
    """
    # Here we'll construct a bunch of expressions, which will be used in two places:
    # (1) When sampling actions
    # (2) When computing loss functions, for the policy update
    # Variables specific to (1) have the word "sampled" in them,
    # whereas variables specific to (2) have the word "old" in them
    # NOTE(review): the observation placeholder is 2 * ob_dim wide; presumably
    # two observations are concatenated by the caller — TODO confirm.
    ob_no = tf.placeholder(tf.float32, shape=[None, ob_dim*2], name="ob") # batch of observations
    oldac_na = tf.placeholder(tf.float32, shape=[None, ac_dim], name="ac") # batch of previously taken actions
    oldac_dist = tf.placeholder(tf.float32, shape=[None, ac_dim*2], name="oldac_dist") # batch of previous action distributions
    adv_n = tf.placeholder(tf.float32, shape=[None], name="adv") # advantage function estimate
    wd_dict = {}
    h1 = tf.nn.tanh(dense(ob_no, 64, "h1", weight_init=U.normc_initializer(1.0), bias_init=0.0, weight_loss_dict=wd_dict))
    h2 = tf.nn.tanh(dense(h1, 64, "h2", weight_init=U.normc_initializer(1.0), bias_init=0.0, weight_loss_dict=wd_dict))
    mean_na = dense(h2, ac_dim, "mean", weight_init=U.normc_initializer(0.1), bias_init=0.0, weight_loss_dict=wd_dict) # Mean control output
    self.wd_dict = wd_dict
    # Trainable log standard deviation, one entry per action dimension,
    # initialised to zero.
    self.logstd_1a = logstd_1a = tf.get_variable("logstd", [ac_dim], tf.float32, tf.zeros_initializer())
    logstd_1a = tf.expand_dims(logstd_1a, 0)
    std_1a = tf.exp(logstd_1a)
    # Repeat the std row once per batch element.
    std_na = tf.tile(std_1a, [tf.shape(mean_na)[0], 1])
    # ac_dist packs [mean | std] along axis 1: the first ac_dim columns are
    # the means, the remaining ac_dim columns the standard deviations.
    ac_dist = tf.concat([tf.reshape(mean_na, [-1, ac_dim]), tf.reshape(std_na, [-1, ac_dim])], 1)
    sampled_ac_na = tf.random_normal(tf.shape(ac_dist[:,ac_dim:])) * ac_dist[:,ac_dim:] + ac_dist[:,:ac_dim] # This is the sampled action we'll perform.
    logprobsampled_n = - tf.reduce_sum(tf.log(ac_dist[:,ac_dim:]), axis=1) - 0.5 * tf.log(2.0*np.pi)*ac_dim - 0.5 * tf.reduce_sum(tf.square(ac_dist[:,:ac_dim] - sampled_ac_na) / (tf.square(ac_dist[:,ac_dim:])), axis=1) # Logprob of sampled action
    logprob_n = - tf.reduce_sum(tf.log(ac_dist[:,ac_dim:]), axis=1) - 0.5 * tf.log(2.0*np.pi)*ac_dim - 0.5 * tf.reduce_sum(tf.square(ac_dist[:,:ac_dim] - oldac_na) / (tf.square(ac_dist[:,ac_dim:])), axis=1) # Logprob of previous actions under CURRENT policy (whereas oldlogprob_n is under OLD policy)
    kl = tf.reduce_mean(kl_div(oldac_dist, ac_dist, ac_dim))
    #kl = .5 * tf.reduce_mean(tf.square(logprob_n - oldlogprob_n)) # Approximation of KL divergence between old policy used to generate actions, and new policy used to compute logprob_n
    surr = - tf.reduce_mean(adv_n * logprob_n) # Loss function that we'll differentiate to get the policy gradient
    # NOTE(review): this uses logprob_n (fed actions), not logprobsampled_n —
    # confirm that is intended for the "sampled" loss.
    surr_sampled = - tf.reduce_mean(logprob_n) # Sampled loss of the policy
    self._act = U.function([ob_no], [sampled_ac_na, ac_dist, logprobsampled_n]) # Generate a new action and its logprob
    #self.compute_kl = U.function([ob_no, oldac_na, oldlogprob_n], kl) # Compute (approximate) KL divergence between old policy and new policy
    self.compute_kl = U.function([ob_no, oldac_dist], kl)
    self.update_info = ((ob_no, oldac_na, adv_n), surr, surr_sampled) # Input and output variables needed for computing loss
    U.initialize() # Initialize uninitialized TF variables
Example #21
Source File: From SentenceFunction with Apache License 2.0 | 5 votes |
def sample_gaussian(mu, logvar):
    """Draw ``mu + exp(logvar / 2) * eps`` with ``eps`` unit-normal noise.

    The noise tensor takes the dynamic shape of `logvar` and is named
    "epsilon" in the graph.
    """
    noise = tf.random_normal(tf.shape(logvar), name="epsilon")
    scale = tf.exp(0.5 * logvar)
    return mu + tf.multiply(scale, noise)
Example #22
Source File: From fine-lm with MIT License | 5 votes |
def testMakeMultiscaleDilatedLarger(self):
    """A resolution of 257 for a 256x256 image must raise ValueError."""
    source_image = tf.random_normal([256, 256, 3])
    too_large = [257]
    expected_message = "strides.* must be non-zero"
    with self.assertRaisesRegexp(ValueError, expected_message):
        image_utils.make_multiscale_dilated(source_image, too_large)
Example #23
Source File: From fine-lm with MIT License | 5 votes |
def testMakeMultiscaleDilatedIndivisible(self):
    """A resolution of 255 for a 256x256 image keeps the (256, 256, 3) shape."""
    source_image = tf.random_normal([256, 256, 3])
    outputs = image_utils.make_multiscale_dilated(source_image, [255])
    first_shape = outputs[0].shape
    self.assertEqual(first_shape, (256, 256, 3))
Example #24
Source File: From fine-lm with MIT License | 5 votes |
def testProjectHidden(self):
    """Projecting an all-zero input must give zeros of the expected shape."""
    hidden_size = 60
    block_dim = 20
    num_blocks = 3
    x = tf.zeros(shape=[1, hidden_size], dtype=tf.float32)
    projection_tensors = tf.random_normal(
        shape=[num_blocks, hidden_size, block_dim], dtype=tf.float32)
    x_projected = discretization.project_hidden(x, projection_tensors,
                                                hidden_size, num_blocks)

    with self.test_session() as sess:
        tf.global_variables_initializer().run()
        # The evaluation call was missing from the garbled source, which
        # assigned the result of assertEqual instead.
        x_projected_eval = sess.run(x_projected)
        self.assertEqual(np.shape(x_projected_eval), (1, num_blocks, block_dim))
        self.assertTrue(np.all(x_projected_eval == 0))
Example #25
Source File: From lirpg with MIT License | 5 votes |
def sample(self):
    """Return ``mean + std * eps`` with ``eps`` unit-normal, shaped like the mean."""
    center = self.mean
    spread = self.std
    noise = tf.random_normal(tf.shape(self.mean))
    return center + spread * noise
Example #26
Source File: From lirpg with MIT License | 5 votes |
def get_perturbed_actor_updates(actor, perturbed_actor, param_noise_stddev):
    """Build one op that copies `actor`'s variables into `perturbed_actor`.

    Variables listed in `actor.perturbable_vars` receive additive zero-mean
    normal noise with standard deviation `param_noise_stddev`; all others
    are copied unchanged.

    Returns:
      A grouped op that performs every assignment.
    """
    assert len(actor.vars) == len(perturbed_actor.vars)
    assert len(actor.perturbable_vars) == len(perturbed_actor.perturbable_vars)

    # NOTE(review): the logging call target and the `.format` arguments were
    # lost in the garbled source. A module-level `logger` with an `info`
    # method and the two variable names are assumed — confirm.
    updates = []
    for var, perturbed_var in zip(actor.vars, perturbed_actor.vars):
        if var in actor.perturbable_vars:
            logger.info(' {} <- {} + noise'.format(var.name, perturbed_var.name))
            noise = tf.random_normal(tf.shape(var), mean=0., stddev=param_noise_stddev)
            updates.append(tf.assign(perturbed_var, var + noise))
        else:
            logger.info(' {} <- {}'.format(var.name, perturbed_var.name))
            updates.append(tf.assign(perturbed_var, var))
    assert len(updates) == len(actor.vars)
    return tf.group(*updates)
Example #27
Source File: From HardRLWithYoutube with MIT License | 5 votes |
def __init__(self, ob_dim, ac_dim):
    """Build the policy graph: placeholders, a two-layer tanh MLP, and losses.

    Sets `self._act` (sample an action), `self.compute_kl`,
    `self.update_info`, `self.wd_dict` and `self.logstd_1a`.
    """
    # Here we'll construct a bunch of expressions, which will be used in two places:
    # (1) When sampling actions
    # (2) When computing loss functions, for the policy update
    # Variables specific to (1) have the word "sampled" in them,
    # whereas variables specific to (2) have the word "old" in them
    # NOTE(review): the observation placeholder is 2 * ob_dim wide; presumably
    # two observations are concatenated by the caller — TODO confirm.
    ob_no = tf.placeholder(tf.float32, shape=[None, ob_dim*2], name="ob") # batch of observations
    oldac_na = tf.placeholder(tf.float32, shape=[None, ac_dim], name="ac") # batch of previously taken actions
    oldac_dist = tf.placeholder(tf.float32, shape=[None, ac_dim*2], name="oldac_dist") # batch of previous action distributions
    adv_n = tf.placeholder(tf.float32, shape=[None], name="adv") # advantage function estimate
    wd_dict = {}
    h1 = tf.nn.tanh(dense(ob_no, 64, "h1", weight_init=U.normc_initializer(1.0), bias_init=0.0, weight_loss_dict=wd_dict))
    h2 = tf.nn.tanh(dense(h1, 64, "h2", weight_init=U.normc_initializer(1.0), bias_init=0.0, weight_loss_dict=wd_dict))
    mean_na = dense(h2, ac_dim, "mean", weight_init=U.normc_initializer(0.1), bias_init=0.0, weight_loss_dict=wd_dict) # Mean control output
    self.wd_dict = wd_dict
    # Trainable log standard deviation, one entry per action dimension,
    # initialised to zero.
    self.logstd_1a = logstd_1a = tf.get_variable("logstd", [ac_dim], tf.float32, tf.zeros_initializer())
    logstd_1a = tf.expand_dims(logstd_1a, 0)
    std_1a = tf.exp(logstd_1a)
    # Repeat the std row once per batch element.
    std_na = tf.tile(std_1a, [tf.shape(mean_na)[0], 1])
    # ac_dist packs [mean | std] along axis 1: the first ac_dim columns are
    # the means, the remaining ac_dim columns the standard deviations.
    ac_dist = tf.concat([tf.reshape(mean_na, [-1, ac_dim]), tf.reshape(std_na, [-1, ac_dim])], 1)
    sampled_ac_na = tf.random_normal(tf.shape(ac_dist[:,ac_dim:])) * ac_dist[:,ac_dim:] + ac_dist[:,:ac_dim] # This is the sampled action we'll perform.
    logprobsampled_n = - tf.reduce_sum(tf.log(ac_dist[:,ac_dim:]), axis=1) - 0.5 * tf.log(2.0*np.pi)*ac_dim - 0.5 * tf.reduce_sum(tf.square(ac_dist[:,:ac_dim] - sampled_ac_na) / (tf.square(ac_dist[:,ac_dim:])), axis=1) # Logprob of sampled action
    logprob_n = - tf.reduce_sum(tf.log(ac_dist[:,ac_dim:]), axis=1) - 0.5 * tf.log(2.0*np.pi)*ac_dim - 0.5 * tf.reduce_sum(tf.square(ac_dist[:,:ac_dim] - oldac_na) / (tf.square(ac_dist[:,ac_dim:])), axis=1) # Logprob of previous actions under CURRENT policy (whereas oldlogprob_n is under OLD policy)
    kl = tf.reduce_mean(kl_div(oldac_dist, ac_dist, ac_dim))
    #kl = .5 * tf.reduce_mean(tf.square(logprob_n - oldlogprob_n)) # Approximation of KL divergence between old policy used to generate actions, and new policy used to compute logprob_n
    surr = - tf.reduce_mean(adv_n * logprob_n) # Loss function that we'll differentiate to get the policy gradient
    # NOTE(review): this uses logprob_n (fed actions), not logprobsampled_n —
    # confirm that is intended for the "sampled" loss.
    surr_sampled = - tf.reduce_mean(logprob_n) # Sampled loss of the policy
    self._act = U.function([ob_no], [sampled_ac_na, ac_dist, logprobsampled_n]) # Generate a new action and its logprob
    #self.compute_kl = U.function([ob_no, oldac_na, oldlogprob_n], kl) # Compute (approximate) KL divergence between old policy and new policy
    self.compute_kl = U.function([ob_no, oldac_dist], kl)
    self.update_info = ((ob_no, oldac_na, adv_n), surr, surr_sampled) # Input and output variables needed for computing loss
    U.initialize() # Initialize uninitialized TF variables
Example #28
Source File: From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 5 votes |
def get_perturbed_actor_updates(actor, perturbed_actor, param_noise_stddev):
    """Build one op that copies `actor`'s variables into `perturbed_actor`.

    Variables listed in `actor.perturbable_vars` receive additive zero-mean
    normal noise with standard deviation `param_noise_stddev`; all others
    are copied unchanged.

    Returns:
      A grouped op that performs every assignment.
    """
    assert len(actor.vars) == len(perturbed_actor.vars)
    assert len(actor.perturbable_vars) == len(perturbed_actor.perturbable_vars)

    # NOTE(review): the logging call target and the `.format` arguments were
    # lost in the garbled source. A module-level `logger` with an `info`
    # method and the two variable names are assumed — confirm.
    updates = []
    for var, perturbed_var in zip(actor.vars, perturbed_actor.vars):
        if var in actor.perturbable_vars:
            logger.info(' {} <- {} + noise'.format(var.name, perturbed_var.name))
            noise = tf.random_normal(tf.shape(var), mean=0., stddev=param_noise_stddev)
            updates.append(tf.assign(perturbed_var, var + noise))
        else:
            logger.info(' {} <- {}'.format(var.name, perturbed_var.name))
            updates.append(tf.assign(perturbed_var, var))
    assert len(updates) == len(actor.vars)
    return tf.group(*updates)
Example #29
Source File: From HardRLWithYoutube with MIT License | 5 votes |
def sample(self):
    """Return ``mean + std * eps`` with ``eps`` unit-normal, shaped like the mean."""
    center = self.mean
    spread = self.std
    noise = tf.random_normal(tf.shape(self.mean))
    return center + spread * noise
Example #30
Source File: From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 5 votes |
def sample(self):
    """Return ``mean + std * eps`` with ``eps`` unit-normal, shaped like the mean."""
    center = self.mean
    spread = self.std
    noise = tf.random_normal(tf.shape(self.mean))
    return center + spread * noise