Python tensorflow.random_normal() Examples
The following are 30 code examples of tensorflow.random_normal(), drawn from open-source projects. Note that tf.random_normal is the TensorFlow 1.x name of this op; in TensorFlow 2.x it was renamed to tf.random.normal.
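Before the examples, here is a minimal sketch of the call itself, assuming TensorFlow 1.x graph mode; the shape and distribution parameters are illustrative only:

import tensorflow as tf

# Draw a [2, 3] tensor of samples from a normal distribution with
# mean 0.0 and standard deviation 1.0 (the defaults).
noise = tf.random_normal([2, 3], mean=0.0, stddev=1.0, dtype=tf.float32)

with tf.Session() as sess:
    print(sess.run(noise))  # A different 2x3 matrix on every run.

Every example below builds on this same call, varying only the shape (often computed with tf.shape from another tensor) and the mean/stddev arguments.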
Example #1
Source File: discretization.py From fine-lm with MIT License
def vae(x, name, z_size):
    """Simple variational autoencoder without discretization.

    Args:
        x: Input to the discretization bottleneck.
        name: Name for the bottleneck scope.
        z_size: Number of bits used to produce discrete code; discrete codes
            range from 1 to 2**z_size.

    Returns:
        Sampled latent z, the KL loss, mu and log_sigma.
    """
    with tf.variable_scope(name):
        mu = tf.layers.dense(x, z_size, name="mu")
        log_sigma = tf.layers.dense(x, z_size, name="log_sigma")
        shape = common_layers.shape_list(x)
        epsilon = tf.random_normal([shape[0], shape[1], 1, z_size])
        z = mu + tf.exp(log_sigma / 2) * epsilon
        kl = 0.5 * tf.reduce_mean(
            tf.exp(log_sigma) + tf.square(mu) - 1. - log_sigma, axis=-1)
        free_bits = z_size // 4
        kl_loss = tf.reduce_mean(tf.maximum(kl - free_bits, 0.0))
    return z, kl_loss, mu, log_sigma
Example #2
Source File: model.py From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License
def set_input_shape(self, input_shape):
    batch_size, rows, cols, input_channels = input_shape
    kernel_shape = tuple(self.kernel_shape) + (input_channels,
                                               self.output_channels)
    assert len(kernel_shape) == 4
    assert all(isinstance(e, int) for e in kernel_shape), kernel_shape
    init = tf.random_normal(kernel_shape, dtype=tf.float32)
    init = init / tf.sqrt(1e-7 + tf.reduce_sum(tf.square(init),
                                               axis=(0, 1, 2)))
    self.kernels = tf.Variable(init)
    self.b = tf.Variable(
        np.zeros((self.output_channels,)).astype('float32'))
    input_shape = list(input_shape)
    input_shape[0] = 1
    dummy_batch = tf.zeros(input_shape)
    dummy_output = self.fprop(dummy_batch)
    output_shape = [int(e) for e in dummy_output.get_shape()]
    output_shape[0] = batch_size
    self.output_shape = tuple(output_shape)
Example #3
Source File: picklable_model.py From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License
def set_input_shape(self, input_shape):
    batch_size, dim = input_shape
    self.input_shape = [batch_size, dim]
    self.output_shape = [batch_size, self.num_hid]
    if self.init_mode == "norm":
        init = tf.random_normal([dim, self.num_hid], dtype=tf.float32)
        init = init / tf.sqrt(1e-7 + tf.reduce_sum(tf.square(init),
                                                   axis=0, keep_dims=True))
        init = init * self.init_scale
    elif self.init_mode == "uniform_unit_scaling":
        scale = np.sqrt(3. / dim)
        init = tf.random_uniform([dim, self.num_hid], dtype=tf.float32,
                                 minval=-scale, maxval=scale)
    else:
        raise ValueError(self.init_mode)
    self.W = PV(init)
    if self.use_bias:
        self.b = PV((np.zeros((self.num_hid,))
                     + self.init_b).astype('float32'))
Example #4
Source File: access_test.py From dnc with Apache License 2.0
def testBuildAndTrain(self):
    inputs = tf.random_normal([TIME_STEPS, BATCH_SIZE, INPUT_SIZE])
    output, _ = rnn.dynamic_rnn(
        cell=self.module,
        inputs=inputs,
        initial_state=self.initial_state,
        time_major=True)
    targets = np.random.rand(TIME_STEPS, BATCH_SIZE, NUM_READS, WORD_SIZE)
    loss = tf.reduce_mean(tf.square(output - targets))
    train_op = tf.train.GradientDescentOptimizer(1).minimize(loss)
    init = tf.global_variables_initializer()

    with self.test_session():
        init.run()
        train_op.run()
Example #5
Source File: distributions.py From DOTA_models with Apache License 2.0
def __init__(self, batch_size, z_size, mean, logvar):
    """Create a diagonal gaussian distribution.

    Args:
        batch_size: The size of the batch, i.e. 0th dim in 2D tensor of
            samples.
        z_size: The dimension of the distribution, i.e. 1st dim in 2D tensor.
        mean: The N-D mean of the distribution.
        logvar: The N-D log variance of the diagonal distribution.
    """
    size__xz = [None, z_size]

    self.mean = mean        # bxn already
    self.logvar = logvar    # bxn already
    self.noise = noise = tf.random_normal(tf.shape(logvar))
    self.sample = mean + tf.exp(0.5 * logvar) * noise
    mean.set_shape(size__xz)
    logvar.set_shape(size__xz)
    self.sample.set_shape(size__xz)
Example #6
Source File: test_hessian_vector_products.py From tangent with Apache License 2.0
def _test_tf_hvp(func, optimized, tf):
    a = tf.random_normal(shape=(300,))
    v = tf.reshape(a, shape=(-1,))

    modes = ['forward', 'reverse']
    for mode1 in modes:
        for mode2 in modes:
            if mode1 == mode2 == 'forward':
                continue
            df = tangent.autodiff(
                func,
                mode=mode1,
                motion='joint',
                optimized=optimized,
                check_dims=False)
            ddf = tangent.autodiff(
                df,
                mode=mode2,
                motion='joint',
                optimized=optimized,
                check_dims=False)
            dx = ddf(a, tf.constant(1.0), v)
            # We just ensure it computes something in this case.
            assert dx.shape == a.shape
Example #7
Source File: dcgan_test.py From DeepLab_v3 with MIT License
def test_generator_graph(self):
    tf.set_random_seed(1234)
    # Check graph construction for a number of image size/depths and batch
    # sizes.
    for i, batch_size in zip(xrange(3, 7), xrange(3, 8)):
        tf.reset_default_graph()
        final_size = 2 ** i
        noise = tf.random_normal([batch_size, 64])
        image, end_points = dcgan.generator(
            noise,
            depth=32,
            final_size=final_size)

        self.assertAllEqual([batch_size, final_size, final_size, 3],
                            image.shape.as_list())

        expected_names = ['deconv%i' % j for j in xrange(1, i)] + ['logits']
        self.assertSetEqual(set(expected_names), set(end_points.keys()))

        # Check layer depths.
        for j in range(1, i):
            layer = end_points['deconv%i' % j]
            self.assertEqual(32 * 2**(i-j-1), layer.get_shape().as_list()[-1])
Example #8
Source File: blocks_std_test.py From DOTA_models with Apache License 2.0
def testLinearShared(self):
    # Create a linear map which is applied twice on different inputs
    # (i.e. the weights of the map are shared).
    linear_map = blocks_std.Linear(6)
    x1 = tf.random_normal(shape=[1, 5])
    x2 = tf.random_normal(shape=[1, 5])
    xs = x1 + x2

    # Apply the transform with the same weights.
    y1 = linear_map(x1)
    y2 = linear_map(x2)
    ys = linear_map(xs)

    with self.test_session() as sess:
        # Initialize all the variables of the graph.
        tf.global_variables_initializer().run()

        y1_res, y2_res, ys_res = sess.run([y1, y2, ys])
        self.assertAllClose(y1_res + y2_res, ys_res)
Example #9
Source File: DenoisingAutoencoder.py From DOTA_models with Apache License 2.0
def __init__(self, n_input, n_hidden, transfer_function=tf.nn.softplus,
             optimizer=tf.train.AdamOptimizer(), scale=0.1):
    self.n_input = n_input
    self.n_hidden = n_hidden
    self.transfer = transfer_function
    self.scale = tf.placeholder(tf.float32)
    self.training_scale = scale
    network_weights = self._initialize_weights()
    self.weights = network_weights

    # model
    self.x = tf.placeholder(tf.float32, [None, self.n_input])
    self.hidden = self.transfer(tf.add(
        tf.matmul(self.x + scale * tf.random_normal((n_input,)),
                  self.weights['w1']),
        self.weights['b1']))
    self.reconstruction = tf.add(tf.matmul(self.hidden, self.weights['w2']),
                                 self.weights['b2'])

    # cost
    self.cost = 0.5 * tf.reduce_sum(
        tf.pow(tf.subtract(self.reconstruction, self.x), 2.0))
    self.optimizer = optimizer.minimize(self.cost)

    init = tf.global_variables_initializer()
    self.sess = tf.Session()
    self.sess.run(init)
Example #10
Source File: policy.py From DOTA_models with Apache License 2.0
def sample_action(self, logits, sampling_dim, act_dim, act_type,
                  greedy=False):
    """Sample an action from a distribution."""
    if self.env_spec.is_discrete(act_type):
        if greedy:
            act = tf.argmax(logits, 1)
        else:
            act = tf.reshape(tf.multinomial(logits, 1), [-1])
    elif self.env_spec.is_box(act_type):
        # Integer division here: the first half of logits holds the means,
        # the second half the standard deviations. (The original Python 2
        # code used plain `/`, which breaks slicing under Python 3.)
        means = logits[:, :sampling_dim // 2]
        std = logits[:, sampling_dim // 2:]
        if greedy:
            act = means
        else:
            batch_size = tf.shape(logits)[0]
            act = means + std * tf.random_normal([batch_size, act_dim])
    else:
        assert False
    return act
Example #11
Source File: continuous.py From tensorflow_RL with MIT License
def __init__(self, name, state_size, output_size):
    self.state_size = state_size
    self.output_size = output_size

    with tf.variable_scope(name):
        self.input = tf.placeholder(tf.float32,
                                    shape=[None, self.state_size])
        self.action = tf.placeholder(tf.float32,
                                     shape=[None, self.output_size])
        self.l1 = tf.layers.dense(inputs=self.input, units=128,
                                  activation=tf.nn.relu)
        self.l2 = tf.layers.dense(inputs=self.l1, units=128,
                                  activation=tf.nn.relu)
        self.l3 = tf.layers.dense(inputs=self.l2, units=128,
                                  activation=tf.nn.relu)
        self.mu = tf.layers.dense(inputs=self.l3, units=self.output_size,
                                  activation=None)
        self.log_std = tf.get_variable(
            name='log_std',
            initializer=-0.5 * np.ones(self.output_size, dtype=np.float32))
        self.std = tf.exp(self.log_std)
        self.pi = self.mu + tf.random_normal(tf.shape(self.mu)) * self.std
        self.logp = gaussian_likelihood(self.action, self.mu, self.log_std)
        self.logp_pi = gaussian_likelihood(self.pi, self.mu, self.log_std)
        self.scope = tf.get_variable_scope().name
Example #12
Source File: train_policy.py From cs294-112_hws with MIT License
def sample_action(self, policy_parameters):
    """
    Constructs a symbolic operation for stochastically sampling from the
    policy distribution.

    arguments:
        policy_parameters: (mean, log_std) of a Gaussian distribution over
            actions
                sy_mean: (batch_size, self.ac_dim)
                sy_logstd: (batch_size, self.ac_dim)

    returns:
        sy_sampled_ac: (batch_size, self.ac_dim)
    """
    sy_mean, sy_logstd = policy_parameters
    sy_sampled_ac = sy_mean + tf.exp(sy_logstd) * tf.random_normal(
        tf.shape(sy_mean), 0, 1)
    return sy_sampled_ac
Example #13
Source File: value_functions.py From HardRLWithYoutube with MIT License
def __init__(self, ob_dim, ac_dim):  # pylint: disable=W0613
    X = tf.placeholder(tf.float32,
                       shape=[None, ob_dim * 2 + ac_dim * 2 + 2])  # batch of observations
    vtarg_n = tf.placeholder(tf.float32, shape=[None], name='vtarg')
    wd_dict = {}
    h1 = tf.nn.elu(dense(X, 64, "h1", weight_init=U.normc_initializer(1.0),
                         bias_init=0, weight_loss_dict=wd_dict))
    h2 = tf.nn.elu(dense(h1, 64, "h2", weight_init=U.normc_initializer(1.0),
                         bias_init=0, weight_loss_dict=wd_dict))
    vpred_n = dense(h2, 1, "hfinal", weight_init=U.normc_initializer(1.0),
                    bias_init=0, weight_loss_dict=wd_dict)[:, 0]
    sample_vpred_n = vpred_n + tf.random_normal(tf.shape(vpred_n))
    wd_loss = tf.get_collection("vf_losses", None)
    loss = tf.reduce_mean(tf.square(vpred_n - vtarg_n)) + tf.add_n(wd_loss)
    loss_sampled = tf.reduce_mean(
        tf.square(vpred_n - tf.stop_gradient(sample_vpred_n)))
    self._predict = U.function([X], vpred_n)
    # Note: `async` became a reserved keyword in Python 3.7, so this call
    # only parses on the older Python versions this code was written for.
    optim = kfac.KfacOptimizer(learning_rate=0.001, cold_lr=0.001 * (1 - 0.9),
                               momentum=0.9, clip_kl=0.3, epsilon=0.1,
                               stats_decay=0.95, async=1, kfac_update=2,
                               cold_iter=50, weight_decay_dict=wd_dict,
                               max_grad_norm=None)
    vf_var_list = []
    for var in tf.trainable_variables():
        if "vf" in var.name:
            vf_var_list.append(var)
    update_op, self.q_runner = optim.minimize(loss, loss_sampled,
                                              var_list=vf_var_list)
    self.do_update = U.function([X, vtarg_n], update_op)  # pylint: disable=E1101
    U.initialize()  # Initialize uninitialized TF variables
Example #14
Source File: ops.py From tensorflow-alexnet with MIT License
def fc(inputs, output_size, init_bias=0.0, activation_func=tf.nn.relu,
       stddev=0.01):
    input_shape = inputs.get_shape().as_list()
    if len(input_shape) == 4:
        fc_weights = tf.Variable(
            tf.random_normal(
                [input_shape[1] * input_shape[2] * input_shape[3],
                 output_size],
                dtype=tf.float32, stddev=stddev),
            name='weights')
        inputs = tf.reshape(inputs, [-1, fc_weights.get_shape().as_list()[0]])
    else:
        fc_weights = tf.Variable(
            tf.random_normal([input_shape[-1], output_size],
                             dtype=tf.float32, stddev=stddev),
            name='weights')

    fc_biases = tf.Variable(
        tf.constant(init_bias, shape=[output_size], dtype=tf.float32),
        name='biases')
    fc_layer = tf.matmul(inputs, fc_weights)
    fc_layer = tf.nn.bias_add(fc_layer, fc_biases)
    if activation_func:
        fc_layer = activation_func(fc_layer)
    return fc_layer
Example #15
Source File: common_layers_test.py From fine-lm with MIT License
def testDmlLoss(self, batch, height, width, num_mixtures, reduce_sum):
    channels = 3
    pred = tf.random_normal([batch, height, width, num_mixtures * 10])
    labels = tf.random_uniform([batch, height, width, channels],
                               minval=0, maxval=256, dtype=tf.int32)
    actual_loss_num, actual_loss_den = common_layers.dml_loss(
        pred=pred, labels=labels, reduce_sum=reduce_sum)
    actual_loss = actual_loss_num / actual_loss_den

    real_labels = common_layers.convert_rgb_to_symmetric_real(labels)
    expected_loss = common_layers.discretized_mix_logistic_loss(
        pred=pred, labels=real_labels) / channels
    if reduce_sum:
        expected_loss = tf.reduce_mean(expected_loss)

    with self.test_session() as sess:
        actual_loss_val, expected_loss_val = sess.run(
            [actual_loss, expected_loss])
        self.assertAllClose(actual_loss_val, expected_loss_val)
Example #16
Source File: common_image_attention_test.py From fine-lm with MIT License
def testCreateOutputTrainMode(self, likelihood, num_mixtures, depth):
    batch = 1
    height = 8
    width = 8
    channels = 3
    rows = height
    if likelihood == common_image_attention.DistributionType.CAT:
        cols = channels * width
    else:
        cols = width
    hparams = tf.contrib.training.HParams(
        hidden_size=2,
        likelihood=likelihood,
        mode=tf.estimator.ModeKeys.TRAIN,
        num_mixtures=num_mixtures,
    )
    decoder_output = tf.random_normal(
        [batch, rows, cols, hparams.hidden_size])
    targets = tf.random_uniform([batch, height, width, channels],
                                minval=-1., maxval=1.)
    output = common_image_attention.create_output(
        decoder_output, rows, cols, targets, hparams)
    if hparams.likelihood == common_image_attention.DistributionType.CAT:
        self.assertEqual(output.shape, (batch, height, width, channels,
                                        depth))
    else:
        self.assertEqual(output.shape, (batch, height, width, depth))
Example #17
Source File: value_functions.py From lirpg with MIT License
def __init__(self, ob_dim, ac_dim):  # pylint: disable=W0613
    X = tf.placeholder(tf.float32,
                       shape=[None, ob_dim * 2 + ac_dim * 2 + 2])  # batch of observations
    vtarg_n = tf.placeholder(tf.float32, shape=[None], name='vtarg')
    wd_dict = {}
    h1 = tf.nn.elu(dense(X, 64, "h1", weight_init=U.normc_initializer(1.0),
                         bias_init=0, weight_loss_dict=wd_dict))
    h2 = tf.nn.elu(dense(h1, 64, "h2", weight_init=U.normc_initializer(1.0),
                         bias_init=0, weight_loss_dict=wd_dict))
    vpred_n = dense(h2, 1, "hfinal", weight_init=U.normc_initializer(1.0),
                    bias_init=0, weight_loss_dict=wd_dict)[:, 0]
    sample_vpred_n = vpred_n + tf.random_normal(tf.shape(vpred_n))
    wd_loss = tf.get_collection("vf_losses", None)
    loss = tf.reduce_mean(tf.square(vpred_n - vtarg_n)) + tf.add_n(wd_loss)
    loss_sampled = tf.reduce_mean(
        tf.square(vpred_n - tf.stop_gradient(sample_vpred_n)))
    self._predict = U.function([X], vpred_n)
    # Note: `async` became a reserved keyword in Python 3.7, so this call
    # only parses on the older Python versions this code was written for.
    optim = kfac.KfacOptimizer(learning_rate=0.001, cold_lr=0.001 * (1 - 0.9),
                               momentum=0.9, clip_kl=0.3, epsilon=0.1,
                               stats_decay=0.95, async=1, kfac_update=2,
                               cold_iter=50, weight_decay_dict=wd_dict,
                               max_grad_norm=None)
    vf_var_list = []
    for var in tf.trainable_variables():
        if "vf" in var.name:
            vf_var_list.append(var)
    update_op, self.q_runner = optim.minimize(loss, loss_sampled,
                                              var_list=vf_var_list)
    self.do_update = U.function([X, vtarg_n], update_op)  # pylint: disable=E1101
    U.initialize()  # Initialize uninitialized TF variables
Example #18
Source File: cnn2d.py From deep_architect with MIT License
def kaiming2015delving_initializer_conv(gain=1.0):
    def init_fn(shape):
        n = np.product(shape)
        stddev = gain * np.sqrt(2.0 / n)
        init_vals = tf.random_normal(shape, 0.0, stddev)
        return init_vals
    return init_fn
Example #19
Source File: ddpg.py From HardRLWithYoutube with MIT License
def get_perturbed_actor_updates(actor, perturbed_actor, param_noise_stddev):
    assert len(actor.vars) == len(perturbed_actor.vars)
    assert len(actor.perturbable_vars) == len(perturbed_actor.perturbable_vars)

    updates = []
    for var, perturbed_var in zip(actor.vars, perturbed_actor.vars):
        if var in actor.perturbable_vars:
            logger.info('  {} <- {} + noise'.format(perturbed_var.name,
                                                    var.name))
            updates.append(
                tf.assign(perturbed_var,
                          var + tf.random_normal(tf.shape(var), mean=0.,
                                                 stddev=param_noise_stddev)))
        else:
            logger.info('  {} <- {}'.format(perturbed_var.name, var.name))
            updates.append(tf.assign(perturbed_var, var))
    assert len(updates) == len(actor.vars)
    return tf.group(*updates)
Example #20
Source File: policies.py From lirpg with MIT License
def __init__(self, ob_dim, ac_dim):
    # Here we'll construct a bunch of expressions, which will be used in two places:
    # (1) When sampling actions
    # (2) When computing loss functions, for the policy update
    # Variables specific to (1) have the word "sampled" in them,
    # whereas variables specific to (2) have the word "old" in them
    ob_no = tf.placeholder(tf.float32, shape=[None, ob_dim * 2], name="ob")  # batch of observations
    oldac_na = tf.placeholder(tf.float32, shape=[None, ac_dim], name="ac")  # batch of previous actions
    oldac_dist = tf.placeholder(tf.float32, shape=[None, ac_dim * 2], name="oldac_dist")  # batch of previous action distributions
    adv_n = tf.placeholder(tf.float32, shape=[None], name="adv")  # advantage function estimate
    wd_dict = {}
    h1 = tf.nn.tanh(dense(ob_no, 64, "h1", weight_init=U.normc_initializer(1.0), bias_init=0.0, weight_loss_dict=wd_dict))
    h2 = tf.nn.tanh(dense(h1, 64, "h2", weight_init=U.normc_initializer(1.0), bias_init=0.0, weight_loss_dict=wd_dict))
    mean_na = dense(h2, ac_dim, "mean", weight_init=U.normc_initializer(0.1), bias_init=0.0, weight_loss_dict=wd_dict)  # Mean control output
    self.wd_dict = wd_dict
    self.logstd_1a = logstd_1a = tf.get_variable("logstd", [ac_dim], tf.float32, tf.zeros_initializer())  # Variance on outputs
    logstd_1a = tf.expand_dims(logstd_1a, 0)
    std_1a = tf.exp(logstd_1a)
    std_na = tf.tile(std_1a, [tf.shape(mean_na)[0], 1])
    ac_dist = tf.concat([tf.reshape(mean_na, [-1, ac_dim]), tf.reshape(std_na, [-1, ac_dim])], 1)
    # This is the sampled action we'll perform.
    sampled_ac_na = tf.random_normal(tf.shape(ac_dist[:, ac_dim:])) * ac_dist[:, ac_dim:] + ac_dist[:, :ac_dim]
    # Logprob of sampled action
    logprobsampled_n = (- tf.reduce_sum(tf.log(ac_dist[:, ac_dim:]), axis=1)
                        - 0.5 * tf.log(2.0 * np.pi) * ac_dim
                        - 0.5 * tf.reduce_sum(tf.square(ac_dist[:, :ac_dim] - sampled_ac_na) / (tf.square(ac_dist[:, ac_dim:])), axis=1))
    # Logprob of previous actions under CURRENT policy (whereas oldlogprob_n is under OLD policy)
    logprob_n = (- tf.reduce_sum(tf.log(ac_dist[:, ac_dim:]), axis=1)
                 - 0.5 * tf.log(2.0 * np.pi) * ac_dim
                 - 0.5 * tf.reduce_sum(tf.square(ac_dist[:, :ac_dim] - oldac_na) / (tf.square(ac_dist[:, ac_dim:])), axis=1))
    # Approximation of KL divergence between old policy used to generate actions,
    # and new policy used to compute logprob_n
    kl = tf.reduce_mean(kl_div(oldac_dist, ac_dist, ac_dim))
    # kl = .5 * tf.reduce_mean(tf.square(logprob_n - oldlogprob_n))
    surr = - tf.reduce_mean(adv_n * logprob_n)  # Loss function that we'll differentiate to get the policy gradient
    surr_sampled = - tf.reduce_mean(logprob_n)  # Sampled loss of the policy
    self._act = U.function([ob_no], [sampled_ac_na, ac_dist, logprobsampled_n])  # Generate a new action and its logprob
    # self.compute_kl = U.function([ob_no, oldac_na, oldlogprob_n], kl)  # Compute (approximate) KL divergence between old policy and new policy
    self.compute_kl = U.function([ob_no, oldac_dist], kl)
    self.update_info = ((ob_no, oldac_na, adv_n), surr, surr_sampled)  # Input and output variables needed for computing loss
    U.initialize()  # Initialize uninitialized TF variables
Example #21
Source File: utils.py From SentenceFunction with Apache License 2.0
def sample_gaussian(mu, logvar):
    epsilon = tf.random_normal(tf.shape(logvar), name="epsilon")
    std = tf.exp(0.5 * logvar)
    z = mu + tf.multiply(std, epsilon)
    return z
Example #22
Source File: image_utils_test.py From fine-lm with MIT License
def testMakeMultiscaleDilatedLarger(self):
    image = tf.random_normal([256, 256, 3])
    resolutions = [257]
    with self.assertRaisesRegexp(ValueError, "strides.* must be non-zero"):
        _ = image_utils.make_multiscale_dilated(image, resolutions)
Example #23
Source File: image_utils_test.py From fine-lm with MIT License
def testMakeMultiscaleDilatedIndivisible(self):
    image = tf.random_normal([256, 256, 3])
    resolutions = [255]
    scaled_images = image_utils.make_multiscale_dilated(image, resolutions)
    self.assertEqual(scaled_images[0].shape, (256, 256, 3))
Example #24
Source File: discretization_test.py From fine-lm with MIT License
def testProjectHidden(self):
    hidden_size = 60
    block_dim = 20
    num_blocks = 3
    x = tf.zeros(shape=[1, hidden_size], dtype=tf.float32)
    projection_tensors = tf.random_normal(
        shape=[num_blocks, hidden_size, block_dim], dtype=tf.float32)
    x_projected = discretization.project_hidden(
        x, projection_tensors, hidden_size, num_blocks)
    with self.test_session() as sess:
        tf.global_variables_initializer().run()
        x_projected_eval = sess.run(x_projected)
        self.assertEqual(np.shape(x_projected_eval),
                         (1, num_blocks, block_dim))
        self.assertTrue(np.all(x_projected_eval == 0))
Example #25
Source File: distributions.py From lirpg with MIT License
def sample(self):
    return self.mean + self.std * tf.random_normal(tf.shape(self.mean))
Example #26
Source File: ddpg.py From lirpg with MIT License
def get_perturbed_actor_updates(actor, perturbed_actor, param_noise_stddev):
    assert len(actor.vars) == len(perturbed_actor.vars)
    assert len(actor.perturbable_vars) == len(perturbed_actor.perturbable_vars)

    updates = []
    for var, perturbed_var in zip(actor.vars, perturbed_actor.vars):
        if var in actor.perturbable_vars:
            logger.info('  {} <- {} + noise'.format(perturbed_var.name,
                                                    var.name))
            updates.append(
                tf.assign(perturbed_var,
                          var + tf.random_normal(tf.shape(var), mean=0.,
                                                 stddev=param_noise_stddev)))
        else:
            logger.info('  {} <- {}'.format(perturbed_var.name, var.name))
            updates.append(tf.assign(perturbed_var, var))
    assert len(updates) == len(actor.vars)
    return tf.group(*updates)
Example #27
Source File: policies.py From HardRLWithYoutube with MIT License
def __init__(self, ob_dim, ac_dim):
    # Here we'll construct a bunch of expressions, which will be used in two places:
    # (1) When sampling actions
    # (2) When computing loss functions, for the policy update
    # Variables specific to (1) have the word "sampled" in them,
    # whereas variables specific to (2) have the word "old" in them
    ob_no = tf.placeholder(tf.float32, shape=[None, ob_dim * 2], name="ob")  # batch of observations
    oldac_na = tf.placeholder(tf.float32, shape=[None, ac_dim], name="ac")  # batch of previous actions
    oldac_dist = tf.placeholder(tf.float32, shape=[None, ac_dim * 2], name="oldac_dist")  # batch of previous action distributions
    adv_n = tf.placeholder(tf.float32, shape=[None], name="adv")  # advantage function estimate
    wd_dict = {}
    h1 = tf.nn.tanh(dense(ob_no, 64, "h1", weight_init=U.normc_initializer(1.0), bias_init=0.0, weight_loss_dict=wd_dict))
    h2 = tf.nn.tanh(dense(h1, 64, "h2", weight_init=U.normc_initializer(1.0), bias_init=0.0, weight_loss_dict=wd_dict))
    mean_na = dense(h2, ac_dim, "mean", weight_init=U.normc_initializer(0.1), bias_init=0.0, weight_loss_dict=wd_dict)  # Mean control output
    self.wd_dict = wd_dict
    self.logstd_1a = logstd_1a = tf.get_variable("logstd", [ac_dim], tf.float32, tf.zeros_initializer())  # Variance on outputs
    logstd_1a = tf.expand_dims(logstd_1a, 0)
    std_1a = tf.exp(logstd_1a)
    std_na = tf.tile(std_1a, [tf.shape(mean_na)[0], 1])
    ac_dist = tf.concat([tf.reshape(mean_na, [-1, ac_dim]), tf.reshape(std_na, [-1, ac_dim])], 1)
    # This is the sampled action we'll perform.
    sampled_ac_na = tf.random_normal(tf.shape(ac_dist[:, ac_dim:])) * ac_dist[:, ac_dim:] + ac_dist[:, :ac_dim]
    # Logprob of sampled action
    logprobsampled_n = (- tf.reduce_sum(tf.log(ac_dist[:, ac_dim:]), axis=1)
                        - 0.5 * tf.log(2.0 * np.pi) * ac_dim
                        - 0.5 * tf.reduce_sum(tf.square(ac_dist[:, :ac_dim] - sampled_ac_na) / (tf.square(ac_dist[:, ac_dim:])), axis=1))
    # Logprob of previous actions under CURRENT policy (whereas oldlogprob_n is under OLD policy)
    logprob_n = (- tf.reduce_sum(tf.log(ac_dist[:, ac_dim:]), axis=1)
                 - 0.5 * tf.log(2.0 * np.pi) * ac_dim
                 - 0.5 * tf.reduce_sum(tf.square(ac_dist[:, :ac_dim] - oldac_na) / (tf.square(ac_dist[:, ac_dim:])), axis=1))
    # Approximation of KL divergence between old policy used to generate actions,
    # and new policy used to compute logprob_n
    kl = tf.reduce_mean(kl_div(oldac_dist, ac_dist, ac_dim))
    # kl = .5 * tf.reduce_mean(tf.square(logprob_n - oldlogprob_n))
    surr = - tf.reduce_mean(adv_n * logprob_n)  # Loss function that we'll differentiate to get the policy gradient
    surr_sampled = - tf.reduce_mean(logprob_n)  # Sampled loss of the policy
    self._act = U.function([ob_no], [sampled_ac_na, ac_dist, logprobsampled_n])  # Generate a new action and its logprob
    # self.compute_kl = U.function([ob_no, oldac_na, oldlogprob_n], kl)  # Compute (approximate) KL divergence between old policy and new policy
    self.compute_kl = U.function([ob_no, oldac_dist], kl)
    self.update_info = ((ob_no, oldac_na, adv_n), surr, surr_sampled)  # Input and output variables needed for computing loss
    U.initialize()  # Initialize uninitialized TF variables
Example #28
Source File: ddpg_learner.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0
def get_perturbed_actor_updates(actor, perturbed_actor, param_noise_stddev):
    assert len(actor.vars) == len(perturbed_actor.vars)
    assert len(actor.perturbable_vars) == len(perturbed_actor.perturbable_vars)

    updates = []
    for var, perturbed_var in zip(actor.vars, perturbed_actor.vars):
        if var in actor.perturbable_vars:
            logger.info('  {} <- {} + noise'.format(perturbed_var.name,
                                                    var.name))
            updates.append(
                tf.assign(perturbed_var,
                          var + tf.random_normal(tf.shape(var), mean=0.,
                                                 stddev=param_noise_stddev)))
        else:
            logger.info('  {} <- {}'.format(perturbed_var.name, var.name))
            updates.append(tf.assign(perturbed_var, var))
    assert len(updates) == len(actor.vars)
    return tf.group(*updates)
Example #29
Source File: distributions.py From HardRLWithYoutube with MIT License
def sample(self):
    return self.mean + self.std * tf.random_normal(tf.shape(self.mean))
Example #30
Source File: distributions.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0
def sample(self):
    return self.mean + self.std * tf.random_normal(tf.shape(self.mean))