Python baselines.common.tf_util.get_session() Examples
The following are 29 code examples of baselines.common.tf_util.get_session(), collected from open-source projects and ordered by community votes. Each example lists its source file, the project it comes from, and that project's license. You may also want to check out the other available functions and classes of the baselines.common.tf_util module.
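In the baselines codebase, get_session() returns the current default TensorFlow (1.x) session, creating one if none exists, so that graph construction and sess.run calls throughout a program share the same session. Below is a minimal usage sketch, assuming TF 1.x graph mode and the standard baselines import path; the placeholder graph is illustrative only, not taken from the examples.

    import numpy as np
    import tensorflow as tf
    from baselines.common.tf_util import get_session

    # Obtain (or lazily create) the shared default session used across baselines code.
    sess = get_session()

    # Build a trivial graph and evaluate it with that shared session.
    x = tf.placeholder(tf.float32, shape=(None,))
    sum_of_squares = tf.reduce_sum(tf.square(x))

    print(sess.run(sum_of_squares, feed_dict={x: np.array([1.0, 2.0, 3.0], dtype=np.float32)}))  # 14.0

Subsequent calls return the same session object, which is why the examples below can create variables in one place and run them elsewhere.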
Example #1
Source File: running_mean_std.py From HardRLWithYoutube with MIT License | 6 votes |
def __init__(self, epsilon=1e-4, shape=(), scope=''):
    sess = get_session()

    self._new_mean = tf.placeholder(shape=shape, dtype=tf.float64)
    self._new_var = tf.placeholder(shape=shape, dtype=tf.float64)
    self._new_count = tf.placeholder(shape=(), dtype=tf.float64)

    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        self._mean = tf.get_variable('mean', initializer=np.zeros(shape, 'float64'), dtype=tf.float64)
        self._var = tf.get_variable('std', initializer=np.ones(shape, 'float64'), dtype=tf.float64)
        self._count = tf.get_variable('count', initializer=np.full((), epsilon, 'float64'), dtype=tf.float64)

    self.update_ops = tf.group([
        self._var.assign(self._new_var),
        self._mean.assign(self._new_mean),
        self._count.assign(self._new_count)
    ])

    sess.run(tf.variables_initializer([self._mean, self._var, self._count]))
    self.sess = sess
    self._set_mean_var_count()
Example #2
Source File: running_mean_std.py From baselines with MIT License | 6 votes |
def __init__(self, epsilon=1e-4, shape=(), scope=''):
    sess = get_session()

    self._new_mean = tf.placeholder(shape=shape, dtype=tf.float64)
    self._new_var = tf.placeholder(shape=shape, dtype=tf.float64)
    self._new_count = tf.placeholder(shape=(), dtype=tf.float64)

    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        self._mean = tf.get_variable('mean', initializer=np.zeros(shape, 'float64'), dtype=tf.float64)
        self._var = tf.get_variable('std', initializer=np.ones(shape, 'float64'), dtype=tf.float64)
        self._count = tf.get_variable('count', initializer=np.full((), epsilon, 'float64'), dtype=tf.float64)

    self.update_ops = tf.group([
        self._var.assign(self._new_var),
        self._mean.assign(self._new_mean),
        self._count.assign(self._new_count)
    ])

    sess.run(tf.variables_initializer([self._mean, self._var, self._count]))
    self.sess = sess
    self._set_mean_var_count()
Example #3
Source File: running_mean_std.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def __init__(self, epsilon=1e-4, shape=(), scope=''):
    sess = get_session()

    self._new_mean = tf.placeholder(shape=shape, dtype=tf.float64)
    self._new_var = tf.placeholder(shape=shape, dtype=tf.float64)
    self._new_count = tf.placeholder(shape=(), dtype=tf.float64)

    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        self._mean = tf.get_variable('mean', initializer=np.zeros(shape, 'float64'), dtype=tf.float64)
        self._var = tf.get_variable('std', initializer=np.ones(shape, 'float64'), dtype=tf.float64)
        self._count = tf.get_variable('count', initializer=np.full((), epsilon, 'float64'), dtype=tf.float64)

    self.update_ops = tf.group([
        self._var.assign(self._new_var),
        self._mean.assign(self._new_mean),
        self._count.assign(self._new_count)
    ])

    sess.run(tf.variables_initializer([self._mean, self._var, self._count]))
    self.sess = sess
    self._set_mean_var_count()
Example #4
Source File: running_mean_std.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def __init__(self, epsilon=1e-4, shape=(), scope=''):
    sess = get_session()

    self._new_mean = tf.placeholder(shape=shape, dtype=tf.float64)
    self._new_var = tf.placeholder(shape=shape, dtype=tf.float64)
    self._new_count = tf.placeholder(shape=(), dtype=tf.float64)

    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        self._mean = tf.get_variable('mean', initializer=np.zeros(shape, 'float64'), dtype=tf.float64)
        self._var = tf.get_variable('std', initializer=np.ones(shape, 'float64'), dtype=tf.float64)
        self._count = tf.get_variable('count', initializer=np.full((), epsilon, 'float64'), dtype=tf.float64)

    self.update_ops = tf.group([
        self._var.assign(self._new_var),
        self._mean.assign(self._new_mean),
        self._count.assign(self._new_count)
    ])

    sess.run(tf.variables_initializer([self._mean, self._var, self._count]))
    self.sess = sess
    self._set_mean_var_count()
Example #5
Source File: mpi_adam_optimizer.py From baselines with MIT License | 6 votes |
def test_nonfreeze():
    np.random.seed(0)
    tf.set_random_seed(0)

    a = tf.Variable(np.random.randn(3).astype('float32'))
    b = tf.Variable(np.random.randn(2, 5).astype('float32'))
    loss = tf.reduce_sum(tf.square(a)) + tf.reduce_sum(tf.sin(b))
    stepsize = 1e-2

    # for some reason the session config with inter_op_parallelism_threads was causing
    # nested sess.run calls to freeze
    config = tf.ConfigProto(inter_op_parallelism_threads=1)
    sess = U.get_session(config=config)
    update_op = MpiAdamOptimizer(comm=MPI.COMM_WORLD, learning_rate=stepsize).minimize(loss)
    sess.run(tf.global_variables_initializer())
    losslist_ref = []
    for i in range(100):
        l, _ = sess.run([loss, update_op])
        print(i, l)
        losslist_ref.append(l)
Example #6
Source File: running_mean_std.py From ICML2019-TREX with MIT License | 6 votes |
def __init__(self, epsilon=1e-4, shape=(), scope=''):
    sess = get_session()

    self._new_mean = tf.placeholder(shape=shape, dtype=tf.float64)
    self._new_var = tf.placeholder(shape=shape, dtype=tf.float64)
    self._new_count = tf.placeholder(shape=(), dtype=tf.float64)

    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        self._mean = tf.get_variable('mean', initializer=np.zeros(shape, 'float64'), dtype=tf.float64)
        self._var = tf.get_variable('std', initializer=np.ones(shape, 'float64'), dtype=tf.float64)
        self._count = tf.get_variable('count', initializer=np.full((), epsilon, 'float64'), dtype=tf.float64)

    self.update_ops = tf.group([
        self._var.assign(self._new_var),
        self._mean.assign(self._new_mean),
        self._count.assign(self._new_count)
    ])

    sess.run(tf.variables_initializer([self._mean, self._var, self._count]))
    self.sess = sess
    self._set_mean_var_count()
Example #7
Source File: pposgd_sensor.py From midlevel-reps with MIT License | 5 votes |
def load(path):
    with open(path, "rb") as f:
        model_data = cloudpickle.load(f)
    sess = U.get_session()
    sess.__enter__()
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)
        zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
        U.load_state(os.path.join(td, "model"))
    # return ActWrapper(act, act_params)
Example #8
Source File: statistics.py From lirpg with MIT License | 5 votes |
def add_all_summary(self, writer, values, iter):
    # Note that the order of the incoming ```values``` should be the same as that of the
    # ```scalar_keys``` given in ```__init__```
    if np.sum(np.isnan(values)+0) != 0:
        return
    sess = U.get_session()
    keys = self.scalar_summaries_ph + self.histogram_summaries_ph
    feed_dict = {}
    for k, v in zip(keys, values):
        feed_dict.update({k: v})
    summaries_str = sess.run(self.summaries, feed_dict)
    writer.add_summary(summaries_str, iter)
Example #9
Source File: pposgd_fuse.py From midlevel-reps with MIT License | 5 votes |
def load(path):
    with open(path, "rb") as f:
        model_data = cloudpickle.load(f)
    sess = U.get_session()
    sess.__enter__()
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)
        zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
        U.load_state(os.path.join(td, "model"))
    # return ActWrapper(act, act_params)
Example #10
Source File: pposgd_simple.py From midlevel-reps with MIT License | 5 votes |
def load(path):
    with open(path, "rb") as f:
        model_data = cloudpickle.load(f)
    sess = U.get_session()
    sess.__enter__()
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)
        zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
        U.load_state(os.path.join(td, "model"))
    # return ActWrapper(act, act_params)
Example #11
Source File: test_serialization.py From baselines with MIT License | 5 votes |
def _serialize_variables():
    sess = get_session()
    variables = tf.trainable_variables()
    values = sess.run(variables)
    return {var.name: value for var, value in zip(variables, values)}
Example #12
Source File: statistics.py From baselines with MIT License | 5 votes |
def add_all_summary(self, writer, values, iter):
    # Note that the order of the incoming ```values``` should be the same as that of the
    # ```scalar_keys``` given in ```__init__```
    if np.sum(np.isnan(values)+0) != 0:
        return
    sess = U.get_session()
    keys = self.scalar_summaries_ph + self.histogram_summaries_ph
    feed_dict = {}
    for k, v in zip(keys, values):
        feed_dict.update({k: v})
    summaries_str = sess.run(self.summaries, feed_dict)
    writer.add_summary(summaries_str, iter)
Example #13
Source File: statistics.py From self-imitation-learning with MIT License | 5 votes |
def add_all_summary(self, writer, values, iter):
    # Note that the order of the incoming ```values``` should be the same as that of the
    # ```scalar_keys``` given in ```__init__```
    if np.sum(np.isnan(values)+0) != 0:
        return
    sess = U.get_session()
    keys = self.scalar_summaries_ph + self.histogram_summaries_ph
    feed_dict = {}
    for k, v in zip(keys, values):
        feed_dict.update({k: v})
    summaries_str = sess.run(self.summaries, feed_dict)
    writer.add_summary(summaries_str, iter)
Example #14
Source File: statistics.py From sonic_contest with MIT License | 5 votes |
def add_all_summary(self, writer, values, iter):
    # Note that the order of the incoming ```values``` should be the same as that of the
    # ```scalar_keys``` given in ```__init__```
    if np.sum(np.isnan(values)+0) != 0:
        return
    sess = U.get_session()
    keys = self.scalar_summaries_ph + self.histogram_summaries_ph
    feed_dict = {}
    for k, v in zip(keys, values):
        feed_dict.update({k: v})
    summaries_str = sess.run(self.summaries, feed_dict)
    writer.add_summary(summaries_str, iter)
Example #15
Source File: enjoy.py From rl-attack-detection with MIT License | 5 votes |
def load_visual_foresight(game_name):
    sess = U.get_session()
    from baselines.deepq.prediction.tfacvp.model import ActionConditionalVideoPredictionModel

    gen_dir = './atari-visual-foresight/'
    model_path = os.path.join(gen_dir, '{}/model.ckpt'.format(game_name))
    mean_path = os.path.join(gen_dir, '{}/mean.npy'.format(game_name))
    game_screen_mean = np.load(mean_path)

    with tf.variable_scope('G'):
        # NOTE: `env` is not an argument of this function; it is assumed to be
        # defined at module level elsewhere in this script.
        foresight = ActionConditionalVideoPredictionModel(num_act=env.action_space.n, num_channel=1, is_train=False)
        foresight.restore(sess, model_path, 'G')

    return foresight, game_screen_mean
Example #16
Source File: statistics.py From NoisyNet-DQN with MIT License | 5 votes |
def add_all_summary(self, writer, values, iter):
    # Note that the order of the incoming ```values``` should be the same as that of the
    # ```scalar_keys``` given in ```__init__```
    if np.sum(np.isnan(values)+0) != 0:
        return
    sess = U.get_session()
    keys = self.scalar_summaries_ph + self.histogram_summaries_ph
    feed_dict = {}
    for k, v in zip(keys, values):
        feed_dict.update({k: v})
    summaries_str = sess.run(self.summaries, feed_dict)
    writer.add_summary(summaries_str, iter)
Example #17
Source File: test_serialization.py From ICML2019-TREX with MIT License | 5 votes |
def _serialize_variables():
    sess = get_session()
    variables = tf.trainable_variables()
    values = sess.run(variables)
    return {var.name: value for var, value in zip(variables, values)}
Example #18
Source File: statistics.py From ICML2019-TREX with MIT License | 5 votes |
def add_all_summary(self, writer, values, iter):
    # Note that the order of the incoming ```values``` should be the same as that of the
    # ```scalar_keys``` given in ```__init__```
    if np.sum(np.isnan(values)+0) != 0:
        return
    sess = U.get_session()
    keys = self.scalar_summaries_ph + self.histogram_summaries_ph
    feed_dict = {}
    for k, v in zip(keys, values):
        feed_dict.update({k: v})
    summaries_str = sess.run(self.summaries, feed_dict)
    writer.add_summary(summaries_str, iter)
Example #19
Source File: test_serialization.py From ICML2019-TREX with MIT License | 5 votes |
def _serialize_variables():
    sess = get_session()
    variables = tf.trainable_variables()
    values = sess.run(variables)
    return {var.name: value for var, value in zip(variables, values)}
Example #20
Source File: statistics.py From ICML2019-TREX with MIT License | 5 votes |
def add_all_summary(self, writer, values, iter):
    # Note that the order of the incoming ```values``` should be the same as that of the
    # ```scalar_keys``` given in ```__init__```
    if np.sum(np.isnan(values)+0) != 0:
        return
    sess = U.get_session()
    keys = self.scalar_summaries_ph + self.histogram_summaries_ph
    feed_dict = {}
    for k, v in zip(keys, values):
        feed_dict.update({k: v})
    summaries_str = sess.run(self.summaries, feed_dict)
    writer.add_summary(summaries_str, iter)
Example #21
Source File: statistics.py From DRL_DeliveryDuel with MIT License | 5 votes |
def add_all_summary(self, writer, values, iter):
    # Note that the order of the incoming ```values``` should be the same as that of the
    # ```scalar_keys``` given in ```__init__```
    if np.sum(np.isnan(values)+0) != 0:
        return
    sess = U.get_session()
    keys = self.scalar_summaries_ph + self.histogram_summaries_ph
    feed_dict = {}
    for k, v in zip(keys, values):
        feed_dict.update({k: v})
    summaries_str = sess.run(self.summaries, feed_dict)
    writer.add_summary(summaries_str, iter)
Example #22
Source File: statistics.py From rl_graph_generation with BSD 3-Clause "New" or "Revised" License | 5 votes |
def add_all_summary(self, writer, values, iter):
    # Note that the order of the incoming ```values``` should be the same as that of the
    # ```scalar_keys``` given in ```__init__```
    if np.sum(np.isnan(values)+0) != 0:
        return
    sess = U.get_session()
    keys = self.scalar_summaries_ph + self.histogram_summaries_ph
    feed_dict = {}
    for k, v in zip(keys, values):
        feed_dict.update({k: v})
    summaries_str = sess.run(self.summaries, feed_dict)
    writer.add_summary(summaries_str, iter)
Example #23
Source File: test_serialization.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 5 votes |
def _serialize_variables():
    sess = get_session()
    variables = tf.trainable_variables()
    values = sess.run(variables)
    return {var.name: value for var, value in zip(variables, values)}
Example #24
Source File: test_serialization.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 5 votes |
def _serialize_variables():
    sess = get_session()
    variables = tf.trainable_variables()
    values = sess.run(variables)
    return {var.name: value for var, value in zip(variables, values)}
Example #25
Source File: test_serialization.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 5 votes |
def _serialize_variables():
    sess = get_session()
    variables = tf.trainable_variables()
    values = sess.run(variables)
    return {var.name: value for var, value in zip(variables, values)}
Example #26
Source File: test_serialization.py From HardRLWithYoutube with MIT License | 5 votes |
def _serialize_variables():
    sess = get_session()
    variables = tf.trainable_variables()
    values = sess.run(variables)
    return {var.name: value for var, value in zip(variables, values)}
Example #27
Source File: statistics.py From HardRLWithYoutube with MIT License | 5 votes |
def add_all_summary(self, writer, values, iter):
    # Note that the order of the incoming ```values``` should be the same as that of the
    # ```scalar_keys``` given in ```__init__```
    if np.sum(np.isnan(values)+0) != 0:
        return
    sess = U.get_session()
    keys = self.scalar_summaries_ph + self.histogram_summaries_ph
    feed_dict = {}
    for k, v in zip(keys, values):
        feed_dict.update({k: v})
    summaries_str = sess.run(self.summaries, feed_dict)
    writer.add_summary(summaries_str, iter)
Example #28
Source File: enjoy.py From rl-attack-detection with MIT License | 4 votes |
def play(env, act, craft_adv_obs, stochastic, video_path, game_name, attack, defense):
    if defense == 'foresight':
        vf, game_screen_mean = load_visual_foresight(game_name)
        pred_obs = deque(maxlen=4)

    num_episodes = 0
    video_recorder = None
    video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)
    t = 0
    obs = env.reset()
    while True:
        #env.unwrapped.render()
        video_recorder.capture_frame()

        # Attack
        if craft_adv_obs != None:
            # Craft adv. examples
            adv_obs = craft_adv_obs(np.array(obs)[None], stochastic=stochastic)[0]
            action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]

        # Defense
        if t > 4 and defense == 'foresight':
            pred_obs.append(
                foresee(U.get_session(), old_obs, old_action, np.array(obs),
                        game_screen_mean, vf, env.action_space.n, t)
            )
            if len(pred_obs) == 4:
                action = act(np.stack(pred_obs, axis=2)[None], stochastic=stochastic)[0]

        old_obs = obs
        old_action = action

        # RL loop
        obs, rew, done, info = env.step(action)
        t += 1
        if done:
            t = 0
            obs = env.reset()
            if len(info["rewards"]) > num_episodes:
                if len(info["rewards"]) == 1 and video_recorder.enabled:
                    # save video of first episode
                    print("Saved video.")
                    video_recorder.close()
                    video_recorder.enabled = False
                print(info["rewards"][-1])
                num_episodes = len(info["rewards"])
Example #29
Source File: a2c.py From HardRLWithYoutube with MIT License | 4 votes |
def __init__(self, policy, env, nsteps,
        ent_coef=0.01, vf_coef=0.5, max_grad_norm=0.5, lr=7e-4,
        alpha=0.99, epsilon=1e-5, total_timesteps=int(80e6), lrschedule='linear'):

    sess = tf_util.get_session()
    nenvs = env.num_envs
    nbatch = nenvs*nsteps

    with tf.variable_scope('a2c_model', reuse=tf.AUTO_REUSE):
        step_model = policy(nenvs, 1, sess)
        train_model = policy(nbatch, nsteps, sess)

    A = tf.placeholder(train_model.action.dtype, train_model.action.shape)
    ADV = tf.placeholder(tf.float32, [nbatch])
    R = tf.placeholder(tf.float32, [nbatch])
    LR = tf.placeholder(tf.float32, [])

    neglogpac = train_model.pd.neglogp(A)
    entropy = tf.reduce_mean(train_model.pd.entropy())

    pg_loss = tf.reduce_mean(ADV * neglogpac)
    vf_loss = losses.mean_squared_error(tf.squeeze(train_model.vf), R)

    loss = pg_loss - entropy*ent_coef + vf_loss * vf_coef

    params = find_trainable_variables("a2c_model")
    grads = tf.gradients(loss, params)
    if max_grad_norm is not None:
        grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
    grads = list(zip(grads, params))

    trainer = tf.train.RMSPropOptimizer(learning_rate=LR, decay=alpha, epsilon=epsilon)
    _train = trainer.apply_gradients(grads)

    lr = Scheduler(v=lr, nvalues=total_timesteps, schedule=lrschedule)

    def train(obs, states, rewards, masks, actions, values):
        advs = rewards - values
        for step in range(len(obs)):
            cur_lr = lr.value()

        td_map = {train_model.X: obs, A: actions, ADV: advs, R: rewards, LR: cur_lr}
        if states is not None:
            td_map[train_model.S] = states
            td_map[train_model.M] = masks

        policy_loss, value_loss, policy_entropy, _ = sess.run(
            [pg_loss, vf_loss, entropy, _train],
            td_map
        )
        return policy_loss, value_loss, policy_entropy

    self.train = train
    self.train_model = train_model
    self.step_model = step_model
    self.step = step_model.step
    self.value = step_model.value
    self.initial_state = step_model.initial_state
    self.save = functools.partial(tf_util.save_variables, sess=sess)
    self.load = functools.partial(tf_util.load_variables, sess=sess)
    tf.global_variables_initializer().run(session=sess)