Python baselines.common.tf_util.get_session() Examples

The following are 29 code examples of baselines.common.tf_util.get_session(), drawn from open-source projects; the source file and originating project are noted above each example. You may also want to check out all available functions/classes of the module baselines.common.tf_util.
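For reference, get_session() returns the current default TensorFlow session, creating a new one if none exists, so separate modules end up sharing a single session. A minimal sketch of direct usage (the toy graph below is illustrative only):

import tensorflow as tf
from baselines.common.tf_util import get_session

sess = get_session()  # returns the default session, or makes a new one

x = tf.placeholder(tf.float32, shape=(), name='x')
y = 2.0 * x
print(sess.run(y, feed_dict={x: 3.0}))  # prints 6.0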
Example #1
Source File: running_mean_std.py    From HardRLWithYoutube with MIT License
def __init__(self, epsilon=1e-4, shape=(), scope=''):
        sess = get_session()

        # Placeholders used to push freshly combined statistics into the graph.
        self._new_mean = tf.placeholder(shape=shape, dtype=tf.float64)
        self._new_var = tf.placeholder(shape=shape, dtype=tf.float64)
        self._new_count = tf.placeholder(shape=(), dtype=tf.float64)

        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            # Note: the variable holding the variance is named 'std' in the
            # checkpoint; this matches the original baselines code.
            self._mean  = tf.get_variable('mean',  initializer=np.zeros(shape, 'float64'),      dtype=tf.float64)
            self._var   = tf.get_variable('std',   initializer=np.ones(shape, 'float64'),       dtype=tf.float64)
            self._count = tf.get_variable('count', initializer=np.full((), epsilon, 'float64'), dtype=tf.float64)

        self.update_ops = tf.group([
            self._var.assign(self._new_var),
            self._mean.assign(self._new_mean),
            self._count.assign(self._new_count)
        ])

        sess.run(tf.variables_initializer([self._mean, self._var, self._count]))
        self.sess = sess
        self._set_mean_var_count() 
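The update path that feeds these placeholders is not shown above; below is a hedged sketch of the update method, following the parallel-variance (Chan et al.) combination used in baselines' RunningMeanStd. It assumes self.mean, self.var, and self.count are the numpy copies cached by _set_mean_var_count():

def update(self, x):
        # Moments of the incoming batch.
        batch_mean = np.mean(x, axis=0)
        batch_var = np.var(x, axis=0)
        batch_count = x.shape[0]

        # Combine batch moments with the running moments.
        delta = batch_mean - self.mean
        tot_count = self.count + batch_count
        new_mean = self.mean + delta * batch_count / tot_count
        m_a = self.var * self.count
        m_b = batch_var * batch_count
        m_2 = m_a + m_b + np.square(delta) * self.count * batch_count / tot_count
        new_var = m_2 / tot_count

        # Push the combined statistics into the TF variables via update_ops.
        self.sess.run(self.update_ops, feed_dict={
            self._new_mean: new_mean,
            self._new_var: new_var,
            self._new_count: tot_count,
        })
        self._set_mean_var_count()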
Example #2
Source File: running_mean_std.py    From baselines with MIT License
def __init__(self, epsilon=1e-4, shape=(), scope=''):
        sess = get_session()

        self._new_mean = tf.placeholder(shape=shape, dtype=tf.float64)
        self._new_var = tf.placeholder(shape=shape, dtype=tf.float64)
        self._new_count = tf.placeholder(shape=(), dtype=tf.float64)


        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            self._mean  = tf.get_variable('mean',  initializer=np.zeros(shape, 'float64'),      dtype=tf.float64)
            self._var   = tf.get_variable('std',   initializer=np.ones(shape, 'float64'),       dtype=tf.float64)
            self._count = tf.get_variable('count', initializer=np.full((), epsilon, 'float64'), dtype=tf.float64)

        self.update_ops = tf.group([
            self._var.assign(self._new_var),
            self._mean.assign(self._new_mean),
            self._count.assign(self._new_count)
        ])

        sess.run(tf.variables_initializer([self._mean, self._var, self._count]))
        self.sess = sess
        self._set_mean_var_count() 
Example #3
Source File: running_mean_std.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0
def __init__(self, epsilon=1e-4, shape=(), scope=''):
        sess = get_session()

        self._new_mean = tf.placeholder(shape=shape, dtype=tf.float64)
        self._new_var = tf.placeholder(shape=shape, dtype=tf.float64)
        self._new_count = tf.placeholder(shape=(), dtype=tf.float64)


        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            self._mean  = tf.get_variable('mean',  initializer=np.zeros(shape, 'float64'),      dtype=tf.float64)
            self._var   = tf.get_variable('std',   initializer=np.ones(shape, 'float64'),       dtype=tf.float64)
            self._count = tf.get_variable('count', initializer=np.full((), epsilon, 'float64'), dtype=tf.float64)

        self.update_ops = tf.group([
            self._var.assign(self._new_var),
            self._mean.assign(self._new_mean),
            self._count.assign(self._new_count)
        ])

        sess.run(tf.variables_initializer([self._mean, self._var, self._count]))
        self.sess = sess
        self._set_mean_var_count() 
Example #4
Source File: running_mean_std.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0
def __init__(self, epsilon=1e-4, shape=(), scope=''):
        sess = get_session()

        self._new_mean = tf.placeholder(shape=shape, dtype=tf.float64)
        self._new_var = tf.placeholder(shape=shape, dtype=tf.float64)
        self._new_count = tf.placeholder(shape=(), dtype=tf.float64)


        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            self._mean  = tf.get_variable('mean',  initializer=np.zeros(shape, 'float64'),      dtype=tf.float64)
            self._var   = tf.get_variable('std',   initializer=np.ones(shape, 'float64'),       dtype=tf.float64)
            self._count = tf.get_variable('count', initializer=np.full((), epsilon, 'float64'), dtype=tf.float64)

        self.update_ops = tf.group([
            self._var.assign(self._new_var),
            self._mean.assign(self._new_mean),
            self._count.assign(self._new_count)
        ])

        sess.run(tf.variables_initializer([self._mean, self._var, self._count]))
        self.sess = sess
        self._set_mean_var_count() 
Example #5
Source File: mpi_adam_optimizer.py    From baselines with MIT License
def test_nonfreeze():
    np.random.seed(0)
    tf.set_random_seed(0)

    a = tf.Variable(np.random.randn(3).astype('float32'))
    b = tf.Variable(np.random.randn(2, 5).astype('float32'))
    loss = tf.reduce_sum(tf.square(a)) + tf.reduce_sum(tf.sin(b))

    stepsize = 1e-2
    # for some reason the session config with inter_op_parallelism_threads was causing
    # nested sess.run calls to freeze
    config = tf.ConfigProto(inter_op_parallelism_threads=1)
    sess = U.get_session(config=config)
    update_op = MpiAdamOptimizer(comm=MPI.COMM_WORLD, learning_rate=stepsize).minimize(loss)
    sess.run(tf.global_variables_initializer())
    losslist_ref = []
    for i in range(100):
        l, _ = sess.run([loss, update_op])
        print(i, l)
        losslist_ref.append(l) 
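This test only exercises gradient averaging across more than one worker when launched under MPI; a sketch of how it might be invoked, assuming the file is runnable as a script:

# e.g.  mpirun -np 2 python -m baselines.common.mpi_adam_optimizer
if __name__ == '__main__':
    test_nonfreeze()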
Example #6
Source File: running_mean_std.py    From ICML2019-TREX with MIT License
def __init__(self, epsilon=1e-4, shape=(), scope=''):
        sess = get_session()

        self._new_mean = tf.placeholder(shape=shape, dtype=tf.float64)
        self._new_var = tf.placeholder(shape=shape, dtype=tf.float64)
        self._new_count = tf.placeholder(shape=(), dtype=tf.float64)


        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            self._mean  = tf.get_variable('mean',  initializer=np.zeros(shape, 'float64'),      dtype=tf.float64)
            self._var   = tf.get_variable('std',   initializer=np.ones(shape, 'float64'),       dtype=tf.float64)
            self._count = tf.get_variable('count', initializer=np.full((), epsilon, 'float64'), dtype=tf.float64)

        self.update_ops = tf.group([
            self._var.assign(self._new_var),
            self._mean.assign(self._new_mean),
            self._count.assign(self._new_count)
        ])

        sess.run(tf.variables_initializer([self._mean, self._var, self._count]))
        self.sess = sess
        self._set_mean_var_count() 
Example #7
Source File: pposgd_sensor.py    From midlevel-reps with MIT License
def load(path):
    with open(path, "rb") as f:
        model_data = cloudpickle.load(f)
    sess = U.get_session()
    sess.__enter__()
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)

        zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
        U.load_state(os.path.join(td, "model"))
    #return ActWrapper(act, act_params) 
Example #8
Source File: statistics.py    From lirpg with MIT License
def add_all_summary(self, writer, values, iter):
        # Note that the order of the incoming `values` should be the same as
        # that of the `scalar_keys` given in `__init__`.
        if np.sum(np.isnan(values)+0) != 0:
            return
        sess = U.get_session()
        keys = self.scalar_summaries_ph + self.histogram_summaries_ph
        feed_dict = {}
        for k, v in zip(keys, values):
            feed_dict.update({k: v})
        summaries_str = sess.run(self.summaries, feed_dict)
        writer.add_summary(summaries_str, iter) 
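The placeholders referenced here are built by the class's __init__, which is not shown. A hedged usage sketch follows, assuming the enclosing class is named stats with a scalar_keys argument, and with run_iteration and num_iters as hypothetical stand-ins:

import tensorflow as tf
from baselines.common import tf_util as U

writer = tf.summary.FileWriter('/tmp/tb_logs', U.get_session().graph)
logger = stats(scalar_keys=['episode_reward', 'policy_loss'])
for it in range(num_iters):
    episode_reward, policy_loss = run_iteration()  # hypothetical helper
    logger.add_all_summary(writer, [episode_reward, policy_loss], it)
writer.flush()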
Example #9
Source File: pposgd_fuse.py    From midlevel-reps with MIT License
def load(path):
    with open(path, "rb") as f:
        model_data = cloudpickle.load(f)
    sess = U.get_session()
    sess.__enter__()
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)

        zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
        U.load_state(os.path.join(td, "model"))
    # return ActWrapper(act, act_params) 
Example #10
Source File: pposgd_simple.py    From midlevel-reps with MIT License
def load(path):
    with open(path, "rb") as f:
        model_data = cloudpickle.load(f)
    sess = U.get_session()
    sess.__enter__()
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)

        zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
        U.load_state(os.path.join(td, "model"))
    #return ActWrapper(act, act_params) 
Example #11
Source File: test_serialization.py    From baselines with MIT License
def _serialize_variables():
    sess = get_session()
    variables = tf.trainable_variables()
    values = sess.run(variables)
    return {var.name: value for var, value in zip(variables, values)} 
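In the serialization tests this snapshot is taken before and after a save/load round trip; a sketch of the comparison step (the helper name and tolerance are assumptions):

def assert_variables_equal(before, after):
    # Both snapshots map variable names to numpy values.
    assert set(before.keys()) == set(after.keys())
    for name, value in before.items():
        np.testing.assert_allclose(value, after[name], atol=1e-6)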
Example #12
Source File: statistics.py    From baselines with MIT License
def add_all_summary(self, writer, values, iter):
        # Note that the order of the incoming `values` should be the same as
        # that of the `scalar_keys` given in `__init__`.
        if np.sum(np.isnan(values)+0) != 0:
            return
        sess = U.get_session()
        keys = self.scalar_summaries_ph + self.histogram_summaries_ph
        feed_dict = {}
        for k, v in zip(keys, values):
            feed_dict.update({k: v})
        summaries_str = sess.run(self.summaries, feed_dict)
        writer.add_summary(summaries_str, iter) 
Example #13
Source File: statistics.py    From self-imitation-learning with MIT License
def add_all_summary(self, writer, values, iter):
        # Note that the order of the incoming `values` should be the same as
        # that of the `scalar_keys` given in `__init__`.
        if np.sum(np.isnan(values)+0) != 0:
            return
        sess = U.get_session()
        keys = self.scalar_summaries_ph + self.histogram_summaries_ph
        feed_dict = {}
        for k, v in zip(keys, values):
            feed_dict.update({k: v})
        summaries_str = sess.run(self.summaries, feed_dict)
        writer.add_summary(summaries_str, iter) 
Example #14
Source File: statistics.py    From sonic_contest with MIT License
def add_all_summary(self, writer, values, iter):
        # Note that the order of the incoming `values` should be the same as
        # that of the `scalar_keys` given in `__init__`.
        if np.sum(np.isnan(values)+0) != 0:
            return
        sess = U.get_session()
        keys = self.scalar_summaries_ph + self.histogram_summaries_ph
        feed_dict = {}
        for k, v in zip(keys, values):
            feed_dict.update({k: v})
        summaries_str = sess.run(self.summaries, feed_dict)
        writer.add_summary(summaries_str, iter) 
Example #15
Source File: enjoy.py    From rl-attack-detection with MIT License
def load_visual_foresight(game_name):
    sess = U.get_session()
    from baselines.deepq.prediction.tfacvp.model import ActionConditionalVideoPredictionModel
    gen_dir = './atari-visual-foresight/'
    model_path = os.path.join(gen_dir, '{}/model.ckpt'.format(game_name))
    mean_path = os.path.join(gen_dir, '{}/mean.npy'.format(game_name))
    game_screen_mean = np.load(mean_path)
    with tf.variable_scope('G'):
        # NOTE: `env` is not a parameter of this function; it is read from
        # the enclosing module scope in enjoy.py.
        foresight = ActionConditionalVideoPredictionModel(num_act=env.action_space.n, num_channel=1, is_train=False)
        foresight.restore(sess, model_path, 'G')
    return foresight, game_screen_mean 
Example #16
Source File: statistics.py    From NoisyNet-DQN with MIT License
def add_all_summary(self, writer, values, iter):
        # Note that the order of the incoming `values` should be the same as
        # that of the `scalar_keys` given in `__init__`.
        if np.sum(np.isnan(values)+0) != 0:
            return
        sess = U.get_session()
        keys = self.scalar_summaries_ph + self.histogram_summaries_ph
        feed_dict = {}
        for k, v in zip(keys, values):
            feed_dict.update({k:v})
        summaries_str = sess.run(self.summaries, feed_dict)
        writer.add_summary(summaries_str, iter) 
Example #17
Source File: test_serialization.py    From ICML2019-TREX with MIT License
def _serialize_variables():
    sess = get_session()
    variables = tf.trainable_variables()
    values = sess.run(variables)
    return {var.name: value for var, value in zip(variables, values)} 
Example #18
Source File: statistics.py    From ICML2019-TREX with MIT License
def add_all_summary(self, writer, values, iter):
        # Note that the order of the incoming `values` should be the same as
        # that of the `scalar_keys` given in `__init__`.
        if np.sum(np.isnan(values)+0) != 0:
            return
        sess = U.get_session()
        keys = self.scalar_summaries_ph + self.histogram_summaries_ph
        feed_dict = {}
        for k, v in zip(keys, values):
            feed_dict.update({k: v})
        summaries_str = sess.run(self.summaries, feed_dict)
        writer.add_summary(summaries_str, iter) 
Example #19
Source File: test_serialization.py    From ICML2019-TREX with MIT License
def _serialize_variables():
    sess = get_session()
    variables = tf.trainable_variables()
    values = sess.run(variables)
    return {var.name: value for var, value in zip(variables, values)} 
Example #20
Source File: statistics.py    From ICML2019-TREX with MIT License
def add_all_summary(self, writer, values, iter):
        # Note that the order of the incoming `values` should be the same as
        # that of the `scalar_keys` given in `__init__`.
        if np.sum(np.isnan(values)+0) != 0:
            return
        sess = U.get_session()
        keys = self.scalar_summaries_ph + self.histogram_summaries_ph
        feed_dict = {}
        for k, v in zip(keys, values):
            feed_dict.update({k: v})
        summaries_str = sess.run(self.summaries, feed_dict)
        writer.add_summary(summaries_str, iter) 
Example #21
Source File: statistics.py    From DRL_DeliveryDuel with MIT License
def add_all_summary(self, writer, values, iter):
        # Note that the order of the incoming `values` should be the same as
        # that of the `scalar_keys` given in `__init__`.
        if np.sum(np.isnan(values)+0) != 0:
            return
        sess = U.get_session()
        keys = self.scalar_summaries_ph + self.histogram_summaries_ph
        feed_dict = {}
        for k, v in zip(keys, values):
            feed_dict.update({k: v})
        summaries_str = sess.run(self.summaries, feed_dict)
        writer.add_summary(summaries_str, iter) 
Example #22
Source File: statistics.py    From rl_graph_generation with BSD 3-Clause "New" or "Revised" License
def add_all_summary(self, writer, values, iter):
        # Note that the order of the incoming `values` should be the same as
        # that of the `scalar_keys` given in `__init__`.
        if np.sum(np.isnan(values)+0) != 0:
            return
        sess = U.get_session()
        keys = self.scalar_summaries_ph + self.histogram_summaries_ph
        feed_dict = {}
        for k, v in zip(keys, values):
            feed_dict.update({k: v})
        summaries_str = sess.run(self.summaries, feed_dict)
        writer.add_summary(summaries_str, iter) 
Example #23
Source File: test_serialization.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0
def _serialize_variables():
    sess = get_session()
    variables = tf.trainable_variables()
    values = sess.run(variables)
    return {var.name: value for var, value in zip(variables, values)} 
Example #24
Source File: test_serialization.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0
def _serialize_variables():
    sess = get_session()
    variables = tf.trainable_variables()
    values = sess.run(variables)
    return {var.name: value for var, value in zip(variables, values)} 
Example #25
Source File: test_serialization.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0
def _serialize_variables():
    sess = get_session()
    variables = tf.trainable_variables()
    values = sess.run(variables)
    return {var.name: value for var, value in zip(variables, values)} 
Example #26
Source File: test_serialization.py    From HardRLWithYoutube with MIT License
def _serialize_variables():
    sess = get_session()
    variables = tf.trainable_variables()    
    values = sess.run(variables)
    return {var.name: value for var, value in zip(variables, values)} 
Example #27
Source File: statistics.py    From HardRLWithYoutube with MIT License
def add_all_summary(self, writer, values, iter):
        # Note that the order of the incoming `values` should be the same as
        # that of the `scalar_keys` given in `__init__`.
        if np.sum(np.isnan(values)+0) != 0:
            return
        sess = U.get_session()
        keys = self.scalar_summaries_ph + self.histogram_summaries_ph
        feed_dict = {}
        for k, v in zip(keys, values):
            feed_dict.update({k: v})
        summaries_str = sess.run(self.summaries, feed_dict)
        writer.add_summary(summaries_str, iter) 
Example #28
Source File: enjoy.py    From rl-attack-detection with MIT License
def play(env, act, craft_adv_obs, stochastic, video_path, game_name, attack, defense):
    if defense == 'foresight':
        vf, game_screen_mean = load_visual_foresight(game_name)
        pred_obs = deque(maxlen=4)

    num_episodes = 0
    video_recorder = None
    video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)

    t = 0
    obs = env.reset()
    while True:
        #env.unwrapped.render()
        video_recorder.capture_frame()

        # Attack
        if craft_adv_obs is not None:
            # Craft adv. examples
            adv_obs = craft_adv_obs(np.array(obs)[None], stochastic=stochastic)[0]
            action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]

        # Defense
        if t > 4 and defense == 'foresight':
            pred_obs.append(
                foresee(U.get_session(), old_obs, old_action, np.array(obs), game_screen_mean, vf,
                        env.action_space.n, t)
            )
            if len(pred_obs) == 4:
                action = act(np.stack(pred_obs, axis=2)[None], stochastic=stochastic)[0]

        old_obs = obs
        old_action = action

        # RL loop
        obs, rew, done, info = env.step(action)
        t += 1
        if done:
            t = 0
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"]) 
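A hedged invocation sketch: act and craft_adv_obs would come from the deepq build-graph utilities, and env from a wrapped Atari environment, so every argument value below is an assumption.

play(env, act, craft_adv_obs, stochastic=True, video_path=None,
     game_name='Breakout', attack='fgsm', defense='foresight')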
Example #29
Source File: a2c.py    From HardRLWithYoutube with MIT License
def __init__(self, policy, env, nsteps,
            ent_coef=0.01, vf_coef=0.5, max_grad_norm=0.5, lr=7e-4,
            alpha=0.99, epsilon=1e-5, total_timesteps=int(80e6), lrschedule='linear'):

        sess = tf_util.get_session()
        nenvs = env.num_envs
        nbatch = nenvs*nsteps


        with tf.variable_scope('a2c_model', reuse=tf.AUTO_REUSE):
            step_model = policy(nenvs, 1, sess)
            train_model = policy(nbatch, nsteps, sess)

        A = tf.placeholder(train_model.action.dtype, train_model.action.shape)
        ADV = tf.placeholder(tf.float32, [nbatch])
        R = tf.placeholder(tf.float32, [nbatch])
        LR = tf.placeholder(tf.float32, [])

        neglogpac = train_model.pd.neglogp(A)
        entropy = tf.reduce_mean(train_model.pd.entropy())

        pg_loss = tf.reduce_mean(ADV * neglogpac)
        vf_loss = losses.mean_squared_error(tf.squeeze(train_model.vf), R)

        loss = pg_loss - entropy*ent_coef + vf_loss * vf_coef

        params = find_trainable_variables("a2c_model")
        grads = tf.gradients(loss, params)
        if max_grad_norm is not None:
            grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
        grads = list(zip(grads, params))
        trainer = tf.train.RMSPropOptimizer(learning_rate=LR, decay=alpha, epsilon=epsilon)
        _train = trainer.apply_gradients(grads)

        lr = Scheduler(v=lr, nvalues=total_timesteps, schedule=lrschedule)

        def train(obs, states, rewards, masks, actions, values):
            advs = rewards - values
            # Advance the LR schedule once per environment step in the batch;
            # cur_lr ends up holding the rate for the most recent step.
            for step in range(len(obs)):
                cur_lr = lr.value()

            td_map = {train_model.X:obs, A:actions, ADV:advs, R:rewards, LR:cur_lr}
            if states is not None:
                td_map[train_model.S] = states
                td_map[train_model.M] = masks
            policy_loss, value_loss, policy_entropy, _ = sess.run(
                [pg_loss, vf_loss, entropy, _train],
                td_map
            )
            return policy_loss, value_loss, policy_entropy


        self.train = train
        self.train_model = train_model
        self.step_model = step_model
        self.step = step_model.step
        self.value = step_model.value
        self.initial_state = step_model.initial_state
        self.save = functools.partial(tf_util.save_variables, sess=sess)
        self.load = functools.partial(tf_util.load_variables, sess=sess)
        tf.global_variables_initializer().run(session=sess)
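This __init__ belongs to the a2c Model class, which is driven by a rollout loop. A hedged sketch of one training iteration, with the Runner interface, policy_fn, and all hyperparameter values assumed:

model = Model(policy=policy_fn, env=env, nsteps=5)
runner = Runner(env, model, nsteps=5, gamma=0.99)
for update in range(total_updates):
    # Collect a batch of experience, then take one gradient step on it.
    obs, states, rewards, masks, actions, values = runner.run()
    policy_loss, value_loss, policy_entropy = model.train(
        obs, states, rewards, masks, actions, values)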