Python baselines.common.tf_util.save_state() Examples
The following are 30 code examples of baselines.common.tf_util.save_state(), collected from open-source projects. Each example names its source file, project, and license so you can locate the original code. You may also want to check out all available functions and classes of the module baselines.common.tf_util.
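Before the project examples below, here is a minimal, self-contained sketch of how save_state() is typically called. It is illustrative only: the temporary checkpoint prefix and the companion load_state() call are assumptions based on the common baselines tf_util API, not taken from any of the listed projects.

import os
import tempfile

import baselines.common.tf_util as U

# Create a TF1-style session and initialize the graph's variables.
with U.make_session(num_cpu=1):
    U.initialize()

    # save_state() writes the default session's variables under a checkpoint prefix.
    checkpoint_prefix = os.path.join(tempfile.gettempdir(), "model")  # illustrative path
    U.save_state(checkpoint_prefix)

    # load_state() (assumed companion call) restores variables from the same prefix.
    U.load_state(checkpoint_prefix)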
Example #1
Source File: simple.py From BackpropThroughTheVoidRL with MIT License | 6 votes |
def save(self, path=None):
    """Save model to a pickle located at `path`"""
    if path is None:
        path = os.path.join(logger.get_dir(), "model.pkl")
    with tempfile.TemporaryDirectory() as td:
        U.save_state(os.path.join(td, "model"))
        arc_name = os.path.join(td, "packed.zip")
        with zipfile.ZipFile(arc_name, 'w') as zipf:
            for root, dirs, files in os.walk(td):
                for fname in files:
                    file_path = os.path.join(root, fname)
                    if file_path != arc_name:
                        zipf.write(file_path, os.path.relpath(file_path, td))
        with open(arc_name, "rb") as f:
            model_data = f.read()
    with open(path, "wb") as f:
        cloudpickle.dump((model_data, self._act_params), f)
Example #2
Source File: pposgd_simple.py From midlevel-reps with MIT License | 6 votes |
def save(self, path=None):
    """Save model to a pickle located at `path`"""
    if path is None:
        path = os.path.join(logger.get_dir(), "model.pkl")
    with tempfile.TemporaryDirectory() as td:
        U.save_state(os.path.join(td, "model"))
        arc_name = os.path.join(td, "packed.zip")
        with zipfile.ZipFile(arc_name, 'w') as zipf:
            for root, dirs, files in os.walk(td):
                for fname in files:
                    file_path = os.path.join(root, fname)
                    if file_path != arc_name:
                        zipf.write(file_path, os.path.relpath(file_path, td))
        with open(arc_name, "rb") as f:
            model_data = f.read()
    with open(path, "wb") as f:
        cloudpickle.dump((model_data), f)
Example #3
Source File: pposgd_fuse.py From midlevel-reps with MIT License | 6 votes |
def save(self, path=None):
    """Save model to a pickle located at `path`"""
    if path is None:
        path = os.path.join(logger.get_dir(), "model.pkl")
    with tempfile.TemporaryDirectory() as td:
        U.save_state(os.path.join(td, "model"))
        arc_name = os.path.join(td, "packed.zip")
        with zipfile.ZipFile(arc_name, 'w') as zipf:
            for root, dirs, files in os.walk(td):
                for fname in files:
                    file_path = os.path.join(root, fname)
                    if file_path != arc_name:
                        zipf.write(file_path, os.path.relpath(file_path, td))
        with open(arc_name, "rb") as f:
            model_data = f.read()
    with open(path, "wb") as f:
        cloudpickle.dump((model_data), f)
Example #4
Source File: train.py From rl-attack-detection with MIT License | 6 votes |
def maybe_save_model(savedir, container, state):
    """This function checkpoints the model and state of the training algorithm."""
    if savedir is None:
        return
    start_time = time.time()
    model_dir = "model-{}".format(state["num_iters"])
    U.save_state(os.path.join(savedir, model_dir, "saved"))
    if container is not None:
        container.put(os.path.join(savedir, model_dir), model_dir)
    relatively_safe_pickle_dump(state, os.path.join(savedir, 'training_state.pkl.zip'), compression=True)
    if container is not None:
        container.put(os.path.join(savedir, 'training_state.pkl.zip'), 'training_state.pkl.zip')
    relatively_safe_pickle_dump(state["monitor_state"], os.path.join(savedir, 'monitor_state.pkl'))
    if container is not None:
        container.put(os.path.join(savedir, 'monitor_state.pkl'), 'monitor_state.pkl')
    logger.log("Saved model in {} seconds\n".format(time.time() - start_time))
Example #5
Source File: train.py From NoisyNet-DQN with MIT License | 6 votes |
def maybe_save_model(savedir, container, state):
    """This function checkpoints the model and state of the training algorithm."""
    if savedir is None:
        return
    start_time = time.time()
    model_dir = "model-{}".format(state["num_iters"])
    U.save_state(os.path.join(savedir, model_dir, "saved"))
    if container is not None:
        container.put(os.path.join(savedir, model_dir), model_dir)
    relatively_safe_pickle_dump(state, os.path.join(savedir, 'training_state.pkl.zip'), compression=True)
    if container is not None:
        container.put(os.path.join(savedir, 'training_state.pkl.zip'), 'training_state.pkl.zip')
    relatively_safe_pickle_dump(state["monitor_state"], os.path.join(savedir, 'monitor_state.pkl'))
    if container is not None:
        container.put(os.path.join(savedir, 'monitor_state.pkl'), 'monitor_state.pkl')
    logger.log("Saved model in {} seconds\n".format(time.time() - start_time))
Example #6
Source File: pposgd_sensor.py From midlevel-reps with MIT License | 6 votes |
def save(self, path=None):
    """Save model to a pickle located at `path`"""
    if path is None:
        path = os.path.join(logger.get_dir(), "model.pkl")
    with tempfile.TemporaryDirectory() as td:
        U.save_state(os.path.join(td, "model"))
        arc_name = os.path.join(td, "packed.zip")
        with zipfile.ZipFile(arc_name, 'w') as zipf:
            for root, dirs, files in os.walk(td):
                for fname in files:
                    file_path = os.path.join(root, fname)
                    if file_path != arc_name:
                        zipf.write(file_path, os.path.relpath(file_path, td))
        with open(arc_name, "rb") as f:
            model_data = f.read()
    with open(path, "wb") as f:
        cloudpickle.dump((model_data), f)
Example #7
Source File: train.py From deeprl-baselines with MIT License | 6 votes |
def maybe_save_model(savedir, container, state):
    """This function checkpoints the model and state of the training algorithm."""
    if savedir is None:
        return
    start_time = time.time()
    model_dir = "model-{}".format(state["num_iters"])
    U.save_state(os.path.join(savedir, model_dir, "saved"))
    if container is not None:
        container.put(os.path.join(savedir, model_dir), model_dir)
    relatively_safe_pickle_dump(state, os.path.join(savedir, 'training_state.pkl.zip'), compression=True)
    if container is not None:
        container.put(os.path.join(savedir, 'training_state.pkl.zip'), 'training_state.pkl.zip')
    relatively_safe_pickle_dump(state["monitor_state"], os.path.join(savedir, 'monitor_state.pkl'))
    if container is not None:
        container.put(os.path.join(savedir, 'monitor_state.pkl'), 'monitor_state.pkl')
    logger.log("Saved model in {} seconds\n".format(time.time() - start_time))
Example #8
Source File: simple.py From deeprl-baselines with MIT License | 6 votes |
def save(self, path=None):
    """Save model to a pickle located at `path`"""
    if path is None:
        path = os.path.join(logger.get_dir(), "model.pkl")
    with tempfile.TemporaryDirectory() as td:
        U.save_state(os.path.join(td, "model"))
        arc_name = os.path.join(td, "packed.zip")
        with zipfile.ZipFile(arc_name, 'w') as zipf:
            for root, dirs, files in os.walk(td):
                for fname in files:
                    file_path = os.path.join(root, fname)
                    if file_path != arc_name:
                        zipf.write(file_path, os.path.relpath(file_path, td))
        with open(arc_name, "rb") as f:
            model_data = f.read()
    with open(path, "wb") as f:
        cloudpickle.dump((model_data, self._act_params), f)
Example #9
Source File: rainbow.py From deeprl-baselines with MIT License | 6 votes |
def maybe_save_model(savedir, container, state):
    """This function checkpoints the model and state of the training algorithm."""
    if savedir is None:
        return
    start_time = time.time()
    model_dir = "model-{}".format(state["num_iters"])
    U.save_state(os.path.join(savedir, model_dir, "saved"))
    if container is not None:
        container.put(os.path.join(savedir, model_dir), model_dir)
    relatively_safe_pickle_dump(state, os.path.join(savedir, 'training_state.pkl.zip'), compression=True)
    if container is not None:
        container.put(os.path.join(savedir, 'training_state.pkl.zip'), 'training_state.pkl.zip')
    relatively_safe_pickle_dump(state["monitor_state"], os.path.join(savedir, 'monitor_state.pkl'))
    if container is not None:
        container.put(os.path.join(savedir, 'monitor_state.pkl'), 'monitor_state.pkl')
    logger.log("Saved model in {} seconds\n".format(time.time() - start_time))
Example #10
Source File: train_atari.py From distributional-dqn with MIT License | 6 votes |
def maybe_save_model(savedir, container, state):
    """This function checkpoints the model and state of the training algorithm."""
    if savedir is None:
        return
    start_time = time.time()
    model_dir = "model-{}".format(state["num_iters"])
    U.save_state(os.path.join(savedir, model_dir, "saved"))
    if container is not None:
        container.put(os.path.join(savedir, model_dir), model_dir)
    # requires 32gb of memory for this to work
    relatively_safe_pickle_dump(state, os.path.join(savedir, 'training_state.pkl.zip'), compression=True)
    if container is not None:
        container.put(os.path.join(savedir, 'training_state.pkl.zip'), 'training_state.pkl.zip')
    relatively_safe_pickle_dump(state["monitor_state"], os.path.join(savedir, 'monitor_state.pkl'))
    if container is not None:
        container.put(os.path.join(savedir, 'monitor_state.pkl'), 'monitor_state.pkl')
    logger.log("Saved model in {} seconds\n".format(time.time() - start_time))
Example #11
Source File: train.py From learning2run with MIT License | 6 votes |
def maybe_save_model(savedir, container, state):
    """This function checkpoints the model and state of the training algorithm."""
    if savedir is None:
        return
    start_time = time.time()
    model_dir = "model-{}".format(state["num_iters"])
    U.save_state(os.path.join(savedir, model_dir, "saved"))
    if container is not None:
        container.put(os.path.join(savedir, model_dir), model_dir)
    relatively_safe_pickle_dump(state, os.path.join(savedir, 'training_state.pkl.zip'), compression=True)
    if container is not None:
        container.put(os.path.join(savedir, 'training_state.pkl.zip'), 'training_state.pkl.zip')
    relatively_safe_pickle_dump(state["monitor_state"], os.path.join(savedir, 'monitor_state.pkl'))
    if container is not None:
        container.put(os.path.join(savedir, 'monitor_state.pkl'), 'monitor_state.pkl')
    logger.log("Saved model in {} seconds\n".format(time.time() - start_time))
Example #12
Source File: dqfd.py From pysc2-examples with Apache License 2.0 | 6 votes |
def save(self, path):
    """Save model to a pickle located at `path`"""
    with tempfile.TemporaryDirectory() as td:
        U.save_state(os.path.join(td, "model"))
        arc_name = os.path.join(td, "packed.zip")
        with zipfile.ZipFile(arc_name, 'w') as zipf:
            for root, dirs, files in os.walk(td):
                for fname in files:
                    file_path = os.path.join(root, fname)
                    if file_path != arc_name:
                        zipf.write(file_path, os.path.relpath(file_path, td))
        with open(arc_name, "rb") as f:
            model_data = f.read()
    with open(path, "wb") as f:
        dill.dump((model_data), f)
Example #13
Source File: deepq_mineral_4way.py From pysc2-examples with Apache License 2.0 | 6 votes |
def save(self, path):
    """Save model to a pickle located at `path`"""
    with tempfile.TemporaryDirectory() as td:
        U.save_state(os.path.join(td, "model"))
        arc_name = os.path.join(td, "packed.zip")
        with zipfile.ZipFile(arc_name, 'w') as zipf:
            for root, dirs, files in os.walk(td):
                for fname in files:
                    file_path = os.path.join(root, fname)
                    if file_path != arc_name:
                        zipf.write(file_path, os.path.relpath(file_path, td))
        with open(arc_name, "rb") as f:
            model_data = f.read()
    with open(path, "wb") as f:
        dill.dump((model_data), f)
Example #14
Source File: train.py From emdqn with MIT License | 6 votes |
def maybe_save_model(savedir, container, state):
    """This function checkpoints the model and state of the training algorithm."""
    if savedir is None:
        return
    start_time = time.time()
    model_dir = "model-{}".format(state["num_iters"])
    U.save_state(os.path.join(savedir, model_dir, "saved"))
    if container is not None:
        container.put(os.path.join(savedir, model_dir), model_dir)
    relatively_safe_pickle_dump(state, os.path.join(savedir, 'training_state.pkl.zip'), compression=True)
    if container is not None:
        container.put(os.path.join(savedir, 'training_state.pkl.zip'), 'training_state.pkl.zip')
    relatively_safe_pickle_dump(state["monitor_state"], os.path.join(savedir, 'monitor_state.pkl'))
    if container is not None:
        container.put(os.path.join(savedir, 'monitor_state.pkl'), 'monitor_state.pkl')
    logger.log("Saved model in {} seconds\n".format(time.time() - start_time))
Example #15
Source File: train.py From BackpropThroughTheVoidRL with MIT License | 6 votes |
def maybe_save_model(savedir, container, state):
    """This function checkpoints the model and state of the training algorithm."""
    if savedir is None:
        return
    start_time = time.time()
    model_dir = "model-{}".format(state["num_iters"])
    U.save_state(os.path.join(savedir, model_dir, "saved"))
    if container is not None:
        container.put(os.path.join(savedir, model_dir), model_dir)
    relatively_safe_pickle_dump(state, os.path.join(savedir, 'training_state.pkl.zip'), compression=True)
    if container is not None:
        container.put(os.path.join(savedir, 'training_state.pkl.zip'), 'training_state.pkl.zip')
    relatively_safe_pickle_dump(state["monitor_state"], os.path.join(savedir, 'monitor_state.pkl'))
    if container is not None:
        container.put(os.path.join(savedir, 'monitor_state.pkl'), 'monitor_state.pkl')
    logger.log("Saved model in {} seconds\n".format(time.time() - start_time))
Example #16
Source File: simple.py From distributional-dqn with MIT License | 5 votes |
def save(self, path):
    """Save model to a pickle located at `path`"""
    with tempfile.TemporaryDirectory() as td:
        U.save_state(os.path.join(td, "model"))
        arc_name = os.path.join(td, "packed.zip")
        with zipfile.ZipFile(arc_name, 'w') as zipf:
            for root, dirs, files in os.walk(td):
                for fname in files:
                    file_path = os.path.join(root, fname)
                    if file_path != arc_name:
                        zipf.write(file_path, os.path.relpath(file_path, td))
        with open(arc_name, "rb") as f:
            model_data = f.read()
    with open(path, "wb") as f:
        dill.dump((model_data, self._act_params), f)
Example #17
Source File: deepq.py From mario-rl-tutorial with Apache License 2.0 | 5 votes |
def save(self, path):
    """Save model to a pickle located at `path`"""
    with tempfile.TemporaryDirectory() as td:
        U.save_state(os.path.join(td, "model"))
        arc_name = os.path.join(td, "packed.zip")
        with zipfile.ZipFile(arc_name, 'w') as zipf:
            for root, dirs, files in os.walk(td):
                for fname in files:
                    file_path = os.path.join(root, fname)
                    if file_path != arc_name:
                        zipf.write(file_path, os.path.relpath(file_path, td))
        with open(arc_name, "rb") as f:
            model_data = f.read()
    with open(path, "wb") as f:
        dill.dump((model_data, self._act_params), f)
Example #18
Source File: deepq_mineral_shards.py From pysc2-examples with Apache License 2.0 | 5 votes |
def save(self, path):
    """Save model to a pickle located at `path`"""
    with tempfile.TemporaryDirectory() as td:
        U.save_state(os.path.join(td, "model"))
        arc_name = os.path.join(td, "packed.zip")
        with zipfile.ZipFile(arc_name, 'w') as zipf:
            for root, dirs, files in os.walk(td):
                for fname in files:
                    file_path = os.path.join(root, fname)
                    if file_path != arc_name:
                        zipf.write(file_path, os.path.relpath(file_path, td))
        with open(arc_name, "rb") as f:
            model_data = f.read()
    with open(path, "wb") as f:
        dill.dump((model_data), f)
Example #19
Source File: simple.py From emdqn with MIT License | 5 votes |
def save(self, path):
    """Save model to a pickle located at `path`"""
    with tempfile.TemporaryDirectory() as td:
        U.save_state(os.path.join(td, "model"))
        arc_name = os.path.join(td, "packed.zip")
        with zipfile.ZipFile(arc_name, 'w') as zipf:
            for root, dirs, files in os.walk(td):
                for fname in files:
                    file_path = os.path.join(root, fname)
                    if file_path != arc_name:
                        zipf.write(file_path, os.path.relpath(file_path, td))
        with open(arc_name, "rb") as f:
            model_data = f.read()
    with open(path, "wb") as f:
        dill.dump((model_data, self._act_params), f)
Example #20
Source File: dqfd.py From A-Guide-to-DeepMinds-StarCraft-AI-Environment with Apache License 2.0 | 5 votes |
def save(self, path):
    """Save model to a pickle located at `path`"""
    with tempfile.TemporaryDirectory() as td:
        U.save_state(os.path.join(td, "model"))
        arc_name = os.path.join(td, "packed.zip")
        with zipfile.ZipFile(arc_name, 'w') as zipf:
            for root, dirs, files in os.walk(td):
                for fname in files:
                    file_path = os.path.join(root, fname)
                    if file_path != arc_name:
                        zipf.write(file_path, os.path.relpath(file_path, td))
        with open(arc_name, "rb") as f:
            model_data = f.read()
    with open(path, "wb") as f:
        dill.dump((model_data), f)
Example #21
Source File: policies.py From baselines with MIT License | 5 votes |
def save(self, save_path):
    tf_util.save_state(save_path, sess=self.sess)
Example #22
Source File: run_humanoid.py From baselines with MIT License | 5 votes |
def train(num_timesteps, seed, model_path=None):
    env_id = 'Humanoid-v2'
    from baselines.ppo1 import mlp_policy, pposgd_simple
    U.make_session(num_cpu=1).__enter__()

    def policy_fn(name, ob_space, ac_space):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                                    hid_size=64, num_hid_layers=2)

    env = make_mujoco_env(env_id, seed)

    # parameters below were the best found in a simple random search
    # these are good enough to make humanoid walk, but whether those are
    # an absolute best or not is not certain
    env = RewScale(env, 0.1)
    logger.log("NOTE: reward will be scaled by a factor of 10 in logged stats. Check the monitor for unscaled reward.")
    pi = pposgd_simple.learn(env, policy_fn,
                             max_timesteps=num_timesteps,
                             timesteps_per_actorbatch=2048,
                             clip_param=0.1, entcoeff=0.0,
                             optim_epochs=10,
                             optim_stepsize=1e-4,
                             optim_batchsize=64,
                             gamma=0.99,
                             lam=0.95,
                             schedule='constant',
                             )
    env.close()
    if model_path:
        U.save_state(model_path)
    return pi
Example #23
Source File: behavior_clone.py From ICML2019-TREX with MIT License | 5 votes |
def learn(env, policy_func, dataset, optim_batch_size=128, max_iters=1e4,
          adam_epsilon=1e-5, optim_stepsize=3e-4,
          ckpt_dir=None, log_dir=None, task_name=None,
          verbose=False):
    val_per_iter = int(max_iters/10)
    ob_space = env.observation_space
    ac_space = env.action_space
    pi = policy_func("pi", ob_space, ac_space)  # Construct network for new policy
    # placeholder
    ob = U.get_placeholder_cached(name="ob")
    ac = pi.pdtype.sample_placeholder([None])
    stochastic = U.get_placeholder_cached(name="stochastic")
    loss = tf.reduce_mean(tf.square(ac-pi.ac))
    var_list = pi.get_trainable_variables()
    adam = MpiAdam(var_list, epsilon=adam_epsilon)
    lossandgrad = U.function([ob, ac, stochastic], [loss]+[U.flatgrad(loss, var_list)])

    U.initialize()
    adam.sync()
    logger.log("Pretraining with Behavior Cloning...")
    for iter_so_far in tqdm(range(int(max_iters))):
        ob_expert, ac_expert = dataset.get_next_batch(optim_batch_size, 'train')
        train_loss, g = lossandgrad(ob_expert, ac_expert, True)
        adam.update(g, optim_stepsize)
        if verbose and iter_so_far % val_per_iter == 0:
            ob_expert, ac_expert = dataset.get_next_batch(-1, 'val')
            val_loss, _ = lossandgrad(ob_expert, ac_expert, True)
            logger.log("Training loss: {}, Validation loss: {}".format(train_loss, val_loss))

    if ckpt_dir is None:
        savedir_fname = tempfile.TemporaryDirectory().name
    else:
        savedir_fname = osp.join(ckpt_dir, task_name)
    U.save_state(savedir_fname, var_list=pi.get_variables())
    return savedir_fname
Example #24
Source File: behavior_clone.py From HardRLWithYoutube with MIT License | 5 votes |
def learn(env, policy_func, dataset, optim_batch_size=128, max_iters=1e4,
          adam_epsilon=1e-5, optim_stepsize=3e-4,
          ckpt_dir=None, log_dir=None, task_name=None,
          verbose=False):
    val_per_iter = int(max_iters/10)
    ob_space = env.observation_space
    ac_space = env.action_space
    pi = policy_func("pi", ob_space, ac_space)  # Construct network for new policy
    # placeholder
    ob = U.get_placeholder_cached(name="ob")
    ac = pi.pdtype.sample_placeholder([None])
    stochastic = U.get_placeholder_cached(name="stochastic")
    loss = tf.reduce_mean(tf.square(ac-pi.ac))
    var_list = pi.get_trainable_variables()
    adam = MpiAdam(var_list, epsilon=adam_epsilon)
    lossandgrad = U.function([ob, ac, stochastic], [loss]+[U.flatgrad(loss, var_list)])

    U.initialize()
    adam.sync()
    logger.log("Pretraining with Behavior Cloning...")
    for iter_so_far in tqdm(range(int(max_iters))):
        ob_expert, ac_expert = dataset.get_next_batch(optim_batch_size, 'train')
        train_loss, g = lossandgrad(ob_expert, ac_expert, True)
        adam.update(g, optim_stepsize)
        if verbose and iter_so_far % val_per_iter == 0:
            ob_expert, ac_expert = dataset.get_next_batch(-1, 'val')
            val_loss, _ = lossandgrad(ob_expert, ac_expert, True)
            logger.log("Training loss: {}, Validation loss: {}".format(train_loss, val_loss))

    if ckpt_dir is None:
        savedir_fname = tempfile.TemporaryDirectory().name
    else:
        savedir_fname = osp.join(ckpt_dir, task_name)
    U.save_state(savedir_fname, var_list=pi.get_variables())
    return savedir_fname
Example #25
Source File: run_humanoid.py From HardRLWithYoutube with MIT License | 5 votes |
def train(num_timesteps, seed, model_path=None):
    env_id = 'Humanoid-v2'
    from baselines.ppo1 import mlp_policy, pposgd_simple
    U.make_session(num_cpu=1).__enter__()

    def policy_fn(name, ob_space, ac_space):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                                    hid_size=64, num_hid_layers=2)

    env = make_mujoco_env(env_id, seed)

    # parameters below were the best found in a simple random search
    # these are good enough to make humanoid walk, but whether those are
    # an absolute best or not is not certain
    env = RewScale(env, 0.1)
    pi = pposgd_simple.learn(env, policy_fn,
                             max_timesteps=num_timesteps,
                             timesteps_per_actorbatch=2048,
                             clip_param=0.2, entcoeff=0.0,
                             optim_epochs=10,
                             optim_stepsize=3e-4,
                             optim_batchsize=64,
                             gamma=0.99,
                             lam=0.95,
                             schedule='linear',
                             )
    env.close()
    if model_path:
        U.save_state(model_path)
    return pi
Example #26
Source File: policies.py From HardRLWithYoutube with MIT License | 5 votes |
def save(self, save_path):
    tf_util.save_state(save_path, sess=self.sess)
Example #27
Source File: policies.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 5 votes |
def save(self, save_path):
    tf_util.save_state(save_path, sess=self.sess)
Example #28
Source File: policies.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 5 votes |
def save(self, save_path):
    tf_util.save_state(save_path, sess=self.sess)
Example #29
Source File: policies.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 5 votes |
def save(self, save_path):
    tf_util.save_state(save_path, sess=self.sess)
Example #30
Source File: behavior_clone.py From rl_graph_generation with BSD 3-Clause "New" or "Revised" License | 5 votes |
def learn(env, policy_func, dataset, optim_batch_size=128, max_iters=1e4,
          adam_epsilon=1e-5, optim_stepsize=3e-4,
          ckpt_dir=None, log_dir=None, task_name=None,
          verbose=False):
    val_per_iter = int(max_iters/10)
    ob_space = env.observation_space
    ac_space = env.action_space
    pi = policy_func("pi", ob_space, ac_space)  # Construct network for new policy
    # placeholder
    ob = U.get_placeholder_cached(name="ob")
    ac = pi.pdtype.sample_placeholder([None])
    stochastic = U.get_placeholder_cached(name="stochastic")
    loss = tf.reduce_mean(tf.square(ac-pi.ac))
    var_list = pi.get_trainable_variables()
    adam = MpiAdam(var_list, epsilon=adam_epsilon)
    lossandgrad = U.function([ob, ac, stochastic], [loss]+[U.flatgrad(loss, var_list)])

    U.initialize()
    adam.sync()
    logger.log("Pretraining with Behavior Cloning...")
    for iter_so_far in tqdm(range(int(max_iters))):
        ob_expert, ac_expert = dataset.get_next_batch(optim_batch_size, 'train')
        train_loss, g = lossandgrad(ob_expert, ac_expert, True)
        adam.update(g, optim_stepsize)
        if verbose and iter_so_far % val_per_iter == 0:
            ob_expert, ac_expert = dataset.get_next_batch(-1, 'val')
            val_loss, _ = lossandgrad(ob_expert, ac_expert, True)
            logger.log("Training loss: {}, Validation loss: {}".format(train_loss, val_loss))

    if ckpt_dir is None:
        savedir_fname = tempfile.TemporaryDirectory().name
    else:
        savedir_fname = osp.join(ckpt_dir, task_name)
    U.save_state(savedir_fname, var_list=pi.get_variables())
    return savedir_fname