Python dm_control.suite.load() Examples
The following are 25 code examples of dm_control.suite.load(), drawn from open-source projects. The project, source file, and license are listed above each example. You may also want to check out all available functions/classes of the module dm_control.suite.
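Before the examples, here is a minimal usage sketch of the pattern they all build on: load an environment with suite.load() and step it with actions sampled from its action spec. The domain/task pair ('cartpole', 'swingup') and the seed are arbitrary choices for illustration, not taken from any example below.

# Minimal sketch of loading and stepping a dm_control suite environment.
import numpy as np
from dm_control import suite

# Load a domain/task pair; the seed in task_kwargs is illustrative.
env = suite.load('cartpole', 'swingup', task_kwargs={'random': 0})

action_spec = env.action_spec()
time_step = env.reset()
while not time_step.last():
    # Sample a uniform random action within the spec bounds.
    action = np.random.uniform(action_spec.minimum, action_spec.maximum,
                               size=action_spec.shape)
    time_step = env.step(action)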
Example #1
Source File: tasks.py From planet with Apache License 2.0 | 6 votes |
def _dm_control_env(
        action_repeat, max_length, domain, task, params, normalize=False,
        camera_id=None):
    if isinstance(domain, str):
        from dm_control import suite
        env = suite.load(domain, task)
    else:
        assert task is None
        env = domain()
    if camera_id is None:
        camera_id = int(params.get('camera_id', 0))
    env = control.wrappers.DeepMindWrapper(env, (64, 64), camera_id=camera_id)
    env = control.wrappers.ActionRepeat(env, action_repeat)
    if normalize:
        env = control.wrappers.NormalizeActions(env)
    env = control.wrappers.MaximumDuration(env, max_length)
    env = control.wrappers.PixelObservations(env, (64, 64), np.uint8, 'image')
    env = control.wrappers.ConvertTo32Bit(env)
    return env
Example #2
Source File: dm_env_wrapper.py From neural_graph_evolution with MIT License | 6 votes |
def __init__(self, args, rand_seed, monitor, width=480, height=480):
    self.width = width
    self.height = height
    task_name = dm_control_util.get_env_names(args.task)

    from dm_control import suite
    self.env = suite.load(
        domain_name=task_name[0], task_name=task_name[1],
        task_kwargs={'random': rand_seed}
    )
    self._base_path = init_path.get_abs_base_dir()

    self.NUM_EPISODE_RECORED = NUM_EPISODE_RECORED
    self._is_dirname = True

    # save the video
    self._monitor = monitor
    self._current_episode = 0
    if self._monitor:
        self.init_save(args)
Example #3
Source File: loader_test.py From dm_control with Apache License 2.0 | 5 votes |
def test_load_without_kwargs(self):
    env = suite.load('cartpole', 'swingup')
    self.assertIsInstance(env, control.Environment)
Example #4
Source File: make_env.py From surreal with MIT License | 5 votes |
def make_dm_control(env_name, env_config):
    from dm_control import suite
    from dm_control.suite.wrappers import pixels
    from .dm_wrapper import DMControlAdapter, DMControlDummyWrapper
    pixel_input = env_config.pixel_input
    domain_name, task_name = env_name.split('-')
    env = suite.load(domain_name=domain_name, task_name=task_name)
    if pixel_input:
        if os.getenv('DISABLE_MUJOCO_RENDERING'):
            # We are asking for rendering on a pod that cannot support rendering,
            # This happens in GPU based learners when we only want to create the environment
            # to see the dimensions.
            # So we will add a dummy environment
            # TODO: add a dummy wrapper that only contains the correct specs
            env = DMControlDummyWrapper(env)  # ...
        else:
            env = pixels.Wrapper(env, render_kwargs={'height': 84, 'width': 84, 'camera_id': 0})
    # TODO: what to do with reward visualization
    # Reward visualization should only be done in the eval agent
    # env = suite.load(domain_name=domain_name, task_name=task_name, visualize_reward=record_video)
    env = DMControlAdapter(env, pixel_input)
    env = FilterWrapper(env, env_config)
    env = ObservationConcatenationWrapper(env)
    if pixel_input:
        env = TransposeWrapper(env)
        env = GrayscaleWrapper(env)
    if env_config.frame_stacks > 1:
        env = FrameStackWrapper(env, env_config)
    env_config.action_spec = env.action_spec()
    env_config.obs_spec = env.observation_spec()
    return env, env_config
Example #5
Source File: dm_suite_env.py From dm2gym with MIT License | 5 votes |
def __init__(self, domain_name, task_name, task_kwargs=None,
             environment_kwargs=None, visualize_reward=False):
    self.env = suite.load(domain_name, task_name,
                          task_kwargs=task_kwargs,
                          environment_kwargs=environment_kwargs,
                          visualize_reward=visualize_reward)
    self.metadata = {'render.modes': ['human', 'rgb_array'],
                     'video.frames_per_second': round(1.0/self.env.control_timestep())}

    self.observation_space = convert_dm_control_to_gym_space(self.env.observation_spec())
    self.action_space = convert_dm_control_to_gym_space(self.env.action_spec())
    self.viewer = None
Example #6
Source File: wrappers.py From dreamer with Apache License 2.0 | 5 votes |
def __init__(self, domain, task, render_size=(64, 64), camera_id=0):
    if isinstance(domain, str):
        from dm_control import suite
        self._env = suite.load(domain, task)
    else:
        assert task is None
        self._env = domain()
    self._render_size = render_size
    self._camera_id = camera_id
Example #7
Source File: suite_test.py From dm_control with Apache License 2.0 | 5 votes |
def test_initial_state_is_randomized(self, domain, task):
    env = suite.load(domain, task, task_kwargs={'random': 42})
    obs1 = env.reset().observation
    obs2 = env.reset().observation
    self.assertFalse(
        all(np.all(obs1[k] == obs2[k]) for k in obs1),
        'Two consecutive initial states have identical observations.\n'
        'First: {}\nSecond: {}'.format(obs1, obs2))
Example #8
Source File: suite_test.py From dm_control with Apache License 2.0 | 5 votes |
def test_observations_dont_contain_constant_elements(self, domain, task):
    env = suite.load(domain, task)
    trajectory = make_trajectory(domain=domain, task=task, seed=0,
                                 num_episodes=2, max_steps_per_episode=1000)
    observations = {name: [] for name in env.observation_spec()}
    for time_step in trajectory:
        for name, array in six.iteritems(time_step.observation):
            observations[name].append(array)

    failures = []
    for name, array_list in six.iteritems(observations):
        # Sampling random uniform actions generally isn't sufficient to trigger
        # these touch sensors.
        if (domain in ('manipulator', 'stacker') and name == 'touch' or
                domain == 'quadruped' and name == 'force_torque'):
            continue
        stacked_arrays = np.array(array_list)
        is_constant = np.all(stacked_arrays == stacked_arrays[0], axis=0)
        has_constant_elements = (
            is_constant if np.isscalar(is_constant) else np.any(is_constant))
        if has_constant_elements:
            failures.append((name, is_constant))

    self.assertEmpty(
        failures,
        msg='The following observation(s) contain constant elements:\n{}'
        .format('\n'.join(':\t'.join([name, str(is_constant)])
                          for (name, is_constant) in failures)))
Example #9
Source File: suite_test.py From dm_control with Apache License 2.0 | 5 votes |
def test_observation_arrays_dont_share_memory(self, domain, task):
    env = suite.load(domain, task)
    first_timestep = env.reset()
    action = np.zeros(env.action_spec().shape)
    second_timestep = env.step(action)
    for name, first_array in six.iteritems(first_timestep.observation):
        second_array = second_timestep.observation[name]
        self.assertFalse(
            np.may_share_memory(first_array, second_array),
            msg='Consecutive observations of {!r} may share memory.'.format(name))
Example #10
Source File: suite_test.py From dm_control with Apache License 2.0 | 5 votes |
def test_task_supports_environment_kwargs(self, domain, task):
    env = suite.load(domain, task,
                     environment_kwargs=dict(flat_observation=True))
    # Check that the kwargs are actually passed through to the environment.
    self.assertSetEqual(set(env.observation_spec()),
                        {control.FLAT_OBSERVATION_KEY})
Example #11
Source File: suite_test.py From dm_control with Apache License 2.0 | 5 votes |
def test_visualize_reward(self, domain, task):
    env = suite.load(domain, task)
    env.task.visualize_reward = True
    action = np.zeros(env.action_spec().shape)

    with mock.patch.object(env.task, 'get_reward') as mock_get_reward:
        mock_get_reward.return_value = -3.0  # Rewards < 0 should be clipped.
        env.reset()
        mock_get_reward.assert_called_with(env.physics)
        self.assertCorrectColors(env.physics, reward=0.0)

        mock_get_reward.reset_mock()
        mock_get_reward.return_value = 0.5
        env.step(action)
        mock_get_reward.assert_called_with(env.physics)
        self.assertCorrectColors(env.physics, reward=mock_get_reward.return_value)

        mock_get_reward.reset_mock()
        mock_get_reward.return_value = 2.0  # Rewards > 1 should be clipped.
        env.step(action)
        mock_get_reward.assert_called_with(env.physics)
        self.assertCorrectColors(env.physics, reward=1.0)

        mock_get_reward.reset_mock()
        mock_get_reward.return_value = 0.25
        env.reset()
        mock_get_reward.assert_called_with(env.physics)
        self.assertCorrectColors(env.physics, reward=mock_get_reward.return_value)
Example #12
Source File: suite_test.py From dm_control with Apache License 2.0 | 5 votes |
def test_model_has_at_least_2_cameras(self, domain, task):
    env = suite.load(domain, task)
    model = env.physics.model
    self.assertGreaterEqual(model.ncam, 2,
                            'Model {!r} should have at least 2 cameras, has {}.'
                            .format(model.name, model.ncam))
Example #13
Source File: suite_test.py From dm_control with Apache License 2.0 | 5 votes |
def test_components_have_names(self, domain, task):
    env = suite.load(domain, task)
    model = env.physics.model

    object_types_and_size_fields = [
        ('body', 'nbody'),
        ('joint', 'njnt'),
        ('geom', 'ngeom'),
        ('site', 'nsite'),
        ('camera', 'ncam'),
        ('light', 'nlight'),
        ('mesh', 'nmesh'),
        ('hfield', 'nhfield'),
        ('texture', 'ntex'),
        ('material', 'nmat'),
        ('equality', 'neq'),
        ('tendon', 'ntendon'),
        ('actuator', 'nu'),
        ('sensor', 'nsensor'),
        ('numeric', 'nnumeric'),
        ('text', 'ntext'),
        ('tuple', 'ntuple'),
    ]
    for object_type, size_field in object_types_and_size_fields:
        for idx in range(getattr(model, size_field)):
            object_name = model.id2name(idx, object_type)
            self.assertNotEqual(object_name, '',
                                msg='Model {!r} contains unnamed {!r} with ID {}.'
                                .format(model.name, object_type, idx))
Example #14
Source File: suite_test.py From dm_control with Apache License 2.0 | 5 votes |
def make_trajectory(domain, task, seed, **trajectory_kwargs):
    env = suite.load(domain, task, task_kwargs={'random': seed})
    policy = uniform_random_policy(env.action_spec(), random=seed)
    return step_environment(env, policy, **trajectory_kwargs)
Example #15
Source File: loader_test.py From dm_control with Apache License 2.0 | 5 votes |
def test_load_with_kwargs(self):
    env = suite.load('cartpole', 'swingup',
                     task_kwargs={'time_limit': 40, 'random': 99})
    self.assertIsInstance(env, control.Environment)
Example #16
Source File: env.py From PlaNet with MIT License | 5 votes |
def __init__(self, env, symbolic, seed, max_episode_length, action_repeat, bit_depth):
    from dm_control import suite
    from dm_control.suite.wrappers import pixels
    domain, task = env.split('-')
    self.symbolic = symbolic
    self._env = suite.load(domain_name=domain, task_name=task, task_kwargs={'random': seed})
    if not symbolic:
        self._env = pixels.Wrapper(self._env)
    self.max_episode_length = max_episode_length
    self.action_repeat = action_repeat
    if action_repeat != CONTROL_SUITE_ACTION_REPEATS[domain]:
        print('Using action repeat %d; recommended action repeat for domain is %d' %
              (action_repeat, CONTROL_SUITE_ACTION_REPEATS[domain]))
    self.bit_depth = bit_depth
Example #17
Source File: dm_control.py From FractalAI with GNU Affero General Public License v3.0 | 5 votes |
def __init__(self, name: str = "cartpole-balance", visualize_reward: bool = True,
             n_repeat_action: int = 1, custom_death: "CustomDeath" = None):
    """
    Creates DMControlEnv and initializes the environment.

    :param domain_name: match dm_control interface.
    :param task_name: match dm_control interface.
    :param visualize_reward: match dm_control interface.
    :param fixed_steps: The number of consecutive times that an action will be applied.
                        This allows us to set the frequency at which the policy will play.
    :param custom_death: Pro hack to beat the shit out of DeepMind even further.
    """
    from dm_control import suite

    domain_name, task_name = name.split("-")
    super(DMControlEnv, self).__init__(name=name, n_repeat_action=n_repeat_action)

    self._render_i = 0
    self._env = suite.load(domain_name=domain_name, task_name=task_name,
                           visualize_reward=visualize_reward)
    self._name = name
    self.viewer = []
    self._last_time_step = None
    self._viewer = rendering.SimpleImageViewer()

    self._custom_death = custom_death
    self.reset()
Example #18
Source File: environment.py From FractalAI with GNU Affero General Public License v3.0 | 5 votes |
def __init__(self, domain_name="cartpole", task_name="balance",
             visualize_reward: bool=True, fixed_steps: int=1,
             custom_death: "CustomDeath"=None):
    """
    Creates DMControlEnv and initializes the environment.

    :param domain_name: match dm_control interface.
    :param task_name: match dm_control interface.
    :param visualize_reward: match dm_control interface.
    :param fixed_steps: The number of consecutive times that an action will be applied.
                        This allows us to set the frequency at which the policy will play.
    :param custom_death: Pro hack to beat the shit out of DeepMind even further.
    """
    from dm_control import suite

    name = str(domain_name) + ":" + str(task_name)
    super(DMControlEnv, self).__init__(name=name, state=None)
    self.fixed_steps = fixed_steps
    self._render_i = 0
    self._env = suite.load(domain_name=domain_name, task_name=task_name,
                           visualize_reward=visualize_reward)
    self._name = name
    self.viewer = []
    self._last_time_step = None
    self._custom_death = custom_death
    self.reset()
Example #19
Source File: dm_control_env.py From mushroom-rl with MIT License | 5 votes |
def __init__(self, domain_name, task_name, horizon, gamma, task_kwargs=None,
             dt=.01, width_screen=480, height_screen=480, camera_id=0):
    """
    Constructor.

    Args:
        domain_name (str): name of the environment;
        task_name (str): name of the task of the environment;
        horizon (int): the horizon;
        gamma (float): the discount factor;
        task_kwargs (dict, None): parameters of the task;
        dt (float, .01): duration of a control step;
        width_screen (int, 480): width of the screen;
        height_screen (int, 480): height of the screen;
        camera_id (int, 0): position of camera to render the environment;
    """
    # MDP creation
    if task_kwargs is None:
        task_kwargs = dict()
    task_kwargs['time_limit'] = np.inf  # Hack to ignore dm_control time limit.

    self.env = suite.load(domain_name, task_name, task_kwargs=task_kwargs)

    # MDP properties
    action_space = self._convert_action_space(self.env.action_spec())
    observation_space = self._convert_observation_space(self.env.observation_spec())
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    self._viewer = ImageViewer((width_screen, height_screen), dt)
    self._camera_id = camera_id

    super().__init__(mdp_info)
Example #20
Source File: walker_env_wrapper.py From neural_graph_evolution with MIT License | 5 votes |
def __init__(self, args, rand_seed, monitor, width=480, height=480):
    self.width = width
    self.height = height

    self.task = args.task
    assert 'walker' in self.task or 'hopper' in self.task or 'cheetah' in self.task
    self.args = args
    self.is_evo = 'evo' in self.task

    from dm_control import suite
    self.env = suite.load(
        domain_name='walker', task_name='walk',
        task_kwargs={'random': rand_seed}
    )
    self._base_path = init_path.get_abs_base_dir()

    self.load_xml(os.path.join(self._base_path, 'env', 'assets/walker.xml'))

    self.set_get_observation()  # overwrite the original get_ob function
    self.set_get_reward()  # overwrite the original reward function

    self._JOINTS = ['right_hip', 'right_knee', 'right_ankle',
                    'left_hip', 'left_knee', 'left_ankle']

    # save the video
    self._monitor = monitor
    self._current_episode = 0
    if self._monitor:
        self.init_save(args)
Example #21
Source File: fish_env_wrapper.py From neural_graph_evolution with MIT License | 5 votes |
def __init__(self, args, rand_seed, monitor, width=480, height=480):
    self.width = width
    self.height = height

    self.task = args.task
    self.args = args
    assert 'fish3d' in self.task
    self.is_evo = 'evo' in self.task
    if 'easyswim' in self.task:
        self.target_angle = args.fish_target_angle

    from dm_control import suite
    self.env = suite.load(
        domain_name='fish', task_name='swim',
        task_kwargs={'random': rand_seed}
    )
    self._base_path = init_path.get_abs_base_dir()

    self.load_xml(os.path.join(self._base_path, 'env', 'assets/fish3d.xml'))

    self.set_get_observation()  # overwrite the original get_ob function
    self.set_get_reward()  # overwrite the original reward function

    self._JOINTS = ['tail1', 'tail_twist', 'tail2',
                    'finright_roll', 'finright_pitch',
                    'finleft_roll', 'finleft_pitch']

    # save the video
    self._monitor = monitor
    self._current_episode = 0
    if self._monitor:
        self.init_save(args)
Example #22
Source File: dm_control.py From FractalAI with GNU Affero General Public License v3.0 | 5 votes |
def __init__(
    self,
    name: str = "cartpole-balance",
    visualize_reward: bool = True,
    n_repeat_action: int = 1,
    custom_death: "CustomDeath" = None,
):
    """
    Creates DMControlEnv and initializes the environment.

    :param domain_name: match dm_control interface.
    :param task_name: match dm_control interface.
    :param visualize_reward: match dm_control interface.
    :param fixed_steps: The number of consecutive times that an action will be applied.
                        This allows us to set the frequency at which the policy will play.
    :param custom_death: Pro hack to beat the shit out of DeepMind even further.
    """
    from dm_control import suite

    domain_name, task_name = name.split("-")
    super(DMControlEnv, self).__init__(name=name, n_repeat_action=n_repeat_action)

    self._render_i = 0
    self._env = suite.load(
        domain_name=domain_name, task_name=task_name, visualize_reward=visualize_reward
    )
    self._name = name
    self.viewer = []
    self._last_time_step = None
    self._viewer = rendering.SimpleImageViewer()

    self._custom_death = custom_death
    self.reset()
Example #23
Source File: environment.py From FractalAI with GNU Affero General Public License v3.0 | 5 votes |
def __init__(
    self,
    domain_name="cartpole",
    task_name="balance",
    visualize_reward: bool = True,
    fixed_steps: int = 1,
    custom_death: "CustomDeath" = None,
):
    """
    Creates DMControlEnv and initializes the environment.

    :param domain_name: match dm_control interface.
    :param task_name: match dm_control interface.
    :param visualize_reward: match dm_control interface.
    :param fixed_steps: The number of consecutive times that an action will be applied.
                        This allows us to set the frequency at which the policy will play.
    :param custom_death: Pro hack to beat the shit out of DeepMind even further.
    """
    from dm_control import suite

    name = str(domain_name) + ":" + str(task_name)
    super(DMControlEnv, self).__init__(name=name, state=None)
    self.fixed_steps = fixed_steps
    self._render_i = 0
    self._env = suite.load(
        domain_name=domain_name, task_name=task_name, visualize_reward=visualize_reward
    )
    self._name = name
    self.viewer = []
    self._last_time_step = None
    self._custom_death = custom_death
    self.reset()
Example #24
Source File: dm_control_env.py From garage with MIT License | 5 votes |
def from_suite(cls, domain_name, task_name):
    return cls(suite.load(domain_name, task_name),
               name='{}.{}'.format(domain_name, task_name))
Example #25
Source File: wrappers.py From dmc2gym with MIT License | 4 votes |
def __init__(
    self,
    domain_name,
    task_name,
    task_kwargs=None,
    visualize_reward={},
    from_pixels=False,
    height=84,
    width=84,
    camera_id=0,
    frame_skip=1,
    environment_kwargs=None,
    channels_first=True
):
    assert 'random' in task_kwargs, 'please specify a seed, for deterministic behaviour'
    self._from_pixels = from_pixels
    self._height = height
    self._width = width
    self._camera_id = camera_id
    self._frame_skip = frame_skip
    self._channels_first = channels_first

    # create task
    self._env = suite.load(
        domain_name=domain_name,
        task_name=task_name,
        task_kwargs=task_kwargs,
        visualize_reward=visualize_reward,
        environment_kwargs=environment_kwargs
    )

    # true and normalized action spaces
    self._true_action_space = _spec_to_box([self._env.action_spec()])
    self._norm_action_space = spaces.Box(
        low=-1.0,
        high=1.0,
        shape=self._true_action_space.shape,
        dtype=np.float32
    )

    # create observation space
    if from_pixels:
        shape = [3, height, width] if channels_first else [height, width, 3]
        self._observation_space = spaces.Box(
            low=0, high=255, shape=shape, dtype=np.uint8
        )
    else:
        self._observation_space = _spec_to_box(
            self._env.observation_spec().values()
        )

    self._state_space = _spec_to_box(
        self._env.observation_spec().values()
    )

    self.current_state = None

    # set seed
    self.seed(seed=task_kwargs.get('random', 1))