Python dm_control.suite.load() Examples

The following are 25 code examples of dm_control.suite.load(), drawn from open-source projects; the source file and project for each example are listed above it. Most examples are excerpts and assume the imports and helper definitions of their surrounding project files. You may also want to check out all available functions and classes of the dm_control.suite module.
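Before the project-specific examples below, here is a minimal, self-contained sketch of the basic call pattern. The 'cartpole'/'swingup' pair and the seed are illustrative choices, not requirements of the API:

import numpy as np
from dm_control import suite

# Load a task; task_kwargs={'random': seed} makes episode resets reproducible.
env = suite.load(domain_name='cartpole', task_name='swingup',
                 task_kwargs={'random': 0})

# Run a single episode with zero-valued actions.
time_step = env.reset()
action = np.zeros(env.action_spec().shape)
while not time_step.last():
    time_step = env.step(action)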
Example #1
Source File: tasks.py    From planet with Apache License 2.0
def _dm_control_env(
    action_repeat, max_length, domain, task, params, normalize=False,
    camera_id=None):
  if isinstance(domain, str):
    from dm_control import suite
    env = suite.load(domain, task)
  else:
    assert task is None
    env = domain()
  if camera_id is None:
    camera_id = int(params.get('camera_id', 0))
  env = control.wrappers.DeepMindWrapper(env, (64, 64), camera_id=camera_id)
  env = control.wrappers.ActionRepeat(env, action_repeat)
  if normalize:
    env = control.wrappers.NormalizeActions(env)
  env = control.wrappers.MaximumDuration(env, max_length)
  env = control.wrappers.PixelObservations(env, (64, 64), np.uint8, 'image')
  env = control.wrappers.ConvertTo32Bit(env)
  return env 
Example #2
Source File: dm_env_wrapper.py    From neural_graph_evolution with MIT License
def __init__(self, args, rand_seed, monitor, width=480, height=480):
        self.width = width
        self.height = height
        task_name = dm_control_util.get_env_names(args.task)
        from dm_control import suite
        self.env = suite.load(
            domain_name=task_name[0], task_name=task_name[1],
            task_kwargs={'random': rand_seed}
        )
        self._base_path = init_path.get_abs_base_dir()
        self.NUM_EPISODE_RECORED = NUM_EPISODE_RECORED
        self._is_dirname = True

        # save the video
        self._monitor = monitor
        self._current_episode = 0
        if self._monitor:
            self.init_save(args) 
Example #3
Source File: loader_test.py    From dm_control with Apache License 2.0
def test_load_without_kwargs(self):
    env = suite.load('cartpole', 'swingup')
    self.assertIsInstance(env, control.Environment) 
Example #4
Source File: make_env.py    From surreal with MIT License
def make_dm_control(env_name, env_config):
    from dm_control import suite
    from dm_control.suite.wrappers import pixels
    from .dm_wrapper import DMControlAdapter, DMControlDummyWrapper
    pixel_input = env_config.pixel_input
    domain_name, task_name = env_name.split('-')
    env = suite.load(domain_name=domain_name, task_name=task_name)
    if pixel_input:
        if os.getenv('DISABLE_MUJOCO_RENDERING'):
            # Rendering was requested on a pod that cannot support it. This
            # happens in GPU-based learners when we only want to create the
            # environment to inspect its dimensions, so we substitute a dummy
            # environment.
            # TODO: add a dummy wrapper that only contains the correct specs
            env = DMControlDummyWrapper(env)  # ...
        else:
            env = pixels.Wrapper(env, render_kwargs={'height': 84, 'width': 84, 'camera_id': 0})
    # TODO: what to do with reward visualization
    # Reward visualization should only be done in the eval agent
    # env = suite.load(domain_name=domain_name, task_name=task_name, visualize_reward=record_video)
    env = DMControlAdapter(env, pixel_input)
    env = FilterWrapper(env, env_config)
    env = ObservationConcatenationWrapper(env)
    if pixel_input:
        env = TransposeWrapper(env)
        env = GrayscaleWrapper(env)
        if env_config.frame_stacks > 1:
            env = FrameStackWrapper(env, env_config)
    env_config.action_spec = env.action_spec()
    env_config.obs_spec = env.observation_spec()
    return env, env_config 
Example #5
Source File: dm_suite_env.py    From dm2gym with MIT License
def __init__(self, domain_name, task_name, task_kwargs=None, environment_kwargs=None, visualize_reward=False):
        self.env = suite.load(domain_name, 
                              task_name, 
                              task_kwargs=task_kwargs, 
                              environment_kwargs=environment_kwargs, 
                              visualize_reward=visualize_reward)
        self.metadata = {'render.modes': ['human', 'rgb_array'],
                         'video.frames_per_second': round(1.0/self.env.control_timestep())}

        self.observation_space = convert_dm_control_to_gym_space(self.env.observation_spec())
        self.action_space = convert_dm_control_to_gym_space(self.env.action_spec())
        self.viewer = None 
Example #6
Source File: wrappers.py    From dreamer with Apache License 2.0
def __init__(self, domain, task, render_size=(64, 64), camera_id=0):
    if isinstance(domain, str):
      from dm_control import suite
      self._env = suite.load(domain, task)
    else:
      assert task is None
      self._env = domain()
    self._render_size = render_size
    self._camera_id = camera_id 
Example #7
Source File: suite_test.py    From dm_control with Apache License 2.0
def test_initial_state_is_randomized(self, domain, task):
    env = suite.load(domain, task, task_kwargs={'random': 42})
    obs1 = env.reset().observation
    obs2 = env.reset().observation
    self.assertFalse(
        all(np.all(obs1[k] == obs2[k]) for k in obs1),
        'Two consecutive initial states have identical observations.\n'
        'First: {}\nSecond: {}'.format(obs1, obs2)) 
Example #8
Source File: suite_test.py    From dm_control with Apache License 2.0
def test_observations_dont_contain_constant_elements(self, domain, task):
    env = suite.load(domain, task)
    trajectory = make_trajectory(domain=domain, task=task, seed=0,
                                 num_episodes=2, max_steps_per_episode=1000)
    observations = {name: [] for name in env.observation_spec()}
    for time_step in trajectory:
      for name, array in six.iteritems(time_step.observation):
        observations[name].append(array)

    failures = []

    for name, array_list in six.iteritems(observations):
      # Sampling random uniform actions generally isn't sufficient to trigger
      # these touch sensors.
      if (domain in ('manipulator', 'stacker') and name == 'touch' or
          domain == 'quadruped' and name == 'force_torque'):
        continue
      stacked_arrays = np.array(array_list)
      is_constant = np.all(stacked_arrays == stacked_arrays[0], axis=0)
      has_constant_elements = (
          is_constant if np.isscalar(is_constant) else np.any(is_constant))
      if has_constant_elements:
        failures.append((name, is_constant))

    self.assertEmpty(
        failures,
        msg='The following observation(s) contain constant elements:\n{}'
        .format('\n'.join(':\t'.join([name, str(is_constant)])
                          for (name, is_constant) in failures))) 
Example #9
Source File: suite_test.py    From dm_control with Apache License 2.0
def test_observation_arrays_dont_share_memory(self, domain, task):
    env = suite.load(domain, task)
    first_timestep = env.reset()
    action = np.zeros(env.action_spec().shape)
    second_timestep = env.step(action)
    for name, first_array in six.iteritems(first_timestep.observation):
      second_array = second_timestep.observation[name]
      self.assertFalse(
          np.may_share_memory(first_array, second_array),
          msg='Consecutive observations of {!r} may share memory.'.format(name)) 
Example #10
Source File: suite_test.py    From dm_control with Apache License 2.0
def test_task_supports_environment_kwargs(self, domain, task):
    env = suite.load(domain, task,
                     environment_kwargs=dict(flat_observation=True))
    # Check that the kwargs are actually passed through to the environment.
    self.assertSetEqual(set(env.observation_spec()),
                        {control.FLAT_OBSERVATION_KEY}) 
Example #11
Source File: suite_test.py    From dm_control with Apache License 2.0
def test_visualize_reward(self, domain, task):
    env = suite.load(domain, task)
    env.task.visualize_reward = True
    action = np.zeros(env.action_spec().shape)

    with mock.patch.object(env.task, 'get_reward') as mock_get_reward:
      mock_get_reward.return_value = -3.0  # Rewards < 0 should be clipped.
      env.reset()
      mock_get_reward.assert_called_with(env.physics)
      self.assertCorrectColors(env.physics, reward=0.0)

      mock_get_reward.reset_mock()
      mock_get_reward.return_value = 0.5
      env.step(action)
      mock_get_reward.assert_called_with(env.physics)
      self.assertCorrectColors(env.physics, reward=mock_get_reward.return_value)

      mock_get_reward.reset_mock()
      mock_get_reward.return_value = 2.0  # Rewards > 1 should be clipped.
      env.step(action)
      mock_get_reward.assert_called_with(env.physics)
      self.assertCorrectColors(env.physics, reward=1.0)

      mock_get_reward.reset_mock()
      mock_get_reward.return_value = 0.25
      env.reset()
      mock_get_reward.assert_called_with(env.physics)
      self.assertCorrectColors(env.physics, reward=mock_get_reward.return_value) 
Example #12
Source File: suite_test.py    From dm_control with Apache License 2.0
def test_model_has_at_least_2_cameras(self, domain, task):
    env = suite.load(domain, task)
    model = env.physics.model
    self.assertGreaterEqual(model.ncam, 2,
                            'Model {!r} should have at least 2 cameras, has {}.'
                            .format(model.name, model.ncam)) 
Example #13
Source File: suite_test.py    From dm_control with Apache License 2.0
def test_components_have_names(self, domain, task):
    env = suite.load(domain, task)
    model = env.physics.model

    object_types_and_size_fields = [
        ('body', 'nbody'),
        ('joint', 'njnt'),
        ('geom', 'ngeom'),
        ('site', 'nsite'),
        ('camera', 'ncam'),
        ('light', 'nlight'),
        ('mesh', 'nmesh'),
        ('hfield', 'nhfield'),
        ('texture', 'ntex'),
        ('material', 'nmat'),
        ('equality', 'neq'),
        ('tendon', 'ntendon'),
        ('actuator', 'nu'),
        ('sensor', 'nsensor'),
        ('numeric', 'nnumeric'),
        ('text', 'ntext'),
        ('tuple', 'ntuple'),
    ]
    for object_type, size_field in object_types_and_size_fields:
      for idx in range(getattr(model, size_field)):
        object_name = model.id2name(idx, object_type)
        self.assertNotEqual(object_name, '',
                            msg='Model {!r} contains unnamed {!r} with ID {}.'
                            .format(model.name, object_type, idx)) 
Example #14
Source File: suite_test.py    From dm_control with Apache License 2.0
def make_trajectory(domain, task, seed, **trajectory_kwargs):
  env = suite.load(domain, task, task_kwargs={'random': seed})
  policy = uniform_random_policy(env.action_spec(), random=seed)
  return step_environment(env, policy, **trajectory_kwargs) 
Example #15
Source File: loader_test.py    From dm_control with Apache License 2.0
def test_load_with_kwargs(self):
    env = suite.load('cartpole', 'swingup',
                     task_kwargs={'time_limit': 40, 'random': 99})
    self.assertIsInstance(env, control.Environment) 
Example #16
Source File: env.py    From PlaNet with MIT License
def __init__(self, env, symbolic, seed, max_episode_length, action_repeat, bit_depth):
    from dm_control import suite
    from dm_control.suite.wrappers import pixels
    domain, task = env.split('-')
    self.symbolic = symbolic
    self._env = suite.load(domain_name=domain, task_name=task, task_kwargs={'random': seed})
    if not symbolic:
      self._env = pixels.Wrapper(self._env)
    self.max_episode_length = max_episode_length
    self.action_repeat = action_repeat
    if action_repeat != CONTROL_SUITE_ACTION_REPEATS[domain]:
      print('Using action repeat %d; recommended action repeat for domain is %d' % (action_repeat, CONTROL_SUITE_ACTION_REPEATS[domain]))
    self.bit_depth = bit_depth 
Example #17
Source File: dm_control.py    From FractalAI with GNU Affero General Public License v3.0
def __init__(self, name: str = "cartpole-balance",
                     visualize_reward: bool = True, n_repeat_action: int = 1,
                     custom_death: "CustomDeath" = None):
            """
            Creates DMControlEnv and initializes the environment.
            :param name: "domain-task" string matching the dm_control interface.
            :param visualize_reward: match dm_control interface.
            :param n_repeat_action: the number of consecutive times that an action
                            will be applied. This allows us to set the frequency
                            at which the policy will play.
            :param custom_death: Pro hack to beat the shit out of DeepMind even further.
            """
            from dm_control import suite
            domain_name, task_name = name.split("-")
            super(DMControlEnv, self).__init__(name=name, n_repeat_action=n_repeat_action)
            self._render_i = 0
            self._env = suite.load(domain_name=domain_name, task_name=task_name,
                                   visualize_reward=visualize_reward)
            self._name = name
            self.viewer = []
            self._last_time_step = None
            self._viewer = rendering.SimpleImageViewer()

            self._custom_death = custom_death

            self.reset() 
Example #18
Source File: environment.py    From FractalAI with GNU Affero General Public License v3.0
def __init__(self, domain_name="cartpole", task_name="balance",
                     visualize_reward: bool=True, fixed_steps: int=1,
                     custom_death: "CustomDeath"=None):
            """
            Creates DMControlEnv and initializes the environment.

            :param domain_name: match dm_control interface.
            :param task_name: match dm_control interface.
            :param visualize_reward: match dm_control interface.
            :param fixed_steps: The number of consecutive times that an action will be applied.
                            This allows us to set the frequency at which the policy will play.
            :param custom_death: Pro hack to beat the shit out of DeepMind even further.
            """
            from dm_control import suite
            name = str(domain_name) + ":" + str(task_name)
            super(DMControlEnv, self).__init__(name=name, state=None)
            self.fixed_steps = fixed_steps
            self._render_i = 0
            self._env = suite.load(domain_name=domain_name, task_name=task_name,
                                   visualize_reward=visualize_reward)
            self._name = name
            self.viewer = []
            self._last_time_step = None

            self._custom_death = custom_death
            self.reset() 
Example #19
Source File: dm_control_env.py    From mushroom-rl with MIT License
def __init__(self, domain_name, task_name, horizon, gamma, task_kwargs=None,
                 dt=.01, width_screen=480, height_screen=480, camera_id=0):
        """
        Constructor.

        Args:
             domain_name (str): name of the environment;
             task_name (str): name of the task of the environment;
             horizon (int): the horizon;
             gamma (float): the discount factor;
             task_kwargs (dict, None): parameters of the task;
             dt (float, .01): duration of a control step;
             width_screen (int, 480): width of the screen;
             height_screen (int, 480): height of the screen;
             camera_id (int, 0): position of camera to render the environment;

        """
        # MDP creation
        if task_kwargs is None:
            task_kwargs = dict()
        task_kwargs['time_limit'] = np.inf  # Hack to ignore dm_control time limit.

        self.env = suite.load(domain_name, task_name, task_kwargs=task_kwargs)

        # MDP properties
        action_space = self._convert_action_space(self.env.action_spec())
        observation_space = self._convert_observation_space(self.env.observation_spec())
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        self._viewer = ImageViewer((width_screen, height_screen), dt)
        self._camera_id = camera_id

        super().__init__(mdp_info) 
Example #20
Source File: walker_env_wrapper.py    From neural_graph_evolution with MIT License
def __init__(self, args, rand_seed, monitor, width=480, height=480):
        self.width = width
        self.height = height
        self.task = args.task
        assert 'walker' in self.task or 'hopper' in self.task or 'cheetah' in self.task
        self.args = args
        self.is_evo = 'evo' in self.task

        from dm_control import suite
        self.env = suite.load(
            domain_name='walker', task_name='walk',
            task_kwargs={'random': rand_seed}
        )
        self._base_path = init_path.get_abs_base_dir()

        self.load_xml(os.path.join(self._base_path, 'env', 'assets/walker.xml'))
        self.set_get_observation()  # overwrite the original get_ob function
        self.set_get_reward()       # overwrite the original reward function
        self._JOINTS = ['right_hip',
                        'right_knee',
                        'right_ankle',
                        'left_hip',
                        'left_knee',
                        'left_ankle']

        # save the video
        self._monitor = monitor
        self._current_episode = 0
        if self._monitor:
            self.init_save(args) 
Example #21
Source File: fish_env_wrapper.py    From neural_graph_evolution with MIT License
def __init__(self, args, rand_seed, monitor, width=480, height=480):
        self.width = width
        self.height = height
        self.task = args.task
        self.args = args
        assert 'fish3d' in self.task
        self.is_evo = 'evo' in self.task
        if 'easyswim' in self.task:
            self.target_angle = args.fish_target_angle

        from dm_control import suite
        self.env = suite.load(
            domain_name='fish', task_name='swim',
            task_kwargs={'random': rand_seed}
        )
        self._base_path = init_path.get_abs_base_dir()

        self.load_xml(os.path.join(self._base_path, 'env', 'assets/fish3d.xml'))
        self.set_get_observation()  # overwrite the original get_ob function
        self.set_get_reward()  # overwrite the original reward function
        self._JOINTS = ['tail1',
                        'tail_twist',
                        'tail2',
                        'finright_roll',
                        'finright_pitch',
                        'finleft_roll',
                        'finleft_pitch']

        # save the video
        self._monitor = monitor
        self._current_episode = 0
        if self._monitor:
            self.init_save(args) 
Example #22
Source File: dm_control.py    From FractalAI with GNU Affero General Public License v3.0
def __init__(
        self,
        name: str = "cartpole-balance",
        visualize_reward: bool = True,
        n_repeat_action: int = 1,
        custom_death: "CustomDeath" = None,
    ):
        """
            Creates DMControlEnv and initializes the environment.
            :param name: "domain-task" string matching the dm_control interface.
            :param visualize_reward: match dm_control interface.
            :param n_repeat_action: the number of consecutive times that an action
                            will be applied. This allows us to set the frequency
                            at which the policy will play.
            :param custom_death: Pro hack to beat the shit out of DeepMind even further.
            """
        from dm_control import suite

        domain_name, task_name = name.split("-")
        super(DMControlEnv, self).__init__(name=name, n_repeat_action=n_repeat_action)
        self._render_i = 0
        self._env = suite.load(
            domain_name=domain_name, task_name=task_name, visualize_reward=visualize_reward
        )
        self._name = name
        self.viewer = []
        self._last_time_step = None
        self._viewer = rendering.SimpleImageViewer()

        self._custom_death = custom_death

        self.reset() 
Example #23
Source File: environment.py    From FractalAI with GNU Affero General Public License v3.0
def __init__(
        self,
        domain_name="cartpole",
        task_name="balance",
        visualize_reward: bool = True,
        fixed_steps: int = 1,
        custom_death: "CustomDeath" = None,
    ):
        """
            Creates DMControlEnv and initializes the environment.

            :param domain_name: match dm_control interface.
            :param task_name: match dm_control interface.
            :param visualize_reward: match dm_control interface.
            :param fixed_steps: The number of consecutive times that an action will be applied.
                            This allows us to set the frequency at which the policy will play.
            :param custom_death: Pro hack to beat the shit out of DeepMind even further.
            """
        from dm_control import suite

        name = str(domain_name) + ":" + str(task_name)
        super(DMControlEnv, self).__init__(name=name, state=None)
        self.fixed_steps = fixed_steps
        self._render_i = 0
        self._env = suite.load(
            domain_name=domain_name, task_name=task_name, visualize_reward=visualize_reward
        )
        self._name = name
        self.viewer = []
        self._last_time_step = None

        self._custom_death = custom_death
        self.reset() 
Example #24
Source File: dm_control_env.py    From garage with MIT License
def from_suite(cls, domain_name, task_name):
        return cls(suite.load(domain_name, task_name),
                   name='{}.{}'.format(domain_name, task_name)) 
Example #25
Source File: wrappers.py    From dmc2gym with MIT License
def __init__(
        self,
        domain_name,
        task_name,
        task_kwargs=None,
        visualize_reward={},
        from_pixels=False,
        height=84,
        width=84,
        camera_id=0,
        frame_skip=1,
        environment_kwargs=None,
        channels_first=True
    ):
        assert 'random' in task_kwargs, 'please specify a seed for deterministic behaviour'
        self._from_pixels = from_pixels
        self._height = height
        self._width = width
        self._camera_id = camera_id
        self._frame_skip = frame_skip
        self._channels_first = channels_first

        # create task
        self._env = suite.load(
            domain_name=domain_name,
            task_name=task_name,
            task_kwargs=task_kwargs,
            visualize_reward=visualize_reward,
            environment_kwargs=environment_kwargs
        )

        # true and normalized action spaces
        self._true_action_space = _spec_to_box([self._env.action_spec()])
        self._norm_action_space = spaces.Box(
            low=-1.0,
            high=1.0,
            shape=self._true_action_space.shape,
            dtype=np.float32
        )

        # create observation space
        if from_pixels:
            shape = [3, height, width] if channels_first else [height, width, 3]
            self._observation_space = spaces.Box(
                low=0, high=255, shape=shape, dtype=np.uint8
            )
        else:
            self._observation_space = _spec_to_box(
                self._env.observation_spec().values()
            )
            
        self._state_space = _spec_to_box(
                self._env.observation_spec().values()
        )
        
        self.current_state = None

        # set seed
        self.seed(seed=task_kwargs.get('random', 1))
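Several of the examples above (e.g. Examples #4 and #16) pair suite.load() with dm_control's pixels wrapper to obtain image observations. Here is a minimal sketch of that pattern; the height, width, and camera_id values are illustrative, not required:

from dm_control import suite
from dm_control.suite.wrappers import pixels

env = suite.load(domain_name='cartpole', task_name='swingup')
# Replace the state observations with rendered frames.
env = pixels.Wrapper(env, pixels_only=True,
                     render_kwargs={'height': 84, 'width': 84, 'camera_id': 0})
time_step = env.reset()
print(time_step.observation['pixels'].shape)  # (84, 84, 3)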