# Source code for rlgraph.environments.random_env

# Copyright 2018 The RLgraph authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import time

from rlgraph.environments import Environment
import rlgraph.spaces as spaces


class RandomEnv(Environment):
    """
    An Env producing random states no matter what actions come in.

    Each instance remembers the state of numpy's global RNG after its own last
    draw (`self.last_state`) and restores it before the next draw, so an
    instance's random stream continues where it left off.
    NOTE: This is done by overwriting numpy's global RNG state on every `step` call.
    """
    def __init__(self, state_space, action_space, reward_space=None, terminal_prob=0.1, deterministic=False):
        """
        Args:
            state_space (Union[dict,Space]): The state Space from which to randomly sample for each step.
                Forwarded to the parent constructor.
            action_space (Union[dict,Space]): The action Space against which incoming actions are checked.
                Forwarded to the parent constructor.
            reward_space (Union[dict,Space]): The reward Space from which to randomly sample for each step.
                NOTE(review): Passed through `Space.from_spec`; what the default (None) resolves to depends
                on that method - confirm.
            terminal_prob (float): The probability with which an episode ends for each step.
            deterministic (bool): Convenience flag to seed the environment automatically upon construction.
        """
        super(RandomEnv, self).__init__(state_space=state_space, action_space=action_space)

        self.reward_space = spaces.Space.from_spec(reward_space)
        self.terminal_prob = terminal_prob

        # Only an explicit `True` triggers the fixed seed (this re-seeds numpy's global RNG).
        if deterministic is True:
            np.random.seed(10)
        # Snapshot of the RNG state from which this instance's next `step` continues.
        self.last_state = np.random.get_state()

    def seed(self, seed=None):
        """
        Seeds numpy's global RNG and stores the resulting RNG state for this instance.

        Args:
            seed (Optional[int]): The seed to use. If None, a seed is derived from the current time.

        Returns:
            int: The seed that was actually used.
        """
        if seed is None:
            # `np.random.seed` only accepts integers in [0, 2**32 - 1]; the raw float
            # returned by `time.time()` would be rejected with a TypeError.
            seed = int(time.time()) % (2 ** 32)
        np.random.seed(seed)
        self.last_state = np.random.get_state()
        return seed

    def reset(self):
        """
        Resets the Env by performing one random step (without an action).

        Returns:
            The randomly sampled state.
        """
        return self.step()[0]  # 0=state

    def reset_for_env_stepper(self):
        """
        Same as `reset`.

        Returns:
            The randomly sampled state.
        """
        return self.reset()

    def step(self, actions=None):
        """
        Samples a random state, reward and terminal signal. The given actions are only
        checked against the action Space; they do not influence the outcome.

        Args:
            actions (any): Optional action(s). If given, must be contained in `self.action_space`.

        Returns:
            tuple: (state, reward, terminal, None), where the last slot is always None.
        """
        if actions is not None:
            assert self.action_space.contains(actions), \
                "ERROR: Given action ({}) in step is not part of action Space ({})!".format(actions, self.action_space)

        # Set the seed to the last observed state for this instance.
        np.random.set_state(self.last_state)
        # Do the random sampling (using numpy).
        # The order of the draws (state, reward, terminal) determines the values produced for a given seed.
        state = self.state_space.sample()
        reward = self.reward_space.sample()
        terminal = np.random.choice([True, False], p=[self.terminal_prob, 1.0 - self.terminal_prob])
        # Store the current state of the RNG.
        self.last_state = np.random.get_state()
        return state, reward, terminal, None

    def step_for_env_stepper(self, actions=None):
        """
        Same as `step`, but drops the trailing (always None) slot of the returned tuple.

        Args:
            actions (any): Optional action(s), passed on to `step`.

        Returns:
            tuple: (state, reward, terminal).
        """
        ret = self.step(actions)
        return ret[0], ret[1], ret[2]

    def __str__(self):
        return "RandomEnv()"