"""Source code for rlgraph.environments.random_env."""
# Copyright 2018 The RLgraph authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import time
from rlgraph.environments import Environment
import rlgraph.spaces as spaces
class RandomEnv(Environment):
    """
    An Env producing random states no matter what actions come in.

    Each `step` re-seats numpy's global RNG from this instance's stored RNG
    state, samples a state/reward/terminal, then stores the RNG state again —
    so two RandomEnv instances interleaved in one process stay independently
    reproducible.
    """
    def __init__(self, state_space, action_space, reward_space=None, terminal_prob=0.1, deterministic=False):
        """
        Args:
            state_space (Union[dict,Space]): The state Space to randomly sample observations from.
            action_space (Union[dict,Space]): The action Space (only used to validate incoming actions).
            reward_space (Union[dict,Space]): The reward Space from which to randomly sample for each step.
            terminal_prob (float): The probability (0.0 to 1.0) with which an episode ends for each step.
            deterministic (bool): Convenience flag to seed the environment automatically upon construction.

        Raises:
            ValueError: If `terminal_prob` is not a valid probability.
        """
        super(RandomEnv, self).__init__(state_space=state_space, action_space=action_space)
        self.reward_space = spaces.Space.from_spec(reward_space)
        # Validate up front: an invalid probability would otherwise only fail
        # later inside np.random.choice with a less helpful error.
        if not 0.0 <= terminal_prob <= 1.0:
            raise ValueError(
                "ERROR: `terminal_prob` must be between 0.0 and 1.0 (got {})!".format(terminal_prob)
            )
        self.terminal_prob = terminal_prob
        if deterministic is True:
            np.random.seed(10)
        # Snapshot the global numpy RNG state so each step can restore it.
        self.last_state = np.random.get_state()

    def seed(self, seed=None):
        """
        Seeds this environment's RNG.

        Args:
            seed (Optional[int]): The seed to use. If None, derives one from the current time.

        Returns:
            int: The seed that was actually used.
        """
        if seed is None:
            # time.time() returns a float, but np.random.seed requires an int.
            seed = int(time.time())
        np.random.seed(seed)
        self.last_state = np.random.get_state()
        return seed

    def reset(self):
        """Resets the env by sampling (and returning) a fresh random state."""
        return self.step()[0]  # 0=state

    def reset_for_env_stepper(self):
        """Same as `reset` (no extra work needed for the env-stepper API)."""
        return self.reset()

    def step(self, actions=None):
        """
        Takes one step, ignoring `actions` (other than validating them).

        Args:
            actions (Optional[any]): An (ignored) action; must lie in this env's action Space if given.

        Returns:
            tuple: (next state, reward, terminal flag, None (no info dict)).
        """
        if actions is not None:
            assert self.action_space.contains(actions), \
                "ERROR: Given action ({}) in step is not part of action Space ({})!".format(actions, self.action_space)
        # Restore the global RNG to the last state observed by this instance.
        np.random.set_state(self.last_state)
        # Do the random sampling (using numpy).
        state = self.state_space.sample()
        reward = self.reward_space.sample()
        terminal = np.random.choice([True, False], p=[self.terminal_prob, 1.0 - self.terminal_prob])
        # Store the current state of the RNG for the next step.
        self.last_state = np.random.get_state()
        return state, reward, terminal, None

    def step_for_env_stepper(self, actions=None):
        """Like `step`, but returns only (state, reward, terminal) as the env-stepper API expects."""
        ret = self.step(actions)
        return ret[0], ret[1], ret[2]

    def __str__(self):
        return "RandomEnv()"