# Source code for rlgraph.environments.environment
# Copyright 2018 The RLgraph authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from rlgraph.utils.specifiable import Specifiable
from rlgraph.spaces import Space
class Environment(Specifiable):
    """
    An Env class used to run experiment-based RL.

    Concrete environments must implement `seed`, `reset`, `step` and `__str__`;
    `render` and `terminate` are optional no-ops by default.
    """
    def __init__(self, state_space, action_space, seed=None):
        """
        Args:
            state_space (Union[dict,Space]): The spec-dict for generating the state Space or the state Space object
                itself.
            action_space (Union[dict,Space]): The spec-dict for generating the action Space or the action Space object
                itself.
            seed (Optional[int]): An optional random seed. If not None, `self.seed(seed)` is called after
                construction (requires the subclass to implement `seed`).
            #reward_clipping (Optional[Union[Tuple[float,float],float]]): An optional reward clipping setting used
            # to restrict all rewards produced by the Environment to be in a certain range.
            # None for no clipping. Single float for clipping between -`reward_clipping` and +`reward_clipping`.
        """
        super(Environment, self).__init__()
        # Normalize spec-dicts (or pass through ready-made Space objects) into Space instances.
        self.state_space = Space.from_spec(state_space)
        self.action_space = Space.from_spec(action_space)
        # self.reward_clipping = reward_clipping

        # Add some seeding to the created Env.
        if seed is not None:
            self.seed(seed)

    def seed(self, seed=None):
        """
        Sets the random seed of the environment to the given value.

        Args:
            seed (int): The seed to use (default: current epoch seconds).

        Returns:
            int: The seed actually used.

        Raises:
            NotImplementedError: Always; subclasses must override.
        """
        raise NotImplementedError

    def reset(self):
        """
        Resets the state of the environment, returning an initial observation.

        Returns:
            tuple: The Env's state after the reset.

        Raises:
            NotImplementedError: Always; subclasses must override.
        """
        raise NotImplementedError

    def step(self, **kwargs):
        """
        Run one time step of the environment's dynamics. When the end of an episode is reached, reset() should be
        called to reset the environment's internal state.

        Args:
            kwargs (any): The action(s) to be executed by the environment. Actions have to be members of this
                Environment's action_space (a call to self.action_space.contains(action) must return True).

        Returns:
            tuple:
                - The state s' after(!) executing the given actions(s).
                - The reward received after taking a in s.
                - Whether s' is a terminal state.
                - Some Environment specific info.

        Raises:
            NotImplementedError: Always; subclasses must override.
        """
        raise NotImplementedError

    def render(self):
        """
        Should render the Environment in its current state. May be implemented or not.
        """
        pass

    def terminate(self):
        """
        Clean up operation. May be implemented or not.
        """
        pass

    def __str__(self):
        raise NotImplementedError