# Source code for rlgraph.environments.environment

# Copyright 2018 The RLgraph authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from rlgraph.utils.specifiable import Specifiable
from rlgraph.spaces import Space


class Environment(Specifiable):
    """
    Base class for environments used to run experiment-based RL.

    The constructor resolves the state and action spaces; `seed`, `reset`,
    `step` and `__str__` raise NotImplementedError here and are meant to be
    provided by concrete environments. `render` and `terminate` are optional
    hooks that do nothing by default.
    """

    def __init__(self, state_space, action_space, seed=None):
        """
        Args:
            state_space (Union[dict,Space]): Either a spec-dict from which the
                state Space is generated or the state Space object itself.

            action_space (Union[dict,Space]): Either a spec-dict from which the
                action Space is generated or the action Space object itself.

            seed (Optional[int]): If not None, `self.seed(seed)` is called at
                the end of construction.
        """
        super(Environment, self).__init__()

        # Both arguments go through `Space.from_spec`, which accepts either
        # form described in the docstring above.
        self.state_space = Space.from_spec(state_space)
        self.action_space = Space.from_spec(action_space)

        # NOTE: A `reward_clipping` setting is not supported at the moment;
        # the former assignment is kept here for reference only.
        # self.reward_clipping = reward_clipping

        # No seed requested -> leave seeding untouched.
        if seed is None:
            return
        self.seed(seed)

    def seed(self, seed=None):
        """
        Sets the random seed of the environment to the given value.

        Args:
            seed (int): The seed to use (default: current epoch seconds).

        Returns:
            int: The seed actually used.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    def reset(self):
        """
        Resets the state of the environment and returns an initial observation.

        Returns:
            tuple: The Env's state after the reset.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    def step(self, **kwargs):
        """
        Runs one time step of the environment's dynamics.

        Once the end of an episode is reached, `reset()` should be called to
        reset the environment's internal state.

        Args:
            kwargs (any): The action(s) to be executed by the environment.
                Actions have to be members of this Environment's action_space
                (a call to `self.action_space.contains(action)` must return
                True).

        Returns:
            tuple:
                - The state s' after(!) executing the given action(s).
                - The reward received after taking a in s.
                - Whether s' is a terminal state.
                - Some Environment specific info.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    def render(self):
        """
        Should render the Environment in its current state.

        May be implemented or not; the base implementation does nothing.
        """

    def terminate(self):
        """
        Clean up operation.

        May be implemented or not; the base implementation does nothing.
        """

    def __str__(self):
        """
        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()