Source code for exciting_environments.env_struct


import numpy as np
from gymnasium import spaces
from gymnasium import vector
import chex

class CoreEnvironment:
    """
    Description:
        Structure of the provided environments.

    State Variables:
        Each environment has a list of state variables that are defined by the
        physical system it represents.

        Example: ``['theta', 'omega']``

    Action Variable:
        Each environment has an action which is applied to the physical system
        it represents.

        Example: ``['torque']``

    Observation Space (State Space):
        Type: Box()
        The observation space is simply the state space of the physical system.
        This space is a normalized, continuous, multidimensional box in [-1, 1].

    Action Space:
        Type: Box()
        The action space of an environment is the action space of the physical
        system. This space is a continuous, multidimensional box.

    Initial State:
        Initial state values depend on the physical system.
    """

    @property
    def def_reward_function(self):
        """Returns the default reward function of the environment."""
        return self.default_reward_func

    @property
    def batch_size(self):
        """Returns the batch size of the environment setup."""
        return self._batch_size

    @property
    def obs_description(self):
        """Returns a list of state names of all states in the observation (equal to the state space)."""
        return self.states_description

    @property
    def states_description(self):
        """Returns a list of state names of all states in the state space."""
        return np.array(["state1_name", "..."])

    @property
    def action_description(self):
        """Returns the name of the action."""
        return np.array(["action_name"])

    def __init__(self, batch_size, physical_paras, max_action, reward_func=None, tau=1e-4, constraints=[]):
        """
        Args:
            batch_size (int): Number of training examples utilized in one iteration.
            physical_paras: Parameters of the physical system; these depend on the environment.
            max_action (float): Maximum action that can be applied to the system.
            reward_func (function): Reward function for training. Takes the observation matrix
                and the action as parameters. Default: None (default_reward_func from class).
            tau (float): Duration of one control step in seconds. Default: 1e-4.
            constraints (array): Constraints for the states.
        """
    def reset(self, random_key: chex.PRNGKey = False, initial_values: np.ndarray = None):
        """
        Reset the environment and return the initial observation vector.

        Options:
        - The observation/state space gets a random initial sample.
        - The initial observation/state space is set to the initial_values array.
        """
        return
    def render(self, *_, **__):
        """
        Update the visualization of the motor.

        NotImplemented
        """
        raise NotImplementedError("To be implemented!")
    def step(self, action):
        """Perform one simulation step of the environment with an action from the action space.

        Args:
            action: Action to apply to the environment.

        Returns:
            Multiple Outputs:

            observation (ndarray(float)): Observation/state matrix (shape=(batch_size, states)).

            reward (ndarray(float)): Amount of reward received for the last step (shape=(batch_size, 1)).

            terminated (bool): Flag indicating whether the agent has reached the terminal state.

            truncated (ndarray(bool)): Flag indicating whether a state has gone out of bounds (shape=(batch_size, states)).

            {}: An empty dictionary for consistency with the OpenAI Gym interface.
        """
        return
    def close(self):
        """Called when the environment is deleted.

        NotImplemented
        """
        raise NotImplementedError("To be implemented!")
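

# --- Illustrative sketch (not part of the module) ---
# A minimal example of how a concrete environment could follow the
# CoreEnvironment structure described above. The class name, the state
# names, and the simple Euler-integrated toy dynamics below are assumptions
# made for illustration; the actual environments in exciting_environments
# define their own physical parameters and dynamics.
class DummyPendulumEnv(CoreEnvironment):
    """Hypothetical batched pendulum-like environment used only as an example."""

    @property
    def states_description(self):
        return np.array(["theta", "omega"])

    @property
    def action_description(self):
        return np.array(["torque"])

    def __init__(self, batch_size=8, max_action=1.0, reward_func=None, tau=1e-4, constraints=[]):
        self._batch_size = batch_size
        self.max_action = max_action
        self.tau = tau
        self.constraints = constraints
        # Default reward: negative squared angle, kept batched (shape=(batch_size, 1)).
        self.default_reward_func = lambda obs, action: -(obs[:, 0:1] ** 2)
        self.reward_func = reward_func if reward_func is not None else self.default_reward_func
        self.states = np.zeros((batch_size, 2))

    def reset(self, random_key=False, initial_values: np.ndarray = None):
        # Either take the provided initial values or start from a zero state.
        self.states = initial_values if initial_values is not None else np.zeros((self._batch_size, 2))
        return self.states

    def step(self, action):
        theta, omega = self.states[:, 0], self.states[:, 1]
        # Assumed toy dynamics: one explicit Euler step of a pendulum driven by the action.
        omega = omega + self.tau * (action[:, 0] - np.sin(theta))
        theta = theta + self.tau * omega
        self.states = np.stack([theta, omega], axis=1)
        reward = self.reward_func(self.states, action)
        truncated = np.abs(self.states) > 1.0  # per-state out-of-bounds flags
        return self.states, reward, False, truncated, {}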
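

# --- Example usage (illustrative only) ---
# A sketch of the intended interaction loop, assuming the DummyPendulumEnv
# defined above. The custom reward function is likewise an assumption: per the
# __init__ docstring, any callable taking the observation matrix and the
# action can be passed as ``reward_func``.
if __name__ == "__main__":
    def my_reward(obs, action):
        # Penalize angle deviation and control effort, batched over the first axis.
        return -(obs[:, 0:1] ** 2) - 0.1 * action ** 2

    env = DummyPendulumEnv(batch_size=4, max_action=1.0, reward_func=my_reward, tau=1e-4)
    obs = env.reset()
    for _ in range(10):
        action = np.random.uniform(-1.0, 1.0, size=(env.batch_size, 1))
        obs, reward, terminated, truncated, _ = env.step(action)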