import numpy as np
from gymnasium import spaces
from gymnasium import vector
import chex
class CoreEnvironment:
"""
Description:
Structure of provided Environments.
State Variables:
Each environment has got a list of state variables that are defined by the physical system represented.
Example:
``['theta', 'omega']``
Action Variable:
Each environment has got an action which is applied to the physical system represented.
Example:
``['torque']``
Observation Space(State Space):
Type: Box()
The Observation Space is nothing but the State Space of the pyhsical system.
This Space is a normalized, continious, multidimensional box in [-1,1].
Action Space:
Type: Box()
The action space of the environments are the action spaces of the physical systems.
This Space is a continious, multidimensional box.
Initial State:
Initial state values depend on the physical system.
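
    Example:
        A minimal sketch of how such spaces could look, assuming a hypothetical
        environment with two states, one action, ``batch_size=8`` and
        ``max_action=5.0`` (the concrete environments construct their own spaces)::

            import numpy as np
            from gymnasium import spaces

            batch_size, max_action = 8, 5.0  # illustrative values, not part of this base class
            # normalized observation/state space in [-1, 1]
            observation_space = spaces.Box(low=-1.0, high=1.0, shape=(batch_size, 2), dtype=np.float32)
            # continuous action space bounded by the maximum applicable action
            action_space = spaces.Box(low=-max_action, high=max_action, shape=(batch_size, 1), dtype=np.float32)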
"""
    @property
    def def_reward_function(self):
        """Returns the default reward function of the environment."""
        return self.default_reward_func

    @property
    def batch_size(self):
        """Returns the batch size of the environment setup."""
        return self._batch_size

    @property
    def obs_description(self):
        """Returns a list of state names of all states in the observation (equal to the state space)."""
        return self.states_description

    @property
    def states_description(self):
        """Returns a list of state names of all states in the state space."""
        return np.array(["state1_name", "..."])

    @property
    def action_description(self):
        """Returns the name of the action."""
        return np.array(["action_name"])
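
    # Concrete environments are expected to override the description properties with
    # their actual state and action names. A hypothetical pendulum environment, for
    # example, might return (illustrative only, not part of this base class):
    #
    #     states_description  -> np.array(["theta", "omega"])
    #     action_description  -> np.array(["torque"])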
    def __init__(self, batch_size, physical_paras, max_action, reward_func=None, tau=1e-4, constraints=[]):
        """
        Args:
            batch_size(int): Number of training examples utilized in one iteration.
            physical_paras: Parameters of the physical system; which parameters are needed depends on the environment.
            max_action(float): Maximum action that can be applied to the system.
            reward_func(function): Reward function for training. Takes the observation matrix and the action as parameters. Default: None (default_reward_func from class).
            tau(float): Duration of one control step in seconds. Default: 1e-4.
            constraints(array): Constraints for the states.
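
        Example:
            A sketch of a custom reward function and how it could be passed in,
            assuming a hypothetical concrete subclass ``PendulumEnv`` (its name and
            physical parameters are illustrative, not part of this base class)::

                import numpy as np

                def my_reward(obs, action):
                    # penalize squared deviation from the origin and squared action effort
                    return -(np.sum(obs**2, axis=1, keepdims=True) + 0.1 * action**2)

                # PendulumEnv is a hypothetical concrete environment following this interface
                env = PendulumEnv(batch_size=8, physical_paras=(1.0, 1.0), max_action=5.0,
                                  reward_func=my_reward, tau=1e-4)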
"""
    def reset(self, random_key: chex.PRNGKey = False, initial_values: np.ndarray = None):
        """
        Reset the environment and return the initial observation vector.

        Options:
            - The observation/state space gets a random initial sample.
            - The initial observation/state is set to the initial_values array.
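
        Example:
            Hypothetical call patterns, assuming ``env`` is an instance of a concrete
            environment with two states (names and shapes are illustrative only)::

                import jax
                import numpy as np

                # env: a concrete environment instance (hypothetical)
                # random initial states drawn with a JAX PRNG key
                obs = env.reset(random_key=jax.random.PRNGKey(0))
                # fixed initial states provided explicitly
                obs = env.reset(initial_values=np.zeros((env.batch_size, 2)))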
"""
return
    def render(self, *_, **__):
        """
        Update the visualization of the motor.

        NotImplemented
        """
        raise NotImplementedError("To be implemented!")
    def step(self, action):
        """Perform one simulation step of the environment with an action from the action space.

        Args:
            action: Action to apply to the environment.

        Returns:
            Multiple outputs:

            observation(ndarray(float)): Observation/state matrix (shape=(batch_size, states)).
            reward(ndarray(float)): Amount of reward received for the last step (shape=(batch_size, 1)).
            terminated(bool): Flag indicating whether the agent has reached the terminal state.
            truncated(ndarray(bool)): Flag indicating whether a state has gone out of bounds (shape=(batch_size, states)).
            {}: An empty dictionary for consistency with the OpenAI Gym interface.
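
        Example:
            A short rollout sketch, assuming ``env`` is a concrete environment instance
            and the applied action is simply zero (illustrative only)::

                import numpy as np

                # env: a concrete environment instance (hypothetical)
                observation = env.reset()
                for _ in range(100):
                    action = np.zeros((env.batch_size, 1))
                    observation, reward, terminated, truncated, _ = env.step(action)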
"""
return
    def close(self):
        """Called when the environment is deleted.

        NotImplemented
        """
        raise NotImplementedError("To be implemented!")