# Source code for grl.rl_modules.value_network.value_network

from typing import Tuple, Union

import torch
import torch.nn as nn
from easydict import EasyDict
from tensordict import TensorDict

from grl.neural_network import get_module
from grl.neural_network.encoders import get_encoder


class VNetwork(nn.Module):
    """
    Overview:
        Value network, which is used to approximate the value function. It holds three
        sub-modules in ``self.model``: ``state_encoder``, ``condition_encoder`` and ``backbone``.
    Interfaces:
        ``__init__``, ``forward``
    """

    def __init__(self, config: EasyDict):
        """
        Overview:
            Initialization of value network.
        Arguments:
            config (:obj:`EasyDict`): The configuration dict. ``config.backbone`` must provide
                ``type`` and ``args``. ``config.state_encoder`` and ``config.condition_encoder``
                are optional and use the same ``type``/``args`` layout; when one is absent,
                ``torch.nn.Identity`` is used in its place.
        """
        super().__init__()
        self.config = config
        self.model = torch.nn.ModuleDict()

        # Register the encoders in a fixed order (state first, then condition) so the
        # layout of ``self.model`` does not depend on which encoders are configured.
        for encoder_name in ("state_encoder", "condition_encoder"):
            if hasattr(config, encoder_name):
                encoder_config = getattr(config, encoder_name)
                encoder = get_encoder(encoder_config.type)(**encoder_config.args)
            else:
                encoder = torch.nn.Identity()
            self.model[encoder_name] = encoder

        # TODO: specific backbone network
        backbone_class = get_module(config.backbone.type)
        self.model["backbone"] = backbone_class(**config.backbone.args)

    def forward(
        self,
        state: Union[torch.Tensor, TensorDict],
        condition: Union[torch.Tensor, TensorDict, None] = None,
    ) -> torch.Tensor:
        """
        Overview:
            Return output of value networks.
        Arguments:
            state (:obj:`Union[torch.Tensor, TensorDict]`): The input state.
            condition (:obj:`Union[torch.Tensor, TensorDict]`): The input condition. Optional;
                when it is ``None`` the condition encoder is skipped and the backbone is called
                with the state embedding only.
        Returns:
            value (:obj:`Union[torch.Tensor, TensorDict]`): The output of value network.
        """

        state_embedding = self.model["state_encoder"](state)
        if condition is None:
            # Unconditional value: the backbone receives a single argument.
            return self.model["backbone"](state_embedding)

        condition_embedding = self.model["condition_encoder"](condition)
        return self.model["backbone"](state_embedding, condition_embedding)


class DoubleVNetwork(nn.Module):
    """
    Overview:
        Double value network, which has two value networks. Both are stored in ``self.model``
        under the keys ``v1`` and ``v2``.
    Interfaces:
        ``__init__``, ``forward``, ``compute_double_v``, ``compute_mininum_v``,
        ``compute_minimum_v``
    """

    def __init__(self, config: EasyDict):
        """
        Overview:
            Initialization of double value network.
        Arguments:
            config (:obj:`EasyDict`): The configuration dict. The same config is passed to
                ``VNetwork`` twice, producing two separately constructed value networks.
        """
        super().__init__()

        self.model = torch.nn.ModuleDict()
        self.model["v1"] = VNetwork(config)
        self.model["v2"] = VNetwork(config)

    def compute_double_v(
        self,
        state: Union[torch.Tensor, TensorDict],
        condition: Union[torch.Tensor, TensorDict, None] = None,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Overview:
            Return the output of two value networks.
        Arguments:
            state (:obj:`Union[torch.Tensor, TensorDict]`): The input state.
            condition (:obj:`Union[torch.Tensor, TensorDict]`): The input condition. Optional;
                it is passed through unchanged (including ``None``) to both value networks.
        Returns:
            v1 (:obj:`Union[torch.Tensor, TensorDict]`): The output of the first value network.
            v2 (:obj:`Union[torch.Tensor, TensorDict]`): The output of the second value network.
        """

        v1 = self.model["v1"](state, condition)
        v2 = self.model["v2"](state, condition)
        return v1, v2

    def compute_mininum_v(
        self,
        state: Union[torch.Tensor, TensorDict],
        condition: Union[torch.Tensor, TensorDict, None] = None,
    ) -> torch.Tensor:
        """
        Overview:
            Return the minimum output of two value networks.
        Arguments:
            state (:obj:`Union[torch.Tensor, TensorDict]`): The input state.
            condition (:obj:`Union[torch.Tensor, TensorDict]`): The input condition. Optional.
        Returns:
            minimum_v (:obj:`Union[torch.Tensor, TensorDict]`): The minimum output of value network.
        """

        v1, v2 = self.compute_double_v(state, condition=condition)
        # Two-tensor form of ``torch.min``: element-wise minimum of both estimates.
        return torch.min(v1, v2)

    def compute_minimum_v(
        self,
        state: Union[torch.Tensor, TensorDict],
        condition: Union[torch.Tensor, TensorDict, None] = None,
    ) -> torch.Tensor:
        """
        Overview:
            Correctly spelled alias of ``compute_mininum_v``; the original (misspelled) name is
            kept for backward compatibility and this method simply delegates to it.
        Arguments:
            state (:obj:`Union[torch.Tensor, TensorDict]`): The input state.
            condition (:obj:`Union[torch.Tensor, TensorDict]`): The input condition. Optional.
        Returns:
            minimum_v (:obj:`Union[torch.Tensor, TensorDict]`): The minimum output of value network.
        """

        return self.compute_mininum_v(state, condition=condition)

    def forward(
        self,
        state: Union[torch.Tensor, TensorDict],
        condition: Union[torch.Tensor, TensorDict, None] = None,
    ) -> torch.Tensor:
        """
        Overview:
            Return the minimum output of two value networks.
        Arguments:
            state (:obj:`Union[torch.Tensor, TensorDict]`): The input state.
            condition (:obj:`Union[torch.Tensor, TensorDict]`): The input condition. Optional.
        Returns:
            minimum_v (:obj:`Union[torch.Tensor, TensorDict]`): The minimum output of value network.
        """

        return self.compute_mininum_v(state, condition=condition)