# Source code for ding.rl_utils.sampler
import torch
import treetensor.torch as ttorch
from torch.distributions import Normal, Independent
class ArgmaxSampler:
    '''
    Overview:
        Greedy (deterministic) sampler: picks the index of the largest value of the input.
    Interfaces:
        ``__call__``
    '''

    def __call__(self, logit: torch.Tensor) -> torch.Tensor:
        '''
        Overview:
            Return the index of the maximum value along the last dimension.
        Arguments:
            - logit (:obj:`torch.Tensor`): The input tensor; its last dimension is reduced.
        Returns:
            - action (:obj:`torch.Tensor`): Integer index tensor shaped like ``logit`` with the \
                last dimension removed.
        '''
        return logit.argmax(dim=-1)
class MultinomialSampler:
    '''
    Overview:
        Stochastic sampler: draws an index from the categorical distribution defined by the input logits.
    Interfaces:
        ``__call__``
    '''

    def __call__(self, logit: torch.Tensor) -> torch.Tensor:
        '''
        Overview:
            Return an index sampled from ``Categorical(logits=logit)``.
        Arguments:
            - logit (:obj:`torch.Tensor`): The input tensor, passed as ``logits`` to \
                ``torch.distributions.Categorical`` (categories lie along the last dimension).
        Returns:
            - action (:obj:`torch.Tensor`): The sampled index; the result is random and depends \
                on torch's global RNG state.
        '''
        categorical = torch.distributions.Categorical(logits=logit)
        return categorical.sample()
class MuSampler:
    '''
    Overview:
        Deterministic sampler: returns the ``mu`` field of the input and draws no random sample.
    Interfaces:
        ``__call__``
    '''

    def __call__(self, logit: ttorch.Tensor) -> torch.Tensor:
        '''
        Overview:
            Return the ``mu`` attribute of the input unchanged.
        Arguments:
            - logit (:obj:`ttorch.Tensor`): The input tree tensor; only its ``mu`` attribute is read.
        Returns:
            - action (:obj:`torch.Tensor`): ``logit.mu`` itself (no copy is made here).
        '''
        return logit.mu
class ReparameterizationSampler:
    '''
    Overview:
        Stochastic sampler for Gaussian outputs: draws a reparameterized sample from \
        ``Normal(mu, sigma)``.
    Interfaces:
        ``__call__``
    '''

    def __call__(self, logit: ttorch.Tensor) -> torch.Tensor:
        '''
        Overview:
            Return a reparameterized sample from the Gaussian described by the input.
        Arguments:
            - logit (:obj:`ttorch.Tensor`): The input tree tensor; its ``mu`` and ``sigma`` \
                attributes are used as the mean and standard deviation of the Gaussian.
        Returns:
            - action (:obj:`torch.Tensor`): The value drawn with ``rsample``, so gradients can \
                flow back to ``mu`` and ``sigma``.
        '''
        # ``Independent(..., 1)`` reinterprets the last batch dimension as an event dimension.
        # It does not alter the sampled values; it only changes how ``log_prob`` would reduce.
        gaussian = Independent(Normal(logit.mu, logit.sigma), 1)
        return gaussian.rsample()
class HybridStochasticSampler:
    '''
    Overview:
        Stochastic sampler for hybrid outputs: samples a discrete action type and draws \
        reparameterized continuous action args.
    Interfaces:
        ``__call__``
    '''

    def __call__(self, logit: ttorch.Tensor) -> ttorch.Tensor:
        '''
        Overview:
            Return the sampled action type and the reparameterized action args.
        Arguments:
            - logit (:obj:`ttorch.Tensor`): The input tree tensor. ``logit.action_type`` is used as \
                categorical logits; ``logit.action_args.mu`` and ``logit.action_args.sigma`` are used \
                as the mean and standard deviation of a Gaussian.
        Returns:
            - action (:obj:`ttorch.Tensor`): Tree tensor with keys ``action_type`` (sampled index) \
                and ``action_args`` (``rsample`` result).
        '''
        # Discrete part: draw the action type from the categorical distribution over the logits.
        type_dist = torch.distributions.Categorical(logits=logit.action_type)
        action_type = type_dist.sample()

        # Continuous part: reparameterized Gaussian sample. ``Independent(..., 1)`` only
        # reinterprets the last batch dimension as an event dimension.
        args_dist = Independent(Normal(logit.action_args.mu, logit.action_args.sigma), 1)
        action_args = args_dist.rsample()

        return ttorch.as_tensor({
            'action_type': action_type,
            'action_args': action_args,
        })
class HybridDeterminsticSampler:
    '''
    Overview:
        Deterministic sampler for hybrid outputs: takes the argmax action type and the \
        ``mu`` of the action args.
    Interfaces:
        ``__call__``
    '''

    def __call__(self, logit: ttorch.Tensor) -> ttorch.Tensor:
        '''
        Overview:
            Return the argmax action type and the mu action args.
        Arguments:
            - logit (:obj:`ttorch.Tensor`): The input tree tensor. ``logit.action_type`` is reduced \
                with argmax over its last dimension; ``logit.action_args.mu`` is returned as is.
        Returns:
            - action (:obj:`ttorch.Tensor`): Tree tensor with keys ``action_type`` (argmax index) \
                and ``action_args`` (``logit.action_args.mu``).
        '''
        action_type = logit.action_type.argmax(dim=-1)
        action_args = logit.action_args.mu
        return ttorch.as_tensor({
            'action_type': action_type,
            'action_args': action_args,
        })


# The class name above is misspelled ("Determinstic") but is kept so existing imports keep
# working; this alias exposes the correctly spelled name as well.
HybridDeterministicSampler = HybridDeterminsticSampler