Source code for ding.worker.coordinator.base_serial_commander
from collections import namedtuple
from easydict import EasyDict
import copy
class BaseSerialCommander(object):
    r"""
    Overview:
        Base serial commander class. It keeps references to the learner, collector, evaluator and
        replay buffer it is constructed with. On every ``step`` it merges the learner's ``learn_info``
        and the collector's ``envstep`` into an internal info dict and passes that dict to the
        policy's ``get_setting_collect``.
    Interface:
        __init__, step, default_config
    Property:
        policy
    """

    # Class-level default config; ``default_config`` returns a deep copy, so this dict is never mutated.
    config = {}

    @classmethod
    def default_config(cls: type) -> EasyDict:
        r"""
        Overview:
            Build a fresh default config for this class.
        Returns:
            - cfg (:obj:`EasyDict`): a deep copy of ``cls.config`` with an extra ``cfg_type`` field set to \
                the class name followed by ``'Dict'``.
        """
        cfg = EasyDict(copy.deepcopy(cls.config))
        cfg.cfg_type = cls.__name__ + 'Dict'
        return cfg

    def __init__(
            self,
            cfg: dict,
            learner: 'BaseLearner',  # noqa
            collector: 'BaseSerialCollector',  # noqa
            evaluator: 'InteractionSerialEvaluator',  # noqa
            replay_buffer: 'IBuffer',  # noqa
            policy: namedtuple = None,
    ) -> None:
        r"""
        Overview:
            Init the BaseSerialCommander
        Arguments:
            - cfg (:obj:`dict`): the config of commander
            - learner (:obj:`BaseLearner`): the learner
            - collector (:obj:`BaseSerialCollector`): the collector
            - evaluator (:obj:`InteractionSerialEvaluator`): the evaluator
            - replay_buffer (:obj:`IBuffer`): the buffer
            - policy (:obj:`namedtuple`): optional policy object providing ``get_setting_collect``; \
                when omitted it must be assigned through the ``policy`` property before calling ``step``
        """
        self._cfg = cfg
        self._learner = learner
        self._collector = collector
        self._evaluator = evaluator
        self._replay_buffer = replay_buffer
        # Accumulated across steps: later updates overwrite keys of the same name.
        self._info = {}
        # Always define the attribute so that reading ``policy`` before it is set yields None
        # instead of raising an AttributeError about a missing ``_policy``.
        self._policy = None
        if policy is not None:
            # Go through the property setter so that subclass overrides of the setter still run.
            self.policy = policy

    def step(self) -> dict:
        r"""
        Overview:
            Step the commander: refresh the internal info dict from the learner and the collector, then
            query the policy for the collect settings.
        Returns:
            - collect_kwargs (:obj:`dict`): the value returned by ``policy.get_setting_collect(info)``, \
                passed through unchanged.
        Raises:
            - AttributeError: if no policy has been set.
        """
        if self._policy is None:
            raise AttributeError(
                "policy is not set; pass `policy` to __init__ or assign `commander.policy` before calling step()"
            )
        # Update info (collector info is applied last, so it wins on key collisions)
        learn_info = self._learner.learn_info
        collector_info = {'envstep': self._collector.envstep}
        self._info.update(learn_info)
        self._info.update(collector_info)
        # update kwargs
        collect_kwargs = self._policy.get_setting_collect(self._info)
        return collect_kwargs

    @property
    def policy(self) -> 'Policy':  # noqa
        r"""
        Overview:
            The policy queried in ``step``; ``None`` until one has been set.
        """
        return self._policy

    @policy.setter
    def policy(self, _policy: 'Policy') -> None:  # noqa
        self._policy = _policy