LightZero
教程
安装和快速入门指南
LightZero 中如何自定义算法?
LightZero 中如何自定义环境?
LightZero 中如何设置配置文件?
LightZero 的日志监控体系
API 文档
Agent
Config
Entry
Envs
MCTS
Model
Policy
Worker
LightZero
Index
Index
_ | A | C | D | E | F | G | I | L | M | N | O | P | R | S | T | U
_
__AUTHOR__ (in module lzero.config.meta)
__AUTHOR_EMAIL__ (in module lzero.config.meta)
__DESCRIPTION__ (in module lzero.config.meta)
__init__() (lzero.entry.eval_alphazero.eval_alphazero method)
(lzero.entry.eval_muzero.eval_muzero method)
(lzero.entry.eval_muzero_with_gym_env.eval_muzero_with_gym_env method)
(lzero.entry.train_alphazero.train_alphazero method)
(lzero.entry.train_muzero.train_muzero method)
(lzero.entry.train_muzero_with_gym_env.train_muzero_with_gym_env method)
(lzero.entry.train_muzero_with_reward_model.train_muzero_with_reward_model method)
(lzero.envs.wrappers.action_discretization_env_wrapper.ActionDiscretizationEnvWrapper method)
(lzero.envs.wrappers.lightzero_env_wrapper.LightZeroEnvWrapper method)
(lzero.mcts.buffer.game_buffer.GameBuffer method)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
(lzero.mcts.tree_search.mcts_ctree.EfficientZeroMCTSCtree method)
(lzero.mcts.tree_search.mcts_ctree.GumbelMuZeroMCTSCtree method)
(lzero.mcts.tree_search.mcts_ctree.MuZeroMCTSCtree method)
(lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
(lzero.worker.muzero_collector.MuZeroCollector method)
(lzero.worker.muzero_evaluator.MuZeroEvaluator method)
__TITLE__ (in module lzero.config.meta)
__VERSION__ (in module lzero.config.meta)
_abc_impl (lzero.mcts.buffer.game_buffer.GameBuffer attribute)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer attribute)
(lzero.policy.alphazero.AlphaZeroPolicy attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy attribute)
(lzero.worker.muzero_collector.MuZeroCollector attribute)
(lzero.worker.muzero_evaluator.MuZeroEvaluator attribute)
_asdict() (lzero.policy.alphazero.AlphaZeroPolicy.collect_function method)
(lzero.policy.alphazero.AlphaZeroPolicy.eval_function method)
(lzero.policy.alphazero.AlphaZeroPolicy.learn_function method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.collect_function method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.eval_function method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.learn_function method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.collect_function method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.eval_function method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.learn_function method)
_calculate_policy_loss_disc() (lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_compute_priorities() (lzero.worker.muzero_collector.MuZeroCollector method)
_compute_target_policy_non_reanalyzed() (lzero.mcts.buffer.game_buffer.GameBuffer method)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
_compute_target_policy_reanalyzed() (lzero.mcts.buffer.game_buffer.GameBuffer method)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
_compute_target_reward_value() (lzero.mcts.buffer.game_buffer.GameBuffer method)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
_create_model() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_field_defaults (lzero.policy.alphazero.AlphaZeroPolicy.collect_function attribute)
(lzero.policy.alphazero.AlphaZeroPolicy.eval_function attribute)
(lzero.policy.alphazero.AlphaZeroPolicy.learn_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.collect_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.eval_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.learn_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.collect_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.eval_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.learn_function attribute)
_fields (lzero.policy.alphazero.AlphaZeroPolicy.collect_function attribute)
(lzero.policy.alphazero.AlphaZeroPolicy.eval_function attribute)
(lzero.policy.alphazero.AlphaZeroPolicy.learn_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.collect_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.eval_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.learn_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.collect_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.eval_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.learn_function attribute)
_forward_collect() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_forward_eval() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_forward_learn() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_get_attribute() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_get_batch_size() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_get_n_episode() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_get_n_sample() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_get_simulation_env() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_get_train_sample() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_init_collect() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_init_eval() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_init_learn() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_init_multi_gpu_setting() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_is_protocol (lzero.envs.wrappers.action_discretization_env_wrapper.ActionDiscretizationEnvWrapper attribute)
(lzero.envs.wrappers.lightzero_env_wrapper.LightZeroEnvWrapper attribute)
_load_state_dict_collect() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_load_state_dict_eval() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_load_state_dict_learn() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_make() (lzero.policy.alphazero.AlphaZeroPolicy.collect_function class method)
(lzero.policy.alphazero.AlphaZeroPolicy.eval_function class method)
(lzero.policy.alphazero.AlphaZeroPolicy.learn_function class method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.collect_function class method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.eval_function class method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.learn_function class method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.collect_function class method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.eval_function class method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.learn_function class method)
_make_batch() (lzero.mcts.buffer.game_buffer.GameBuffer method)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
_make_batch_for_reanalyze() (lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
_monitor_vars_learn() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_np_random (lzero.envs.wrappers.action_discretization_env_wrapper.ActionDiscretizationEnvWrapper property)
(lzero.envs.wrappers.lightzero_env_wrapper.LightZeroEnvWrapper property)
_output_log() (lzero.worker.muzero_collector.MuZeroCollector method)
_policy_value_fn() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
_policy_value_func() (lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_prepare_policy_non_reanalyzed_context() (lzero.mcts.buffer.game_buffer.GameBuffer method)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
_prepare_policy_reanalyzed_context() (lzero.mcts.buffer.game_buffer.GameBuffer method)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
_prepare_reward_value_context() (lzero.mcts.buffer.game_buffer.GameBuffer method)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
_preprocess_to_play_and_action_mask() (lzero.mcts.buffer.game_buffer.GameBuffer method)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
_process_transition() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_push_game_segment() (lzero.mcts.buffer.game_buffer.GameBuffer method)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
_remove() (lzero.mcts.buffer.game_buffer.GameBuffer method)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
_replace() (lzero.policy.alphazero.AlphaZeroPolicy.collect_function method)
(lzero.policy.alphazero.AlphaZeroPolicy.eval_function method)
(lzero.policy.alphazero.AlphaZeroPolicy.learn_function method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.collect_function method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.eval_function method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.learn_function method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.collect_function method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.eval_function method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.learn_function method)
_reset_collect() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_reset_eval() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_reset_learn() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_reset_stat() (lzero.worker.muzero_collector.MuZeroCollector method)
_sample_orig_data() (lzero.mcts.buffer.game_buffer.GameBuffer method)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
_sample_orig_data_episode() (lzero.mcts.buffer.game_buffer.GameBuffer method)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
_sample_orig_reanalyze_batch() (lzero.mcts.buffer.game_buffer.GameBuffer method)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
_sample_orig_reanalyze_data() (lzero.mcts.buffer.game_buffer.GameBuffer method)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
_scalar_reward() (lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
_set_attribute() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_state_dict_collect() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_state_dict_eval() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
_state_dict_learn() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
A
action_space (lzero.envs.wrappers.action_discretization_env_wrapper.ActionDiscretizationEnvWrapper property)
(lzero.envs.wrappers.lightzero_env_wrapper.LightZeroEnvWrapper property)
ActionDiscretizationEnvWrapper (class in lzero.envs.wrappers.action_discretization_env_wrapper)
AlphaZeroPolicy (class in lzero.policy.alphazero)
AlphaZeroPolicy.collect_function (class in lzero.policy.alphazero)
AlphaZeroPolicy.eval_function (class in lzero.policy.alphazero)
AlphaZeroPolicy.learn_function (class in lzero.policy.alphazero)
C
cfg (lzero.policy.alphazero.AlphaZeroPolicy property)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy property)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy property)
class_name() (lzero.envs.wrappers.action_discretization_env_wrapper.ActionDiscretizationEnvWrapper class method)
(lzero.envs.wrappers.lightzero_env_wrapper.LightZeroEnvWrapper class method)
close() (lzero.envs.wrappers.action_discretization_env_wrapper.ActionDiscretizationEnvWrapper method)
(lzero.envs.wrappers.lightzero_env_wrapper.LightZeroEnvWrapper method)
(lzero.worker.muzero_collector.MuZeroCollector method)
(lzero.worker.muzero_evaluator.MuZeroEvaluator method)
collect() (lzero.worker.muzero_collector.MuZeroCollector method)
collect_mode (lzero.policy.alphazero.AlphaZeroPolicy property)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy property)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy property)
config (lzero.mcts.buffer.game_buffer.GameBuffer attribute)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer attribute)
(lzero.mcts.tree_search.mcts_ctree.EfficientZeroMCTSCtree attribute)
(lzero.mcts.tree_search.mcts_ctree.GumbelMuZeroMCTSCtree attribute)
(lzero.mcts.tree_search.mcts_ctree.MuZeroMCTSCtree attribute)
(lzero.policy.alphazero.AlphaZeroPolicy attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy attribute)
(lzero.worker.muzero_collector.MuZeroCollector attribute)
(lzero.worker.muzero_evaluator.MuZeroEvaluator attribute)
count() (lzero.policy.alphazero.AlphaZeroPolicy.collect_function method)
(lzero.policy.alphazero.AlphaZeroPolicy.eval_function method)
(lzero.policy.alphazero.AlphaZeroPolicy.learn_function method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.collect_function method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.eval_function method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.learn_function method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.collect_function method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.eval_function method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.learn_function method)
D
default_config() (lzero.mcts.buffer.game_buffer.GameBuffer class method)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer class method)
(lzero.mcts.tree_search.mcts_ctree.EfficientZeroMCTSCtree class method)
(lzero.mcts.tree_search.mcts_ctree.GumbelMuZeroMCTSCtree class method)
(lzero.mcts.tree_search.mcts_ctree.MuZeroMCTSCtree class method)
(lzero.policy.alphazero.AlphaZeroPolicy class method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy class method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy class method)
(lzero.worker.muzero_collector.MuZeroCollector class method)
(lzero.worker.muzero_evaluator.MuZeroEvaluator class method)
default_model() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
E
EfficientZeroMCTSCtree (class in lzero.mcts.tree_search.mcts_ctree)
envstep (lzero.worker.muzero_collector.MuZeroCollector property)
eval() (lzero.worker.muzero_evaluator.MuZeroEvaluator method)
eval_alphazero (class in lzero.entry.eval_alphazero)
eval_mode (lzero.policy.alphazero.AlphaZeroPolicy property)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy property)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy property)
eval_muzero (class in lzero.entry.eval_muzero)
eval_muzero_with_gym_env (class in lzero.entry.eval_muzero_with_gym_env)
F
forward (lzero.policy.alphazero.AlphaZeroPolicy.collect_function attribute)
(lzero.policy.alphazero.AlphaZeroPolicy.eval_function attribute)
(lzero.policy.alphazero.AlphaZeroPolicy.learn_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.collect_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.eval_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.learn_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.collect_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.eval_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.learn_function attribute)
G
GameBuffer (class in lzero.mcts.buffer.game_buffer)
get_attribute (lzero.policy.alphazero.AlphaZeroPolicy.collect_function attribute)
(lzero.policy.alphazero.AlphaZeroPolicy.eval_function attribute)
(lzero.policy.alphazero.AlphaZeroPolicy.learn_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.collect_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.eval_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.learn_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.collect_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.eval_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.learn_function attribute)
get_num_of_episodes() (lzero.mcts.buffer.game_buffer.GameBuffer method)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
get_num_of_game_segments() (lzero.mcts.buffer.game_buffer.GameBuffer method)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
get_num_of_transitions() (lzero.mcts.buffer.game_buffer.GameBuffer method)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
get_train_sample (lzero.policy.alphazero.AlphaZeroPolicy.collect_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.collect_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.collect_function attribute)
GumbelAlphaZeroPolicy (class in lzero.policy.gumbel_alphazero)
GumbelAlphaZeroPolicy.collect_function (class in lzero.policy.gumbel_alphazero)
GumbelAlphaZeroPolicy.eval_function (class in lzero.policy.gumbel_alphazero)
GumbelAlphaZeroPolicy.learn_function (class in lzero.policy.gumbel_alphazero)
GumbelMuZeroMCTSCtree (class in lzero.mcts.tree_search.mcts_ctree)
I
index() (lzero.policy.alphazero.AlphaZeroPolicy.collect_function method)
(lzero.policy.alphazero.AlphaZeroPolicy.eval_function method)
(lzero.policy.alphazero.AlphaZeroPolicy.learn_function method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.collect_function method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.eval_function method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.learn_function method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.collect_function method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.eval_function method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.learn_function method)
info (lzero.policy.alphazero.AlphaZeroPolicy.learn_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.learn_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.learn_function attribute)
L
learn_mode (lzero.policy.alphazero.AlphaZeroPolicy property)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy property)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy property)
LightZeroEnvWrapper (class in lzero.envs.wrappers.lightzero_env_wrapper)
load_state_dict (lzero.policy.alphazero.AlphaZeroPolicy.collect_function attribute)
(lzero.policy.alphazero.AlphaZeroPolicy.eval_function attribute)
(lzero.policy.alphazero.AlphaZeroPolicy.learn_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.collect_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.eval_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.learn_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.collect_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.eval_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.learn_function attribute)
lzero.config.meta
module
M
metadata (lzero.envs.wrappers.action_discretization_env_wrapper.ActionDiscretizationEnvWrapper property)
(lzero.envs.wrappers.lightzero_env_wrapper.LightZeroEnvWrapper property)
module
lzero.config.meta
monitor_vars (lzero.policy.alphazero.AlphaZeroPolicy.learn_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.learn_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.learn_function attribute)
MuZeroCollector (class in lzero.worker.muzero_collector)
MuZeroEvaluator (class in lzero.worker.muzero_evaluator)
MuZeroGameBuffer (class in lzero.mcts.buffer.game_buffer_muzero)
MuZeroMCTSCtree (class in lzero.mcts.tree_search.mcts_ctree)
N
np_random (lzero.envs.wrappers.action_discretization_env_wrapper.ActionDiscretizationEnvWrapper property)
(lzero.envs.wrappers.lightzero_env_wrapper.LightZeroEnvWrapper property)
O
observation_space (lzero.envs.wrappers.action_discretization_env_wrapper.ActionDiscretizationEnvWrapper property)
(lzero.envs.wrappers.lightzero_env_wrapper.LightZeroEnvWrapper property)
P
pad_and_save_last_trajectory() (lzero.worker.muzero_collector.MuZeroCollector method)
process_transition (lzero.policy.alphazero.AlphaZeroPolicy.collect_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.collect_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.collect_function attribute)
push_game_segments() (lzero.mcts.buffer.game_buffer.GameBuffer method)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
R
reanalyze_buffer() (lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
remove_oldest_data_to_fit() (lzero.mcts.buffer.game_buffer.GameBuffer method)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
render() (lzero.envs.wrappers.action_discretization_env_wrapper.ActionDiscretizationEnvWrapper method)
(lzero.envs.wrappers.lightzero_env_wrapper.LightZeroEnvWrapper method)
render_mode (lzero.envs.wrappers.action_discretization_env_wrapper.ActionDiscretizationEnvWrapper property)
(lzero.envs.wrappers.lightzero_env_wrapper.LightZeroEnvWrapper property)
reset (lzero.policy.alphazero.AlphaZeroPolicy.collect_function attribute)
(lzero.policy.alphazero.AlphaZeroPolicy.eval_function attribute)
(lzero.policy.alphazero.AlphaZeroPolicy.learn_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.collect_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.eval_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.learn_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.collect_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.eval_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.learn_function attribute)
reset() (lzero.envs.wrappers.action_discretization_env_wrapper.ActionDiscretizationEnvWrapper method)
(lzero.envs.wrappers.lightzero_env_wrapper.LightZeroEnvWrapper method)
(lzero.worker.muzero_collector.MuZeroCollector method)
(lzero.worker.muzero_evaluator.MuZeroEvaluator method)
reset_env() (lzero.worker.muzero_collector.MuZeroCollector method)
(lzero.worker.muzero_evaluator.MuZeroEvaluator method)
reset_policy() (lzero.worker.muzero_collector.MuZeroCollector method)
(lzero.worker.muzero_evaluator.MuZeroEvaluator method)
reset_runtime_metrics() (lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
reward_range (lzero.envs.wrappers.action_discretization_env_wrapper.ActionDiscretizationEnvWrapper property)
(lzero.envs.wrappers.lightzero_env_wrapper.LightZeroEnvWrapper property)
roots() (lzero.mcts.tree_search.mcts_ctree.EfficientZeroMCTSCtree class method)
(lzero.mcts.tree_search.mcts_ctree.GumbelMuZeroMCTSCtree class method)
(lzero.mcts.tree_search.mcts_ctree.MuZeroMCTSCtree class method)
S
sample() (lzero.mcts.buffer.game_buffer.GameBuffer method)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)
SampledAlphaZeroPolicy (class in lzero.policy.sampled_alphazero)
SampledAlphaZeroPolicy.collect_function (class in lzero.policy.sampled_alphazero)
SampledAlphaZeroPolicy.eval_function (class in lzero.policy.sampled_alphazero)
SampledAlphaZeroPolicy.learn_function (class in lzero.policy.sampled_alphazero)
search() (lzero.mcts.tree_search.mcts_ctree.EfficientZeroMCTSCtree method)
(lzero.mcts.tree_search.mcts_ctree.GumbelMuZeroMCTSCtree method)
(lzero.mcts.tree_search.mcts_ctree.MuZeroMCTSCtree method)
search_with_reuse() (lzero.mcts.tree_search.mcts_ctree.EfficientZeroMCTSCtree method)
(lzero.mcts.tree_search.mcts_ctree.MuZeroMCTSCtree method)
seed() (lzero.envs.wrappers.action_discretization_env_wrapper.ActionDiscretizationEnvWrapper method)
(lzero.envs.wrappers.lightzero_env_wrapper.LightZeroEnvWrapper method)
set_attribute (lzero.policy.alphazero.AlphaZeroPolicy.collect_function attribute)
(lzero.policy.alphazero.AlphaZeroPolicy.eval_function attribute)
(lzero.policy.alphazero.AlphaZeroPolicy.learn_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.collect_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.eval_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.learn_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.collect_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.eval_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.learn_function attribute)
should_eval() (lzero.worker.muzero_evaluator.MuZeroEvaluator method)
spec (lzero.envs.wrappers.action_discretization_env_wrapper.ActionDiscretizationEnvWrapper property)
(lzero.envs.wrappers.lightzero_env_wrapper.LightZeroEnvWrapper property)
state_dict (lzero.policy.alphazero.AlphaZeroPolicy.collect_function attribute)
(lzero.policy.alphazero.AlphaZeroPolicy.eval_function attribute)
(lzero.policy.alphazero.AlphaZeroPolicy.learn_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.collect_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.eval_function attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy.learn_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.collect_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.eval_function attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy.learn_function attribute)
step() (lzero.envs.wrappers.action_discretization_env_wrapper.ActionDiscretizationEnvWrapper method)
(lzero.envs.wrappers.lightzero_env_wrapper.LightZeroEnvWrapper method)
sync_gradients() (lzero.policy.alphazero.AlphaZeroPolicy method)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy method)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy method)
T
total_field (lzero.policy.alphazero.AlphaZeroPolicy attribute)
(lzero.policy.gumbel_alphazero.GumbelAlphaZeroPolicy attribute)
(lzero.policy.sampled_alphazero.SampledAlphaZeroPolicy attribute)
train_alphazero (class in lzero.entry.train_alphazero)
train_muzero (class in lzero.entry.train_muzero)
train_muzero_with_gym_env (class in lzero.entry.train_muzero_with_gym_env)
train_muzero_with_reward_model (class in lzero.entry.train_muzero_with_reward_model)
U
unwrapped (lzero.envs.wrappers.action_discretization_env_wrapper.ActionDiscretizationEnvWrapper property)
(lzero.envs.wrappers.lightzero_env_wrapper.LightZeroEnvWrapper property)
update_priority() (lzero.mcts.buffer.game_buffer.GameBuffer method)
(lzero.mcts.buffer.game_buffer_muzero.MuZeroGameBuffer method)