Index _ | A | B | C | D | E | F | G | H | I | K | L | M | N | O | P | R | S | T | U | W | Z _ __enter__() (lightrft.utils.timer.Timer method) (lightrft.utils.utils.DummyProfile method) __exit__() (lightrft.utils.timer.Timer method) (lightrft.utils.utils.DummyProfile method) __init__() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) (lightrft.strategy.strategy_base.StrategyBase method) (lightrft.strategy.utils.broadcast_utils.BroadcastManager method) (lightrft.utils.timer.Timer method) (lightrft.utils.utils.DummyProfile method) __iter__() (lightrft.strategy.utils.data_utils.DistributedSampler method) __len__() (lightrft.strategy.utils.data_utils.DistributedSampler method) _build_multimodal_inputs() (lightrft.strategy.strategy_base.StrategyBase class method) _DEFAULT_NO_DECAY_NAME_LIST (in module lightrft.strategy.utils.optimizer_utils) A actor_learning_rate (lightrft.strategy.config.StrategyConfig attribute) adam_betas (lightrft.strategy.config.StrategyConfig attribute) adam_offload (lightrft.strategy.config.StrategyConfig attribute) add_arguments() (in module lightrft.utils.cli_args) add_param_group() (lightrft.strategy.fsdp.fsdp_utils.BaseOptimizer method) advantage_clip (lightrft.strategy.config.StrategyConfig attribute) advantage_estimator (lightrft.strategy.config.StrategyConfig attribute) advantages_norm (lightrft.strategy.config.StrategyConfig attribute) all_gather() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.strategy_base.StrategyBase method) all_gather_all_prompt_token_ids() (in module lightrft.strategy.utils.distributed_util) all_reduce() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.strategy_base.StrategyBase method) analyse_output_lengths() (in module lightrft.strategy.utils.statistic) analyze_output_lengths() (in module lightrft.strategy.utils.statistic) B backward() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.fsdp.fsdp_optimizer.FSDPadaptOptimizer method) (lightrft.strategy.fsdp.fsdp_utils.BaseOptimizer method) (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) (lightrft.strategy.strategy_base.StrategyBase method) backward_by_grad() (lightrft.strategy.fsdp.fsdp_utils.BaseOptimizer method) BaseDataHandler (class in lightrft.datasets.utils) BaseGradScaler (class in lightrft.strategy.fsdp.fsdp_utils) BaseOptimizer (class in lightrft.strategy.fsdp.fsdp_utils) bf16 (lightrft.strategy.config.StrategyConfig attribute) broadcast_to_engine() (lightrft.strategy.utils.broadcast_utils.BroadcastManager method) BroadcastManager (class in lightrft.strategy.utils.broadcast_utils) C calc_l2_norm() (in module lightrft.strategy.fsdp.fsdp_utils) calc_lp() (in module lightrft.strategy.fsdp.fsdp_utils) clip_grad_norm() (lightrft.strategy.fsdp.fsdp_optimizer.FSDPadaptOptimizer method) (lightrft.strategy.fsdp.fsdp_utils.BaseOptimizer method) collate_fn() (lightrft.datasets.grm_dataset.GRMDataset method) collect() (lightrft.strategy.utils.statistic.GenLenAnalyser method) collect_local_output_lengths() (in module lightrft.strategy.utils.statistic) compute_norm() (in module lightrft.strategy.fsdp.fsdp_utils) conditional_sft_processor() (in module lightrft.utils.processor) create_optimizer() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) (lightrft.strategy.strategy_base.StrategyBase method) create_sub_group() (in module lightrft.strategy.utils.distributed_util) create_trajectory_saver() (in module lightrft.utils.trajectory_saver) critic_learning_rate (lightrft.strategy.config.StrategyConfig attribute) critic_pretrain (lightrft.strategy.config.StrategyConfig attribute) D DeepspeedStrategy (class in lightrft.strategy.deepspeed.deepspeed) defaults (lightrft.strategy.fsdp.fsdp_utils.BaseOptimizer property) DistributedSampler (class in lightrft.strategy.utils.data_utils) (class in lightrft.utils.distributed_sampler) DTENSOR_SUPPORTED (in module lightrft.strategy.fsdp.fsdp_optimizer) DummyProfile (class in lightrft.utils.utils) dynamic_sampling (lightrft.strategy.config.StrategyConfig attribute) DynamicGradScaler (class in lightrft.strategy.fsdp.fsdp_utils) E enable_engine_sleep (lightrft.strategy.config.StrategyConfig attribute) engine_generate_local() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.strategy_base.StrategyBase method) engine_tp_size (lightrft.strategy.config.StrategyConfig attribute) engine_type (lightrft.strategy.config.StrategyConfig attribute) exist_and_not_none() (in module lightrft.datasets.utils) extra_args (lightrft.strategy.config.StrategyConfig attribute) extract_answer() (in module lightrft.datasets.utils) F FakeStrategy (class in lightrft.strategy.fake_strategy) find_latest_checkpoint_dir() (in module lightrft.strategy.utils.ckpt_utils) find_subsequence() (in module lightrft.datasets.utils) from_args() (lightrft.strategy.config.StrategyConfig class method) fsdp (lightrft.strategy.config.StrategyConfig attribute) fsdp_cpu_offload (lightrft.strategy.config.StrategyConfig attribute) FSDPadaptOptimizer (class in lightrft.strategy.fsdp.fsdp_optimizer) FSDPV2Strategy (class in lightrft.strategy.fsdp.fsdpv2) fused_linear_logprob (lightrft.strategy.config.StrategyConfig attribute) G gather_all_lengths() (in module lightrft.strategy.utils.statistic) gather_and_generate() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.strategy_base.StrategyBase method) gather_forward_split_backward_and_unpad() (in module lightrft.strategy.utils.parallel_utils) gather_inputs_object_for_inference() (in module lightrft.strategy.utils.distributed_util) generate() (lightrft.strategy.sglang_utils.sglang_engine.RLGenerationEngine method) GenLenAnalyser (class in lightrft.strategy.utils.statistic) get_current_device() (in module lightrft.utils.utils) get_ds_eval_config() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) get_ds_train_config() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) get_eval_ds_config() (in module lightrft.strategy.deepspeed.deepspeed_utils) get_fake_strategy() (in module lightrft.strategy.fake_strategy) get_media_info() (lightrft.datasets.hpdv3.HPDv3Handler method) (lightrft.datasets.image_reward_db.ImageRewardDBHandler method) (lightrft.datasets.imagegen_cot_reward.ImageGenCoTRewardGRMHandler method) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2AHandler method) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2IHandler method) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2VHandler method) (lightrft.datasets.rapidata.RapidataI2VHandler method) (lightrft.datasets.rapidata.RapidataT2VHandler method) (lightrft.datasets.utils.BaseDataHandler method) get_norm() (in module lightrft.strategy.fsdp.fsdp_utils) get_optimizer_grouped_parameters() (in module lightrft.strategy.utils.optimizer_utils) get_processor() (in module lightrft.utils.processor) get_rank() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.strategy_base.StrategyBase method) get_sequence_parallel_group() (in module lightrft.strategy.utils.parallel_utils) get_sequence_parallel_rank() (in module lightrft.strategy.utils.parallel_utils) get_sequence_parallel_world_size() (in module lightrft.strategy.utils.parallel_utils) get_sglang_engine() (in module lightrft.strategy.sglang_utils) get_strategy() (in module lightrft.strategy.strategy) get_task_instructions() (in module lightrft.datasets.utils) get_tensor_norm() (in module lightrft.strategy.fsdp.fsdp_utils) get_torch_profiler() (in module lightrft.utils.utils) get_train_ds_config() (in module lightrft.strategy.deepspeed.deepspeed_utils) get_vllm_engine() (in module lightrft.strategy.vllm_utils) grad_accum_dtype (lightrft.strategy.config.StrategyConfig attribute) GRMDataset (class in lightrft.datasets.grm_dataset) H HPDv3GRMHandler (class in lightrft.datasets.hpdv3) HPDv3Handler (class in lightrft.datasets.hpdv3) HPDv3PairHandler (class in lightrft.datasets.hpdv3) I ImageGenCoTRewardGRMHandler (class in lightrft.datasets.imagegen_cot_reward) ImageRewardDBHandler (class in lightrft.datasets.image_reward_db) init_logger() (in module lightrft.utils.logging_utils) init_model_context() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) (lightrft.strategy.strategy_base.StrategyBase method) init_process_group() (in module lightrft.strategy.utils.distributed_util) inv_scale (lightrft.strategy.fsdp.fsdp_utils.BaseGradScaler property) is_actor() (in module lightrft.strategy.strategy_base) is_meta_initialized() (in module lightrft.strategy.fsdp.fsdp_utils) is_mp_optimizer() (in module lightrft.strategy.fsdp.fsdpv2) is_rank_0() (lightrft.strategy.fake_strategy.FakeStrategy class method) (lightrft.strategy.strategy_base.StrategyBase class method) iterative_dpo_processor() (in module lightrft.utils.processor) K kl_estimator (lightrft.strategy.config.StrategyConfig attribute) L l2 (lightrft.strategy.config.StrategyConfig attribute) lightrft.datasets.grm_dataset module lightrft.datasets.hpdv3 module lightrft.datasets.image_reward_db module lightrft.datasets.imagegen_cot_reward module lightrft.datasets.omnirewardbench module lightrft.datasets.rapidata module lightrft.datasets.utils module lightrft.strategy module lightrft.strategy.config module lightrft.strategy.deepspeed module lightrft.strategy.deepspeed.deepspeed module lightrft.strategy.deepspeed.deepspeed_utils module lightrft.strategy.fake_strategy module lightrft.strategy.fsdp module lightrft.strategy.fsdp.fsdp_optimizer module lightrft.strategy.fsdp.fsdp_utils module lightrft.strategy.fsdp.fsdpv2 module lightrft.strategy.sglang_utils module lightrft.strategy.sglang_utils.sgl_model_saver module lightrft.strategy.sglang_utils.sglang_engine module lightrft.strategy.strategy module lightrft.strategy.strategy_base module lightrft.strategy.utils module lightrft.strategy.utils.broadcast_utils module lightrft.strategy.utils.ckpt_utils module lightrft.strategy.utils.data_utils module lightrft.strategy.utils.distributed_util module lightrft.strategy.utils.optimizer_utils module lightrft.strategy.utils.parallel_utils module lightrft.strategy.utils.statistic module lightrft.strategy.vllm_utils module lightrft.strategy.vllm_utils.vllm_worker_wrap_no_ray module lightrft.utils module lightrft.utils.cli_args module lightrft.utils.distributed_sampler module lightrft.utils.logging_utils module lightrft.utils.processor module lightrft.utils.remote_rm_utils module lightrft.utils.timer module lightrft.utils.trajectory_saver module lightrft.utils.utils module load_ckpt() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) (lightrft.strategy.strategy_base.StrategyBase method) load_data() (lightrft.datasets.hpdv3.HPDv3Handler method) (lightrft.datasets.image_reward_db.ImageRewardDBHandler method) (lightrft.datasets.imagegen_cot_reward.ImageGenCoTRewardGRMHandler method) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2IHandler method) (lightrft.datasets.rapidata.RapidataT2VHandler method) (lightrft.datasets.utils.BaseDataHandler method) load_fsdp_optimizer() (in module lightrft.strategy.fsdp.fsdp_optimizer) load_model() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) load_multimodal_content() (in module lightrft.datasets.utils) load_state_dict() (lightrft.strategy.fsdp.fsdp_optimizer.FSDPadaptOptimizer method) (lightrft.strategy.fsdp.fsdp_utils.BaseGradScaler method) (lightrft.strategy.fsdp.fsdp_utils.BaseOptimizer method) (lightrft.strategy.fsdp.fsdp_utils.DynamicGradScaler method) local_rank (lightrft.strategy.config.StrategyConfig attribute) loss_scale (lightrft.strategy.fsdp.fsdp_optimizer.FSDPadaptOptimizer property) lr_warmup_ratio (lightrft.strategy.config.StrategyConfig attribute) M manual_transformer_cls_names_to_wrap (in module lightrft.strategy.fsdp.fsdpv2) max_norm (lightrft.strategy.config.StrategyConfig attribute) maybe_load_optimizer() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) (lightrft.strategy.strategy_base.StrategyBase method) maybe_offload_optimizer() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) (lightrft.strategy.strategy_base.StrategyBase method) maybe_sleep_inference_engine() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.strategy_base.StrategyBase method) micro_rollout_batch_size (lightrft.strategy.config.StrategyConfig attribute) micro_train_batch_size (lightrft.strategy.config.StrategyConfig attribute) mixed_mm_data (lightrft.strategy.config.StrategyConfig attribute) ModelOptimPair (in module lightrft.strategy.deepspeed.deepspeed) (in module lightrft.strategy.fsdp.fsdpv2) ModelOrModelOptimPair (in module lightrft.strategy.deepspeed.deepspeed) (in module lightrft.strategy.fsdp.fsdpv2) module lightrft.datasets.grm_dataset lightrft.datasets.hpdv3 lightrft.datasets.image_reward_db lightrft.datasets.imagegen_cot_reward lightrft.datasets.omnirewardbench lightrft.datasets.rapidata lightrft.datasets.utils lightrft.strategy lightrft.strategy.config lightrft.strategy.deepspeed lightrft.strategy.deepspeed.deepspeed lightrft.strategy.deepspeed.deepspeed_utils lightrft.strategy.fake_strategy lightrft.strategy.fsdp lightrft.strategy.fsdp.fsdp_optimizer lightrft.strategy.fsdp.fsdp_utils lightrft.strategy.fsdp.fsdpv2 lightrft.strategy.sglang_utils lightrft.strategy.sglang_utils.sgl_model_saver lightrft.strategy.sglang_utils.sglang_engine lightrft.strategy.strategy lightrft.strategy.strategy_base lightrft.strategy.utils lightrft.strategy.utils.broadcast_utils lightrft.strategy.utils.ckpt_utils lightrft.strategy.utils.data_utils lightrft.strategy.utils.distributed_util lightrft.strategy.utils.optimizer_utils lightrft.strategy.utils.parallel_utils lightrft.strategy.utils.statistic lightrft.strategy.vllm_utils lightrft.strategy.vllm_utils.vllm_worker_wrap_no_ray lightrft.utils lightrft.utils.cli_args lightrft.utils.distributed_sampler lightrft.utils.logging_utils lightrft.utils.processor lightrft.utils.remote_rm_utils lightrft.utils.timer lightrft.utils.trajectory_saver lightrft.utils.utils moving_average() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) multi_tensor_l2norm_torch() (in module lightrft.strategy.fsdp.fsdp_utils) N n_samples_per_prompt (lightrft.strategy.config.StrategyConfig attribute) O offload_deepspeed_states() (in module lightrft.strategy.deepspeed.deepspeed_utils) offload_fsdp_optimizer() (in module lightrft.strategy.fsdp.fsdp_optimizer) OmniRewardBenchT2AHandler (class in lightrft.datasets.omnirewardbench) OmniRewardBenchT2IGRMHandler (class in lightrft.datasets.omnirewardbench) OmniRewardBenchT2IHandler (class in lightrft.datasets.omnirewardbench) OmniRewardBenchT2IPairHandler (class in lightrft.datasets.omnirewardbench) OmniRewardBenchT2VHandler (class in lightrft.datasets.omnirewardbench) OmniRewardBenchT2VPairHandler (class in lightrft.datasets.omnirewardbench) optimizer_step() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) (lightrft.strategy.strategy_base.StrategyBase method) overlap_comm (lightrft.strategy.config.StrategyConfig attribute) overlong_buffer (lightrft.strategy.config.StrategyConfig attribute) overlong_buffer_len (lightrft.strategy.config.StrategyConfig attribute) overlong_buffer_penalty_factor (lightrft.strategy.config.StrategyConfig attribute) P param_groups (lightrft.strategy.fsdp.fsdp_utils.BaseOptimizer property) parse_item() (lightrft.datasets.hpdv3.HPDv3GRMHandler method) (lightrft.datasets.hpdv3.HPDv3Handler method) (lightrft.datasets.hpdv3.HPDv3PairHandler method) (lightrft.datasets.image_reward_db.ImageRewardDBHandler method) (lightrft.datasets.imagegen_cot_reward.ImageGenCoTRewardGRMHandler method) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2AHandler method) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2IGRMHandler method) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2IHandler method) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2IPairHandler method) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2VHandler method) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2VPairHandler method) (lightrft.datasets.rapidata.RapidataI2VHandler method) (lightrft.datasets.rapidata.RapidataI2VPairHandler method) (lightrft.datasets.rapidata.RapidataT2VHandler method) (lightrft.datasets.rapidata.RapidataT2VPairHandler method) (lightrft.datasets.utils.BaseDataHandler method) plot_every (lightrft.strategy.config.StrategyConfig attribute) postprocess() (lightrft.strategy.utils.parallel_utils.SPDataProcessor method) prepare() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.strategy_base.StrategyBase method) prepare_model() (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) prepare_models_and_optimizers() (lightrft.strategy.strategy_base.StrategyBase method) prepare_reward_models() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) preprocess() (lightrft.strategy.utils.parallel_utils.SPDataProcessor method) pretrain_data (lightrft.strategy.config.StrategyConfig attribute) print() (lightrft.strategy.strategy_base.StrategyBase class method) print_config_summary() (lightrft.strategy.config.StrategyConfig method) print_rank_0() (in module lightrft.utils.utils) R RapidataI2VHandler (class in lightrft.datasets.rapidata) RapidataI2VPairHandler (class in lightrft.datasets.rapidata) RapidataT2VHandler (class in lightrft.datasets.rapidata) RapidataT2VPairHandler (class in lightrft.datasets.rapidata) reduce_grads() (in module lightrft.strategy.fsdp.fsdp_utils) rejection_sampling_processor() (in module lightrft.utils.processor) release_memory_occupation() (in module lightrft.strategy.sglang_utils.sgl_model_saver) reload_deepspeed_states() (in module lightrft.strategy.deepspeed.deepspeed_utils) remote_rm_fn() (in module lightrft.utils.remote_rm_utils) remote_rm_url (lightrft.strategy.config.StrategyConfig attribute) report_memory() (lightrft.strategy.strategy_base.StrategyBase class method) request_api_wrapper() (in module lightrft.utils.remote_rm_utils) resume_memory_occupation() (in module lightrft.strategy.sglang_utils.sgl_model_saver) reward_clip (lightrft.strategy.config.StrategyConfig attribute) reward_normalization() (in module lightrft.utils.processor) reward_running_norm (lightrft.strategy.config.StrategyConfig attribute) reward_running_norm_minus_mean (lightrft.strategy.config.StrategyConfig attribute) RLGenerationEngine (class in lightrft.strategy.sglang_utils.sglang_engine) S save_ckpt() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) (lightrft.strategy.strategy_base.StrategyBase method) save_model() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) save_trajectories() (lightrft.utils.trajectory_saver.TrajectorySaver method) scale (lightrft.strategy.fsdp.fsdp_utils.BaseGradScaler property) seed (lightrft.strategy.config.StrategyConfig attribute) set_epoch() (lightrft.strategy.utils.data_utils.DistributedSampler method) (lightrft.utils.distributed_sampler.DistributedSampler method) set_seed() (lightrft.strategy.strategy_base.StrategyBase method) set_sequence_parallel_group() (in module lightrft.strategy.utils.parallel_utils) setup_dataloader() (lightrft.strategy.strategy_base.StrategyBase method) setup_distributed() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.strategy_base.StrategyBase method) setup_inference_engine() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.strategy_base.StrategyBase method) shutdown() (lightrft.strategy.sglang_utils.sglang_engine.RLGenerationEngine method) sleep() (lightrft.strategy.sglang_utils.sglang_engine.RLGenerationEngine method) SLEEPED (lightrft.strategy.strategy_base.EngineStatus attribute) sp_size (lightrft.strategy.config.StrategyConfig attribute) sp_slice_and_pad_input() (in module lightrft.strategy.utils.parallel_utils) SPDataProcessor (class in lightrft.strategy.utils.parallel_utils) start() (lightrft.utils.timer.Timer class method) (lightrft.utils.utils.DummyProfile method) state_dict() (lightrft.strategy.fsdp.fsdp_optimizer.FSDPadaptOptimizer method) (lightrft.strategy.fsdp.fsdp_utils.BaseGradScaler method) (lightrft.strategy.fsdp.fsdp_utils.BaseOptimizer method) (lightrft.strategy.fsdp.fsdp_utils.DynamicGradScaler method) step() (lightrft.strategy.fsdp.fsdp_optimizer.FSDPadaptOptimizer method) (lightrft.strategy.fsdp.fsdp_utils.BaseOptimizer method) (lightrft.utils.timer.Timer class method) (lightrft.utils.utils.DummyProfile method) stop() (lightrft.utils.timer.Timer class method) (lightrft.utils.utils.DummyProfile method) StrategyBase (class in lightrft.strategy.strategy_base) StrategyConfig (class in lightrft.strategy.config) sync_and_clear_cache() (lightrft.strategy.strategy_base.StrategyBase class method) T task_type (lightrft.datasets.hpdv3.HPDv3Handler attribute) (lightrft.datasets.image_reward_db.ImageRewardDBHandler attribute) (lightrft.datasets.imagegen_cot_reward.ImageGenCoTRewardGRMHandler attribute) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2AHandler attribute) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2IHandler attribute) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2VHandler attribute) (lightrft.datasets.rapidata.RapidataI2VHandler attribute) (lightrft.datasets.rapidata.RapidataI2VPairHandler attribute) (lightrft.datasets.rapidata.RapidataT2VHandler attribute) Timer (class in lightrft.utils.timer) train_batch_size (lightrft.strategy.config.StrategyConfig attribute) TrajectorySaver (class in lightrft.utils.trajectory_saver) U unwrap_model() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) (lightrft.strategy.strategy_base.StrategyBase method) update() (lightrft.strategy.fsdp.fsdp_utils.BaseGradScaler method) (lightrft.strategy.fsdp.fsdp_utils.DynamicGradScaler method) update_engine_weights() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.strategy_base.StrategyBase method) update_weight() (lightrft.strategy.vllm_utils.vllm_worker_wrap_no_ray.WorkerWrap method) update_weights_from_tensor() (lightrft.strategy.sglang_utils.sglang_engine.RLGenerationEngine method) use_kl_loss (lightrft.strategy.config.StrategyConfig attribute) use_mp_opt (lightrft.strategy.config.StrategyConfig attribute) use_tensorboard (lightrft.strategy.config.StrategyConfig attribute) W wake_up() (lightrft.strategy.sglang_utils.sglang_engine.RLGenerationEngine method) WAKEUP (lightrft.strategy.strategy_base.EngineStatus attribute) wakeup_inference_engine() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.strategy_base.StrategyBase method) WorkerWrap (class in lightrft.strategy.vllm_utils.vllm_worker_wrap_no_ray) Z zero_grad() (lightrft.strategy.fsdp.fsdp_optimizer.FSDPadaptOptimizer method) (lightrft.strategy.fsdp.fsdp_utils.BaseOptimizer method) zero_pad_sequences() (in module lightrft.datasets.utils) zero_stage (lightrft.strategy.config.StrategyConfig attribute) zpg (lightrft.strategy.config.StrategyConfig attribute)