Index _ | A | B | C | D | E | F | G | H | I | K | L | M | N | O | P | Q | R | S | T | U | V | W | Z _ __enter__() (lightrft.utils.timer.Timer method) (lightrft.utils.utils.DummyProfile method) __exit__() (lightrft.utils.timer.Timer method) (lightrft.utils.utils.DummyProfile method) __init__() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) (lightrft.strategy.strategy_base.StrategyBase method) (lightrft.strategy.utils.broadcast_utils.BroadcastManager method) (lightrft.trainer.fast_exp_maker.FastExperienceMaker method) (lightrft.trainer.spmd_ppo_trainer.SPMDPPOTrainer method) (lightrft.trainer.spmd_ppo_trainer.SPMDPPOTrainerBase method) (lightrft.trainer.spmd_ppo_trainer.SPMDPPOTrainerVL method) (lightrft.utils.timer.Timer method) (lightrft.utils.utils.DummyProfile method) __iter__() (lightrft.strategy.utils.data_utils.DistributedSampler method) __len__() (lightrft.strategy.utils.data_utils.DistributedSampler method) _build_multimodal_inputs() (lightrft.strategy.strategy_base.StrategyBase class method) _DEFAULT_NO_DECAY_NAME_LIST (in module lightrft.strategy.utils.optimizer_utils) A action_entropy (lightrft.trainer.experience_maker.Experience attribute) (lightrft.trainer.experience_maker_vl.ExperienceVL attribute) (lightrft.trainer.replay_buffer_utils.BufferItem attribute) (lightrft.trainer.replay_buffer_utils.BufferItemVL attribute) action_log_probs (lightrft.trainer.experience_maker.Experience attribute) (lightrft.trainer.experience_maker_vl.ExperienceVL attribute) (lightrft.trainer.replay_buffer_utils.BufferItem attribute) (lightrft.trainer.replay_buffer_utils.BufferItemVL attribute) action_mask (lightrft.trainer.experience_maker.Experience attribute) (lightrft.trainer.experience_maker.Samples attribute) (lightrft.trainer.experience_maker_vl.ExperienceVL attribute) (lightrft.trainer.experience_maker_vl.SamplesVL attribute) (lightrft.trainer.replay_buffer_utils.BufferItem attribute) (lightrft.trainer.replay_buffer_utils.BufferItemVL attribute) actor_learning_rate (lightrft.strategy.config.StrategyConfig attribute) ActorAL (class in lightrft.models.actor_al) ActorLanguage (class in lightrft.models.actor_language) ActorVL (class in lightrft.models.actor_vl) adam_betas (lightrft.strategy.config.StrategyConfig attribute) adam_offload (lightrft.strategy.config.StrategyConfig attribute) AdaptiveKLController (class in lightrft.trainer.kl_controller) add_arguments() (in module lightrft.utils.cli_args) add_param_group() (lightrft.strategy.fsdp.fsdp_utils.BaseOptimizer method) advantage_clip (lightrft.strategy.config.StrategyConfig attribute) advantage_estimator (lightrft.strategy.config.StrategyConfig attribute) advantages (lightrft.trainer.experience_maker.Experience attribute) (lightrft.trainer.experience_maker_vl.ExperienceVL attribute) (lightrft.trainer.replay_buffer_utils.BufferItem attribute) (lightrft.trainer.replay_buffer_utils.BufferItemVL attribute) advantages_norm (lightrft.strategy.config.StrategyConfig attribute) all_gather() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.strategy_base.StrategyBase method) all_gather_all_prompt_token_ids() (in module lightrft.strategy.utils.distributed_util) all_reduce() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.strategy_base.StrategyBase method) analyse_output_lengths() (in module lightrft.strategy.utils.statistic) analyze_output_lengths() (in module lightrft.strategy.utils.statistic) append() (lightrft.trainer.replay_buffer.NaiveReplayBuffer method) (lightrft.trainer.replay_buffer_vl.NaiveReplayBufferVL method) apply_monkey_patch_to_llama() (in module lightrft.models.monkey_patch.apply) apply_monkey_patch_to_qwen2() (in module lightrft.models.monkey_patch.apply) attention_mask (lightrft.trainer.experience_maker.Experience attribute) (lightrft.trainer.experience_maker.Samples attribute) (lightrft.trainer.experience_maker_vl.ExperienceVL attribute) (lightrft.trainer.experience_maker_vl.SamplesVL attribute) (lightrft.trainer.replay_buffer_utils.BufferItem attribute) (lightrft.trainer.replay_buffer_utils.BufferItemVL attribute) B backward() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.fsdp.fsdp_optimizer.FSDPadaptOptimizer method) (lightrft.strategy.fsdp.fsdp_utils.BaseOptimizer method) (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) (lightrft.strategy.strategy_base.StrategyBase method) backward_by_grad() (lightrft.strategy.fsdp.fsdp_utils.BaseOptimizer method) base_action_log_probs (lightrft.trainer.experience_maker.Experience attribute) (lightrft.trainer.experience_maker_vl.ExperienceVL attribute) (lightrft.trainer.replay_buffer_utils.BufferItem attribute) (lightrft.trainer.replay_buffer_utils.BufferItemVL attribute) BaseDataHandler (class in lightrft.datasets.utils) BaseGradScaler (class in lightrft.strategy.fsdp.fsdp_utils) BaseOptimizer (class in lightrft.strategy.fsdp.fsdp_utils) bf16 (lightrft.strategy.config.StrategyConfig attribute) broadcast_to_engine() (lightrft.strategy.utils.broadcast_utils.BroadcastManager method) BroadcastManager (class in lightrft.strategy.utils.broadcast_utils) BufferItem (class in lightrft.trainer.replay_buffer_utils) BufferItemVL (class in lightrft.trainer.replay_buffer_utils) C calc_l2_norm() (in module lightrft.strategy.fsdp.fsdp_utils) calc_lp() (in module lightrft.strategy.fsdp.fsdp_utils) clear() (lightrft.trainer.replay_buffer.NaiveReplayBuffer method) (lightrft.trainer.replay_buffer_vl.NaiveReplayBufferVL method) clip_filter_like_weight_func() (in module lightrft.trainer.experience_maker) clip_grad_norm() (lightrft.strategy.fsdp.fsdp_optimizer.FSDPadaptOptimizer method) (lightrft.strategy.fsdp.fsdp_utils.BaseOptimizer method) collate_fn() (lightrft.datasets.grm_dataset.GRMDataset method) (lightrft.trainer.replay_buffer.NaiveReplayBuffer method) (lightrft.trainer.replay_buffer_vl.NaiveReplayBufferVL method) collect() (lightrft.strategy.utils.statistic.GenLenAnalyser method) collect_local_output_lengths() (in module lightrft.strategy.utils.statistic) compute_clip_fraction() (in module lightrft.trainer.utils) compute_norm() (in module lightrft.strategy.fsdp.fsdp_utils) concatenated_forward() (lightrft.trainer.srm_trainer_al.SRMTrainerAL method) (lightrft.trainer.srm_trainer_vl.SRMTrainerVL method) concatenated_inputs() (lightrft.trainer.srm_trainer_al.SRMTrainerAL method) (lightrft.trainer.srm_trainer_vl.SRMTrainerVL method) conditional_sft_processor() (in module lightrft.utils.processor) create_optimizer() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) (lightrft.strategy.strategy_base.StrategyBase method) create_sub_group() (in module lightrft.strategy.utils.distributed_util) create_trajectory_saver() (in module lightrft.utils.trajectory_saver) critic_learning_rate (lightrft.strategy.config.StrategyConfig attribute) critic_pretrain (lightrft.strategy.config.StrategyConfig attribute) cumulative_product() (in module lightrft.trainer.experience_maker_vl) D DeepspeedStrategy (class in lightrft.strategy.deepspeed.deepspeed) defaults (lightrft.strategy.fsdp.fsdp_utils.BaseOptimizer property) DistributedSampler (class in lightrft.strategy.utils.data_utils) (class in lightrft.utils.distributed_sampler) DPOLoss (class in lightrft.models.loss) DTENSOR_SUPPORTED (in module lightrft.strategy.fsdp.fsdp_optimizer) DummyProfile (class in lightrft.utils.utils) dynamic_sampling (lightrft.strategy.config.StrategyConfig attribute) DynamicGradScaler (class in lightrft.strategy.fsdp.fsdp_utils) E enable_engine_sleep (lightrft.strategy.config.StrategyConfig attribute) engine_generate_local() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.strategy_base.StrategyBase method) engine_tp_size (lightrft.strategy.config.StrategyConfig attribute) engine_type (lightrft.strategy.config.StrategyConfig attribute) evaluate() (lightrft.trainer.grm_trainer_vl.GRMTrainerVL method) (lightrft.trainer.ppo_trainer_vl.PPOTrainerVL method) (lightrft.trainer.srm_trainer_al.SRMTrainerAL method) (lightrft.trainer.srm_trainer_vl.SRMTrainerVL method) exist_and_not_none() (in module lightrft.datasets.utils) Experience (class in lightrft.trainer.experience_maker) ExperienceVL (class in lightrft.trainer.experience_maker_vl) extra_args (lightrft.strategy.config.StrategyConfig attribute) extract_answer() (in module lightrft.datasets.utils) F FakeStrategy (class in lightrft.strategy.fake_strategy) FastExperienceMaker (class in lightrft.trainer.fast_exp_maker) find_latest_checkpoint_dir() (in module lightrft.strategy.utils.ckpt_utils) find_subsequence() (in module lightrft.datasets.utils) fire_sampling() (in module lightrft.trainer.utils) fit() (lightrft.trainer.grm_trainer_vl.GRMTrainerVL method) (lightrft.trainer.ppo_trainer.PPOTrainer method) (lightrft.trainer.ppo_trainer_vl.PPOTrainerVL method) (lightrft.trainer.srm_trainer_al.SRMTrainerAL method) (lightrft.trainer.srm_trainer_vl.SRMTrainerVL method) FixedKLController (class in lightrft.trainer.kl_controller) forward() (lightrft.models.actor_al.ActorAL method) (lightrft.models.actor_language.ActorLanguage method) (lightrft.models.actor_vl.ActorVL method) (lightrft.models.grm_vl.GenerativeRewardModelVL method) (lightrft.models.loss.DPOLoss method) (lightrft.models.loss.GPTLMLoss method) (lightrft.models.loss.HPSLoss method) (lightrft.models.loss.KDLoss method) (lightrft.models.loss.KTOLoss method) (lightrft.models.loss.LogExpLoss method) (lightrft.models.loss.LogSigmoidLoss method) (lightrft.models.loss.PairWiseLoss method) (lightrft.models.loss.PolicyLoss method) (lightrft.models.loss.PRMLoss method) (lightrft.models.loss.ValueLoss method) (lightrft.models.loss.VanillaKTOLoss method) (lightrft.models.srm_al.ScalarRewardModelAL method) (lightrft.models.srm_vl.ScalarRewardModelVL method) from_args() (lightrft.strategy.config.StrategyConfig class method) fsdp (lightrft.strategy.config.StrategyConfig attribute) fsdp_cpu_offload (lightrft.strategy.config.StrategyConfig attribute) FSDPadaptOptimizer (class in lightrft.strategy.fsdp.fsdp_optimizer) FSDPV2Strategy (class in lightrft.strategy.fsdp.fsdpv2) fused_linear_logprob (lightrft.strategy.config.StrategyConfig attribute) G gather_all_lengths() (in module lightrft.strategy.utils.statistic) gather_and_generate() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.strategy_base.StrategyBase method) gather_forward_split_backward_and_unpad() (in module lightrft.strategy.utils.parallel_utils) gather_inputs_object_for_inference() (in module lightrft.strategy.utils.distributed_util) generate() (lightrft.models.actor_al.ActorAL method) (lightrft.models.actor_language.ActorLanguage method) (lightrft.models.actor_vl.ActorVL method) (lightrft.models.grm_vl.GenerativeRewardModelVL method) (lightrft.strategy.sglang_utils.sglang_engine.RLGenerationEngine method) generate_samples() (lightrft.trainer.experience_maker.NaiveExperienceMaker method) (lightrft.trainer.experience_maker_vl.NaiveExperienceMakerVL method) (lightrft.trainer.fast_exp_maker.FastExperienceMaker method) GenerativeRewardModelVL (class in lightrft.models.grm_vl) GenLenAnalyser (class in lightrft.strategy.utils.statistic) get_advantages_and_returns() (lightrft.trainer.experience_maker.NaiveExperienceMaker method) (lightrft.trainer.experience_maker_vl.NaiveExperienceMakerVL method) get_cpgd_advantages_returns() (in module lightrft.trainer.utils) get_cumulative_returns() (lightrft.trainer.experience_maker.NaiveExperienceMaker method) (lightrft.trainer.experience_maker_vl.NaiveExperienceMakerVL method) get_current_device() (in module lightrft.utils.utils) get_ds_eval_config() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) get_ds_train_config() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) get_eval_ds_config() (in module lightrft.strategy.deepspeed.deepspeed_utils) get_fake_strategy() (in module lightrft.strategy.fake_strategy) get_media_info() (lightrft.datasets.hpdv3.HPDv3Handler method) (lightrft.datasets.image_reward_db.ImageRewardDBHandler method) (lightrft.datasets.imagegen_cot_reward.ImageGenCoTRewardGRMHandler method) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2AHandler method) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2IHandler method) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2VHandler method) (lightrft.datasets.rapidata.RapidataI2VHandler method) (lightrft.datasets.rapidata.RapidataT2VHandler method) (lightrft.datasets.utils.BaseDataHandler method) get_norm() (in module lightrft.strategy.fsdp.fsdp_utils) get_optimizer_grouped_parameters() (in module lightrft.strategy.utils.optimizer_utils) get_processor() (in module lightrft.utils.processor) get_rank() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.strategy_base.StrategyBase method) get_sequence_parallel_group() (in module lightrft.strategy.utils.parallel_utils) get_sequence_parallel_rank() (in module lightrft.strategy.utils.parallel_utils) get_sequence_parallel_world_size() (in module lightrft.strategy.utils.parallel_utils) get_sglang_engine() (in module lightrft.strategy.sglang_utils) get_strategy() (in module lightrft.strategy.strategy) get_task_instructions() (in module lightrft.datasets.utils) get_tensor_norm() (in module lightrft.strategy.fsdp.fsdp_utils) get_torch_profiler() (in module lightrft.utils.utils) get_train_ds_config() (in module lightrft.strategy.deepspeed.deepspeed_utils) get_vllm_engine() (in module lightrft.strategy.vllm_utils) GPTLMLoss (class in lightrft.models.loss) grad_accum_dtype (lightrft.strategy.config.StrategyConfig attribute) gradient_checkpointing_disable() (lightrft.models.actor_al.ActorAL method) (lightrft.models.actor_language.ActorLanguage method) (lightrft.models.actor_vl.ActorVL method) (lightrft.models.grm_vl.GenerativeRewardModelVL method) (lightrft.models.srm_al.ScalarRewardModelAL method) (lightrft.models.srm_vl.ScalarRewardModelVL method) gradient_checkpointing_enable() (lightrft.models.actor_al.ActorAL method) (lightrft.models.actor_language.ActorLanguage method) (lightrft.models.actor_vl.ActorVL method) (lightrft.models.grm_vl.GenerativeRewardModelVL method) (lightrft.models.srm_al.ScalarRewardModelAL method) (lightrft.models.srm_vl.ScalarRewardModelVL method) GRMDataset (class in lightrft.datasets.grm_dataset) GRMTrainerVL (class in lightrft.trainer.grm_trainer_vl) H HPDv3GRMHandler (class in lightrft.datasets.hpdv3) HPDv3Handler (class in lightrft.datasets.hpdv3) HPDv3PairHandler (class in lightrft.datasets.hpdv3) HPSLoss (class in lightrft.models.loss) I image_grid_thws (lightrft.trainer.experience_maker_vl.ExperienceVL attribute) (lightrft.trainer.experience_maker_vl.SamplesVL attribute) (lightrft.trainer.replay_buffer_utils.BufferItemVL attribute) image_num (lightrft.trainer.experience_maker_vl.SamplesVL attribute) ImageGenCoTRewardGRMHandler (class in lightrft.datasets.imagegen_cot_reward) ImageRewardDBHandler (class in lightrft.datasets.image_reward_db) info (lightrft.trainer.experience_maker.Experience attribute) (lightrft.trainer.experience_maker_vl.ExperienceVL attribute) (lightrft.trainer.replay_buffer_utils.BufferItem attribute) (lightrft.trainer.replay_buffer_utils.BufferItemVL attribute) init_logger() (in module lightrft.utils.logging_utils) init_model_context() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) (lightrft.strategy.strategy_base.StrategyBase method) init_process_group() (in module lightrft.strategy.utils.distributed_util) inv_scale (lightrft.strategy.fsdp.fsdp_utils.BaseGradScaler property) is_actor() (in module lightrft.strategy.strategy_base) is_meta_initialized() (in module lightrft.strategy.fsdp.fsdp_utils) is_mp_optimizer() (in module lightrft.strategy.fsdp.fsdpv2) is_rank_0() (lightrft.strategy.fake_strategy.FakeStrategy class method) (lightrft.strategy.strategy_base.StrategyBase class method) is_vl_experience() (in module lightrft.trainer.replay_buffer_utils) iterative_dpo_processor() (in module lightrft.utils.processor) K KDLoss (class in lightrft.models.loss) kl (lightrft.trainer.experience_maker.Experience attribute) (lightrft.trainer.experience_maker_vl.ExperienceVL attribute) kl_estimator (lightrft.strategy.config.StrategyConfig attribute) KTOLoss (class in lightrft.models.loss) L l2 (lightrft.strategy.config.StrategyConfig attribute) labels (lightrft.trainer.experience_maker.Samples attribute) (lightrft.trainer.experience_maker_vl.SamplesVL attribute) lightrft.datasets.grm_dataset module lightrft.datasets.hpdv3 module lightrft.datasets.image_reward_db module lightrft.datasets.imagegen_cot_reward module lightrft.datasets.omnirewardbench module lightrft.datasets.rapidata module lightrft.datasets.utils module lightrft.models module lightrft.models.actor_al module lightrft.models.actor_language module lightrft.models.actor_vl module lightrft.models.grm_vl module lightrft.models.loss module lightrft.models.monkey_patch module lightrft.models.monkey_patch.apply module lightrft.models.monkey_patch.llama module lightrft.models.monkey_patch.qwen module lightrft.models.srm_al module lightrft.models.srm_vl module lightrft.models.utils module lightrft.strategy module lightrft.strategy.config module lightrft.strategy.deepspeed module lightrft.strategy.deepspeed.deepspeed module lightrft.strategy.deepspeed.deepspeed_utils module lightrft.strategy.fake_strategy module lightrft.strategy.fsdp module lightrft.strategy.fsdp.fsdp_optimizer module lightrft.strategy.fsdp.fsdp_utils module lightrft.strategy.fsdp.fsdpv2 module lightrft.strategy.sglang_utils module lightrft.strategy.sglang_utils.sgl_model_saver module lightrft.strategy.sglang_utils.sglang_engine module lightrft.strategy.strategy module lightrft.strategy.strategy_base module lightrft.strategy.utils module lightrft.strategy.utils.broadcast_utils module lightrft.strategy.utils.ckpt_utils module lightrft.strategy.utils.data_utils module lightrft.strategy.utils.distributed_util module lightrft.strategy.utils.optimizer_utils module lightrft.strategy.utils.parallel_utils module lightrft.strategy.utils.statistic module lightrft.strategy.vllm_utils module lightrft.strategy.vllm_utils.vllm_worker_wrap_no_ray module lightrft.trainer module lightrft.trainer.experience_maker module lightrft.trainer.experience_maker_vl module lightrft.trainer.fast_exp_maker module lightrft.trainer.grm_trainer_vl module lightrft.trainer.kl_controller module lightrft.trainer.ppo_trainer module lightrft.trainer.ppo_trainer_vl module lightrft.trainer.replay_buffer module lightrft.trainer.replay_buffer_utils module lightrft.trainer.replay_buffer_vl module lightrft.trainer.spmd_ppo_trainer module lightrft.trainer.srm_trainer_al module lightrft.trainer.srm_trainer_vl module lightrft.trainer.utils module lightrft.utils module lightrft.utils.cli_args module lightrft.utils.distributed_sampler module lightrft.utils.logging_utils module lightrft.utils.processor module lightrft.utils.remote_rm_utils module lightrft.utils.timer module lightrft.utils.trajectory_saver module lightrft.utils.utils module llama_attn_forward() (in module lightrft.models.monkey_patch.llama) load_ckpt() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) (lightrft.strategy.strategy_base.StrategyBase method) load_data() (lightrft.datasets.hpdv3.HPDv3Handler method) (lightrft.datasets.image_reward_db.ImageRewardDBHandler method) (lightrft.datasets.imagegen_cot_reward.ImageGenCoTRewardGRMHandler method) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2IHandler method) (lightrft.datasets.rapidata.RapidataT2VHandler method) (lightrft.datasets.utils.BaseDataHandler method) load_fsdp_optimizer() (in module lightrft.strategy.fsdp.fsdp_optimizer) load_model() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) load_multimodal_content() (in module lightrft.datasets.utils) load_state_dict() (lightrft.strategy.fsdp.fsdp_optimizer.FSDPadaptOptimizer method) (lightrft.strategy.fsdp.fsdp_utils.BaseGradScaler method) (lightrft.strategy.fsdp.fsdp_utils.BaseOptimizer method) (lightrft.strategy.fsdp.fsdp_utils.DynamicGradScaler method) local_rank (lightrft.strategy.config.StrategyConfig attribute) log_probs_from_logits() (in module lightrft.models.utils) LogExpLoss (class in lightrft.models.loss) LogSigmoidLoss (class in lightrft.models.loss) loss_scale (lightrft.strategy.fsdp.fsdp_optimizer.FSDPadaptOptimizer property) lr_warmup_ratio (lightrft.strategy.config.StrategyConfig attribute) M make_experience() (lightrft.trainer.experience_maker.NaiveExperienceMaker method) (lightrft.trainer.experience_maker_vl.NaiveExperienceMakerVL method) (lightrft.trainer.fast_exp_maker.FastExperienceMaker method) make_experience_batch() (in module lightrft.trainer.replay_buffer_utils) make_experience_list() (lightrft.trainer.experience_maker.NaiveExperienceMaker method) (lightrft.trainer.experience_maker_vl.NaiveExperienceMakerVL method) (lightrft.trainer.fast_exp_maker.FastExperienceMaker method) manual_transformer_cls_names_to_wrap (in module lightrft.strategy.fsdp.fsdpv2) max_norm (lightrft.strategy.config.StrategyConfig attribute) maybe_load_optimizer() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) (lightrft.strategy.strategy_base.StrategyBase method) maybe_offload_optimizer() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) (lightrft.strategy.strategy_base.StrategyBase method) maybe_sleep_inference_engine() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.strategy_base.StrategyBase method) micro_rollout_batch_size (lightrft.strategy.config.StrategyConfig attribute) micro_train_batch_size (lightrft.strategy.config.StrategyConfig attribute) mixed_mm_data (lightrft.strategy.config.StrategyConfig attribute) modality (lightrft.models.actor_al.ActorAL attribute) (lightrft.models.actor_language.ActorLanguage attribute) (lightrft.models.actor_vl.ActorVL attribute) ModelOptimPair (in module lightrft.strategy.deepspeed.deepspeed) (in module lightrft.strategy.fsdp.fsdpv2) ModelOrModelOptimPair (in module lightrft.strategy.deepspeed.deepspeed) (in module lightrft.strategy.fsdp.fsdpv2) module lightrft.datasets.grm_dataset lightrft.datasets.hpdv3 lightrft.datasets.image_reward_db lightrft.datasets.imagegen_cot_reward lightrft.datasets.omnirewardbench lightrft.datasets.rapidata lightrft.datasets.utils lightrft.models lightrft.models.actor_al lightrft.models.actor_language lightrft.models.actor_vl lightrft.models.grm_vl lightrft.models.loss lightrft.models.monkey_patch lightrft.models.monkey_patch.apply lightrft.models.monkey_patch.llama lightrft.models.monkey_patch.qwen lightrft.models.srm_al lightrft.models.srm_vl lightrft.models.utils lightrft.strategy lightrft.strategy.config lightrft.strategy.deepspeed lightrft.strategy.deepspeed.deepspeed lightrft.strategy.deepspeed.deepspeed_utils lightrft.strategy.fake_strategy lightrft.strategy.fsdp lightrft.strategy.fsdp.fsdp_optimizer lightrft.strategy.fsdp.fsdp_utils lightrft.strategy.fsdp.fsdpv2 lightrft.strategy.sglang_utils lightrft.strategy.sglang_utils.sgl_model_saver lightrft.strategy.sglang_utils.sglang_engine lightrft.strategy.strategy lightrft.strategy.strategy_base lightrft.strategy.utils lightrft.strategy.utils.broadcast_utils lightrft.strategy.utils.ckpt_utils lightrft.strategy.utils.data_utils lightrft.strategy.utils.distributed_util lightrft.strategy.utils.optimizer_utils lightrft.strategy.utils.parallel_utils lightrft.strategy.utils.statistic lightrft.strategy.vllm_utils lightrft.strategy.vllm_utils.vllm_worker_wrap_no_ray lightrft.trainer lightrft.trainer.experience_maker lightrft.trainer.experience_maker_vl lightrft.trainer.fast_exp_maker lightrft.trainer.grm_trainer_vl lightrft.trainer.kl_controller lightrft.trainer.ppo_trainer lightrft.trainer.ppo_trainer_vl lightrft.trainer.replay_buffer lightrft.trainer.replay_buffer_utils lightrft.trainer.replay_buffer_vl lightrft.trainer.spmd_ppo_trainer lightrft.trainer.srm_trainer_al lightrft.trainer.srm_trainer_vl lightrft.trainer.utils lightrft.utils lightrft.utils.cli_args lightrft.utils.distributed_sampler lightrft.utils.logging_utils lightrft.utils.processor lightrft.utils.remote_rm_utils lightrft.utils.timer lightrft.utils.trajectory_saver lightrft.utils.utils moving_average() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) multi_tensor_l2norm_torch() (in module lightrft.strategy.fsdp.fsdp_utils) N n_samples_per_prompt (lightrft.strategy.config.StrategyConfig attribute) NaiveExperienceMaker (class in lightrft.trainer.experience_maker) NaiveExperienceMakerVL (class in lightrft.trainer.experience_maker_vl) NaiveReplayBuffer (class in lightrft.trainer.replay_buffer) NaiveReplayBufferVL (class in lightrft.trainer.replay_buffer_vl) normalize() (lightrft.trainer.replay_buffer.NaiveReplayBuffer method) (lightrft.trainer.replay_buffer_vl.NaiveReplayBufferVL method) num_actions (lightrft.trainer.experience_maker.Samples attribute) (lightrft.trainer.experience_maker_vl.SamplesVL attribute) O offload_deepspeed_states() (in module lightrft.strategy.deepspeed.deepspeed_utils) offload_fsdp_optimizer() (in module lightrft.strategy.fsdp.fsdp_optimizer) OmniRewardBenchT2AHandler (class in lightrft.datasets.omnirewardbench) OmniRewardBenchT2IGRMHandler (class in lightrft.datasets.omnirewardbench) OmniRewardBenchT2IHandler (class in lightrft.datasets.omnirewardbench) OmniRewardBenchT2IPairHandler (class in lightrft.datasets.omnirewardbench) OmniRewardBenchT2VHandler (class in lightrft.datasets.omnirewardbench) OmniRewardBenchT2VPairHandler (class in lightrft.datasets.omnirewardbench) optimizer_step() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) (lightrft.strategy.strategy_base.StrategyBase method) output_texts (lightrft.trainer.experience_maker_vl.SamplesVL attribute) overlap_comm (lightrft.strategy.config.StrategyConfig attribute) overlong_buffer (lightrft.strategy.config.StrategyConfig attribute) overlong_buffer_len (lightrft.strategy.config.StrategyConfig attribute) overlong_buffer_penalty_factor (lightrft.strategy.config.StrategyConfig attribute) P packed_seq_lens (lightrft.trainer.experience_maker.Samples attribute) (lightrft.trainer.experience_maker_vl.SamplesVL attribute) pad_len (lightrft.trainer.experience_maker.Samples attribute) PairWiseLoss (class in lightrft.models.loss) param_groups (lightrft.strategy.fsdp.fsdp_utils.BaseOptimizer property) parse_item() (lightrft.datasets.hpdv3.HPDv3GRMHandler method) (lightrft.datasets.hpdv3.HPDv3Handler method) (lightrft.datasets.hpdv3.HPDv3PairHandler method) (lightrft.datasets.image_reward_db.ImageRewardDBHandler method) (lightrft.datasets.imagegen_cot_reward.ImageGenCoTRewardGRMHandler method) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2AHandler method) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2IGRMHandler method) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2IHandler method) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2IPairHandler method) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2VHandler method) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2VPairHandler method) (lightrft.datasets.rapidata.RapidataI2VHandler method) (lightrft.datasets.rapidata.RapidataI2VPairHandler method) (lightrft.datasets.rapidata.RapidataT2VHandler method) (lightrft.datasets.rapidata.RapidataT2VPairHandler method) (lightrft.datasets.utils.BaseDataHandler method) pin_memory() (in module lightrft.trainer.experience_maker) (in module lightrft.trainer.experience_maker_vl) (lightrft.trainer.experience_maker.Experience method) (lightrft.trainer.experience_maker_vl.ExperienceVL method) pixel_values (lightrft.trainer.experience_maker_vl.ExperienceVL attribute) (lightrft.trainer.experience_maker_vl.SamplesVL attribute) (lightrft.trainer.replay_buffer_utils.BufferItemVL attribute) pixel_values_videos (lightrft.trainer.experience_maker_vl.ExperienceVL attribute) (lightrft.trainer.experience_maker_vl.SamplesVL attribute) (lightrft.trainer.replay_buffer_utils.BufferItemVL attribute) plot_every (lightrft.strategy.config.StrategyConfig attribute) PolicyLoss (class in lightrft.models.loss) postprocess() (lightrft.strategy.utils.parallel_utils.SPDataProcessor method) ppo_train() (lightrft.trainer.ppo_trainer.PPOTrainer method) (lightrft.trainer.ppo_trainer_vl.PPOTrainerVL method) (lightrft.trainer.spmd_ppo_trainer.SPMDPPOTrainer method) (lightrft.trainer.spmd_ppo_trainer.SPMDPPOTrainerBase method) (lightrft.trainer.spmd_ppo_trainer.SPMDPPOTrainerVL method) PPOTrainer (class in lightrft.trainer.ppo_trainer) PPOTrainerVL (class in lightrft.trainer.ppo_trainer_vl) prepare() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.strategy_base.StrategyBase method) prepare_model() (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) prepare_models_and_optimizers() (lightrft.strategy.strategy_base.StrategyBase method) prepare_reward_models() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) preprocess() (lightrft.strategy.utils.parallel_utils.SPDataProcessor method) pretrain_data (lightrft.strategy.config.StrategyConfig attribute) print() (lightrft.strategy.strategy_base.StrategyBase class method) print_config_summary() (lightrft.strategy.config.StrategyConfig method) print_rank_0() (in module lightrft.utils.utils) print_trainable_parameters() (lightrft.models.actor_al.ActorAL method) (lightrft.models.actor_language.ActorLanguage method) (lightrft.models.actor_vl.ActorVL method) (lightrft.models.grm_vl.GenerativeRewardModelVL method) (lightrft.models.srm_al.ScalarRewardModelAL method) (lightrft.models.srm_vl.ScalarRewardModelVL method) PRMLoss (class in lightrft.models.loss) process_experiences() (lightrft.trainer.experience_maker.NaiveExperienceMaker method) (lightrft.trainer.experience_maker_vl.NaiveExperienceMakerVL method) process_sequences() (lightrft.models.actor_al.ActorAL method) (lightrft.models.actor_language.ActorLanguage method) (lightrft.models.actor_vl.ActorVL method) processor_fn() (lightrft.trainer.experience_maker_vl.NaiveExperienceMakerVL method) prompts (lightrft.trainer.experience_maker.Samples attribute) (lightrft.trainer.experience_maker_vl.SamplesVL attribute) Q qwen2_attn_forward() (in module lightrft.models.monkey_patch.qwen) R RapidataI2VHandler (class in lightrft.datasets.rapidata) RapidataI2VPairHandler (class in lightrft.datasets.rapidata) RapidataT2VHandler (class in lightrft.datasets.rapidata) RapidataT2VPairHandler (class in lightrft.datasets.rapidata) raw_images (lightrft.trainer.experience_maker_vl.ExperienceVL attribute) (lightrft.trainer.experience_maker_vl.SamplesVL attribute) (lightrft.trainer.replay_buffer_utils.BufferItemVL attribute) reduce_grads() (in module lightrft.strategy.fsdp.fsdp_utils) references (lightrft.trainer.experience_maker_vl.SamplesVL attribute) rejection_sampling_processor() (in module lightrft.utils.processor) release_memory_occupation() (in module lightrft.strategy.sglang_utils.sgl_model_saver) reload_deepspeed_states() (in module lightrft.strategy.deepspeed.deepspeed_utils) remote_rm_fn() (in module lightrft.utils.remote_rm_utils) remote_rm_url (lightrft.strategy.config.StrategyConfig attribute) remove_padding_in_sequences() (in module lightrft.trainer.replay_buffer_utils) report_memory() (lightrft.strategy.strategy_base.StrategyBase class method) request_api_wrapper() (in module lightrft.utils.remote_rm_utils) response_length (lightrft.trainer.experience_maker.Samples attribute) (lightrft.trainer.experience_maker_vl.SamplesVL attribute) resume_memory_occupation() (in module lightrft.strategy.sglang_utils.sgl_model_saver) returns (lightrft.trainer.experience_maker.Experience attribute) (lightrft.trainer.experience_maker_vl.ExperienceVL attribute) (lightrft.trainer.replay_buffer_utils.BufferItem attribute) (lightrft.trainer.replay_buffer_utils.BufferItemVL attribute) reward_clip (lightrft.strategy.config.StrategyConfig attribute) reward_normalization() (in module lightrft.utils.processor) reward_running_norm (lightrft.strategy.config.StrategyConfig attribute) reward_running_norm_minus_mean (lightrft.strategy.config.StrategyConfig attribute) RLGenerationEngine (class in lightrft.strategy.sglang_utils.sglang_engine) RunningMoments (class in lightrft.trainer.utils) S sample() (lightrft.trainer.replay_buffer.NaiveReplayBuffer method) (lightrft.trainer.replay_buffer_vl.NaiveReplayBufferVL method) Samples (class in lightrft.trainer.experience_maker) SamplesVL (class in lightrft.trainer.experience_maker_vl) save_ckpt() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) (lightrft.strategy.strategy_base.StrategyBase method) save_logs_and_checkpoints() (lightrft.trainer.grm_trainer_vl.GRMTrainerVL method) (lightrft.trainer.ppo_trainer.PPOTrainer method) (lightrft.trainer.ppo_trainer_vl.PPOTrainerVL method) (lightrft.trainer.srm_trainer_al.SRMTrainerAL method) (lightrft.trainer.srm_trainer_vl.SRMTrainerVL method) save_model() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) save_trajectories() (lightrft.utils.trajectory_saver.TrajectorySaver method) ScalarRewardModelAL (class in lightrft.models.srm_al) ScalarRewardModelVL (class in lightrft.models.srm_vl) scale (lightrft.strategy.fsdp.fsdp_utils.BaseGradScaler property) seed (lightrft.strategy.config.StrategyConfig attribute) sequences (lightrft.trainer.experience_maker.Experience attribute) (lightrft.trainer.experience_maker.Samples attribute) (lightrft.trainer.experience_maker_vl.ExperienceVL attribute) (lightrft.trainer.experience_maker_vl.SamplesVL attribute) (lightrft.trainer.replay_buffer_utils.BufferItem attribute) (lightrft.trainer.replay_buffer_utils.BufferItemVL attribute) set_epoch() (lightrft.strategy.utils.data_utils.DistributedSampler method) (lightrft.utils.distributed_sampler.DistributedSampler method) set_seed() (lightrft.strategy.strategy_base.StrategyBase method) set_sequence_parallel_group() (in module lightrft.strategy.utils.parallel_utils) setup_dataloader() (lightrft.strategy.strategy_base.StrategyBase method) setup_distributed() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.strategy_base.StrategyBase method) setup_inference_engine() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.strategy_base.StrategyBase method) shutdown() (lightrft.strategy.sglang_utils.sglang_engine.RLGenerationEngine method) sleep() (lightrft.strategy.sglang_utils.sglang_engine.RLGenerationEngine method) SLEEPED (lightrft.strategy.strategy_base.EngineStatus attribute) sp_size (lightrft.strategy.config.StrategyConfig attribute) sp_slice_and_pad_input() (in module lightrft.strategy.utils.parallel_utils) SPDataProcessor (class in lightrft.strategy.utils.parallel_utils) split_experience_batch() (in module lightrft.trainer.replay_buffer_utils) SPMDPPOTrainer (class in lightrft.trainer.spmd_ppo_trainer) SPMDPPOTrainerBase (class in lightrft.trainer.spmd_ppo_trainer) SPMDPPOTrainerVL (class in lightrft.trainer.spmd_ppo_trainer) SRMTrainerAL (class in lightrft.trainer.srm_trainer_al) SRMTrainerVL (class in lightrft.trainer.srm_trainer_vl) start() (lightrft.utils.timer.Timer class method) (lightrft.utils.utils.DummyProfile method) state_dict() (lightrft.strategy.fsdp.fsdp_optimizer.FSDPadaptOptimizer method) (lightrft.strategy.fsdp.fsdp_utils.BaseGradScaler method) (lightrft.strategy.fsdp.fsdp_utils.BaseOptimizer method) (lightrft.strategy.fsdp.fsdp_utils.DynamicGradScaler method) step() (lightrft.strategy.fsdp.fsdp_optimizer.FSDPadaptOptimizer method) (lightrft.strategy.fsdp.fsdp_utils.BaseOptimizer method) (lightrft.utils.timer.Timer class method) (lightrft.utils.utils.DummyProfile method) stop() (lightrft.utils.timer.Timer class method) (lightrft.utils.utils.DummyProfile method) StrategyBase (class in lightrft.strategy.strategy_base) StrategyConfig (class in lightrft.strategy.config) sync_and_clear_cache() (lightrft.strategy.strategy_base.StrategyBase class method) T task_type (lightrft.datasets.hpdv3.HPDv3Handler attribute) (lightrft.datasets.image_reward_db.ImageRewardDBHandler attribute) (lightrft.datasets.imagegen_cot_reward.ImageGenCoTRewardGRMHandler attribute) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2AHandler attribute) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2IHandler attribute) (lightrft.datasets.omnirewardbench.OmniRewardBenchT2VHandler attribute) (lightrft.datasets.rapidata.RapidataI2VHandler attribute) (lightrft.datasets.rapidata.RapidataI2VPairHandler attribute) (lightrft.datasets.rapidata.RapidataT2VHandler attribute) Timer (class in lightrft.utils.timer) to() (in module lightrft.trainer.experience_maker) (in module lightrft.trainer.experience_maker_vl) to_device() (lightrft.trainer.experience_maker.Experience method) (lightrft.trainer.experience_maker_vl.ExperienceVL method) tokenize_fn() (lightrft.trainer.experience_maker.NaiveExperienceMaker method) (lightrft.trainer.experience_maker_vl.NaiveExperienceMakerVL method) total_length (lightrft.trainer.experience_maker.Samples attribute) (lightrft.trainer.experience_maker_vl.SamplesVL attribute) train_batch_size (lightrft.strategy.config.StrategyConfig attribute) training_step() (lightrft.trainer.ppo_trainer.PPOTrainer method) (lightrft.trainer.ppo_trainer_vl.PPOTrainerVL method) training_step_actor() (lightrft.trainer.ppo_trainer.PPOTrainer method) (lightrft.trainer.ppo_trainer_vl.PPOTrainerVL method) training_step_critic() (lightrft.trainer.ppo_trainer.PPOTrainer method) (lightrft.trainer.ppo_trainer_vl.PPOTrainerVL method) TrajectorySaver (class in lightrft.utils.trajectory_saver) U unwrap_model() (lightrft.strategy.deepspeed.deepspeed.DeepspeedStrategy method) (lightrft.strategy.fsdp.fsdpv2.FSDPV2Strategy method) (lightrft.strategy.strategy_base.StrategyBase method) update() (lightrft.strategy.fsdp.fsdp_utils.BaseGradScaler method) (lightrft.strategy.fsdp.fsdp_utils.DynamicGradScaler method) (lightrft.trainer.kl_controller.AdaptiveKLController method) (lightrft.trainer.kl_controller.FixedKLController method) (lightrft.trainer.utils.RunningMoments method) update_engine_weights() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.strategy_base.StrategyBase method) update_weight() (lightrft.strategy.vllm_utils.vllm_worker_wrap_no_ray.WorkerWrap method) update_weights_from_tensor() (lightrft.strategy.sglang_utils.sglang_engine.RLGenerationEngine method) use_kl_loss (lightrft.strategy.config.StrategyConfig attribute) use_mp_opt (lightrft.strategy.config.StrategyConfig attribute) use_tensorboard (lightrft.strategy.config.StrategyConfig attribute) V ValueLoss (class in lightrft.models.loss) values (lightrft.trainer.experience_maker.Experience attribute) (lightrft.trainer.experience_maker_vl.ExperienceVL attribute) (lightrft.trainer.replay_buffer_utils.BufferItem attribute) (lightrft.trainer.replay_buffer_utils.BufferItemVL attribute) VanillaKTOLoss (class in lightrft.models.loss) video_grid_thws (lightrft.trainer.experience_maker_vl.ExperienceVL attribute) (lightrft.trainer.experience_maker_vl.SamplesVL attribute) (lightrft.trainer.replay_buffer_utils.BufferItemVL attribute) video_num (lightrft.trainer.experience_maker_vl.SamplesVL attribute) vllm_ge_0130() (in module lightrft.trainer.utils) W wake_up() (lightrft.strategy.sglang_utils.sglang_engine.RLGenerationEngine method) WAKEUP (lightrft.strategy.strategy_base.EngineStatus attribute) wakeup_inference_engine() (lightrft.strategy.fake_strategy.FakeStrategy method) (lightrft.strategy.strategy_base.StrategyBase method) WorkerWrap (class in lightrft.strategy.vllm_utils.vllm_worker_wrap_no_ray) Z zero_grad() (lightrft.strategy.fsdp.fsdp_optimizer.FSDPadaptOptimizer method) (lightrft.strategy.fsdp.fsdp_utils.BaseOptimizer method) zero_pad_sequences() (in module lightrft.datasets.utils) (in module lightrft.trainer.replay_buffer_utils) zero_stage (lightrft.strategy.config.StrategyConfig attribute) zpg (lightrft.strategy.config.StrategyConfig attribute)