lightrft.trainer¶
- lightrft.trainer.experience_maker
- lightrft.trainer.experience_maker_vl
ExperienceVL: ExperienceVL.action_entropy, ExperienceVL.action_log_probs, ExperienceVL.action_mask, ExperienceVL.advantages, ExperienceVL.attention_mask, ExperienceVL.base_action_log_probs, ExperienceVL.image_grid_thws, ExperienceVL.info, ExperienceVL.kl, ExperienceVL.pin_memory(), ExperienceVL.pixel_values, ExperienceVL.pixel_values_videos, ExperienceVL.raw_images, ExperienceVL.returns, ExperienceVL.sequences, ExperienceVL.to_device(), ExperienceVL.values, ExperienceVL.video_grid_thws
NaiveExperienceMakerVL: NaiveExperienceMakerVL.generate_samples(), NaiveExperienceMakerVL.get_advantages_and_returns(), NaiveExperienceMakerVL.get_cumulative_returns(), NaiveExperienceMakerVL.make_experience(), NaiveExperienceMakerVL.make_experience_list(), NaiveExperienceMakerVL.process_experiences(), NaiveExperienceMakerVL.processor_fn(), NaiveExperienceMakerVL.tokenize_fn()
SamplesVL: SamplesVL.action_mask, SamplesVL.attention_mask, SamplesVL.image_grid_thws, SamplesVL.image_num, SamplesVL.labels, SamplesVL.num_actions, SamplesVL.output_texts, SamplesVL.packed_seq_lens, SamplesVL.pixel_values, SamplesVL.pixel_values_videos, SamplesVL.prompts, SamplesVL.raw_images, SamplesVL.references, SamplesVL.response_length, SamplesVL.sequences, SamplesVL.total_length, SamplesVL.video_grid_thws, SamplesVL.video_num
cumulative_product(), pin_memory(), to()
- lightrft.trainer.fast_exp_maker
- lightrft.trainer.grm_trainer_vl
- lightrft.trainer.kl_controller
- lightrft.trainer.ppo_trainer
- lightrft.trainer.ppo_trainer_vl
- lightrft.trainer.replay_buffer
- lightrft.trainer.replay_buffer_utils
BufferItem
BufferItemVL: BufferItemVL.action_entropy, BufferItemVL.action_log_probs, BufferItemVL.action_mask, BufferItemVL.advantages, BufferItemVL.attention_mask, BufferItemVL.base_action_log_probs, BufferItemVL.image_grid_thws, BufferItemVL.info, BufferItemVL.pixel_values, BufferItemVL.pixel_values_videos, BufferItemVL.raw_images, BufferItemVL.returns, BufferItemVL.sequences, BufferItemVL.values, BufferItemVL.video_grid_thws
is_vl_experience(), make_experience_batch(), remove_padding_in_sequences(), split_experience_batch(), zero_pad_sequences()
- lightrft.trainer.replay_buffer_vl
- lightrft.trainer.spmd_ppo_trainer
- lightrft.trainer.srm_trainer_al
- lightrft.trainer.srm_trainer_vl
- lightrft.trainer.utils