lmflow.pipeline.iterative_dpo_aligner#

Attributes#

logger

Classes#

IterativeDPOAligner

Module Contents#

lmflow.pipeline.iterative_dpo_aligner.logger[source]#
class lmflow.pipeline.iterative_dpo_aligner.IterativeDPOAligner(model_args: lmflow.args.ModelArguments, data_args: lmflow.args.DatasetArguments, aligner_args: lmflow.args.IterativeDPOAlignerArguments, ref_model_args: lmflow.args.ModelArguments, reward_model_args: lmflow.args.ModelArguments, **kwargs)[source]#
model_args[source]#
data_args[source]#
aligner_args[source]#
ref_model_args[source]#
reward_model_args[source]#
workspace_path[source]#
align(dataset_list: List[lmflow.datasets.dataset.Dataset])[source]#
_align_single_iteration(iteration_name: str, target_model_args: lmflow.args.ModelArguments, reward_model_args: lmflow.args.ModelArguments, ref_model_args: lmflow.args.ModelArguments, dataset: lmflow.datasets.dataset.Dataset)[source]#
_do_target_model_inference(model: lmflow.models.hf_decoder_model.HFDecoderModel, dataset: lmflow.datasets.dataset.Dataset, output_dir: str)[source]#
_do_reward_model_inference(model: lmflow.models.hf_text_regression_model.HFTextRegressionModel, dataset: lmflow.datasets.dataset.Dataset, output_dir: str)[source]#
_do_single_dpo_align(model_args: lmflow.args.ModelArguments, ref_model_args: lmflow.args.ModelArguments, data_args: lmflow.args.DatasetArguments, output_dir: str, iteration_name: str)[source]#
_parse_target_model_inference_args(args: lmflow.args.IterativeDPOAlignerArguments, result_cache_path: str) → lmflow.args.InferencerArguments[source]#
_parse_reward_model_inference_args(args: lmflow.args.IterativeDPOAlignerArguments) → lmflow.args.InferencerArguments[source]#
_parse_dpo_aligner_args(args: lmflow.args.IterativeDPOAlignerArguments, output_dir: str, iteration_name: str) → lmflow.args.DPOv2AlignerArguments[source]#
__filter_args(mixed_args, target_cls)[source]#