lmflow.pipeline.iterative_dpo_aligner
Module Contents
-
lmflow.pipeline.iterative_dpo_aligner.logger[source]
-
class lmflow.pipeline.iterative_dpo_aligner.IterativeDPOAligner(model_args: lmflow.args.ModelArguments, data_args: lmflow.args.DatasetArguments, aligner_args: lmflow.args.IterativeDPOAlignerArguments, ref_model_args: lmflow.args.ModelArguments, reward_model_args: lmflow.args.ModelArguments, **kwargs)[source]
-
model_args[source]
-
data_args[source]
-
aligner_args[source]
-
ref_model_args[source]
-
reward_model_args[source]
-
workspace_path[source]
-
align(dataset_list: List[lmflow.datasets.dataset.Dataset])[source]
-
_align_single_iteration(iteration_name: str, target_model_args: lmflow.args.ModelArguments, reward_model_args: lmflow.args.ModelArguments, ref_model_args: lmflow.args.ModelArguments, dataset: lmflow.datasets.dataset.Dataset)[source]
-
_do_target_model_inference(model: lmflow.models.hf_decoder_model.HFDecoderModel, dataset: lmflow.datasets.dataset.Dataset, output_dir: str)[source]
-
_do_reward_model_inference(model: lmflow.models.hf_text_regression_model.HFTextRegressionModel, dataset: lmflow.datasets.dataset.Dataset, output_dir: str)[source]
-
_do_single_dpo_align(model_args: lmflow.args.ModelArguments, ref_model_args: lmflow.args.ModelArguments, data_args: lmflow.args.DatasetArguments, output_dir: str, iteration_name: str)[source]
-
_parse_target_model_inference_args(args: lmflow.args.IterativeDPOAlignerArguments, result_cache_path: str) → lmflow.args.InferencerArguments[source]
-
_parse_reward_model_inference_args(args: lmflow.args.IterativeDPOAlignerArguments) → lmflow.args.InferencerArguments[source]
-
_parse_dpo_aligner_args(args: lmflow.args.IterativeDPOAlignerArguments, output_dir: str, iteration_name: str) → lmflow.args.DPOv2AlignerArguments[source]
-
__filter_args(mixed_args, target_cls)[source]