lmflow.pipeline.utils.lisa_trainer_cache#

Attributes#

logger
LISA_LAYER_NAME_MAPPING
LISA_BODY_LAYER_PARAM_GROUPS_IDX
NON_LISA_LAYER_PARAM_GROUPS_IDX

Classes#

LISATrainer

Functions#

tag([info])

Module Contents#

lmflow.pipeline.utils.lisa_trainer_cache.logger[source]#
lmflow.pipeline.utils.lisa_trainer_cache.LISA_LAYER_NAME_MAPPING[source]#
lmflow.pipeline.utils.lisa_trainer_cache.LISA_BODY_LAYER_PARAM_GROUPS_IDX = [0, 1][source]#
lmflow.pipeline.utils.lisa_trainer_cache.NON_LISA_LAYER_PARAM_GROUPS_IDX = [2, 3][source]#
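These two constants index into optimizer.param_groups: groups 0 and 1 hold the currently active LISA body-layer parameters, while groups 2 and 3 hold everything else. A minimal sketch of how they might be used; the helper name is hypothetical, not part of the module:

```python
from lmflow.pipeline.utils.lisa_trainer_cache import (
    LISA_BODY_LAYER_PARAM_GROUPS_IDX,
)

# Hypothetical helper: pull out the body-layer groups, i.e. the groups whose
# parameter lists must be refreshed whenever the active layers are re-sampled.
# The non-LISA groups (indices 2 and 3) stay fixed across intervals.
def body_layer_groups(optimizer):
    return [optimizer.param_groups[i] for i in LISA_BODY_LAYER_PARAM_GROUPS_IDX]
```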
class lmflow.pipeline.utils.lisa_trainer_cache.LISATrainer(n_layers: int, interval_steps: int, lisa_layer_attr_name: str = None, *args, **kwargs)[source]#

Bases: transformers.Trainer
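A minimal construction sketch, assuming a standard transformers setup; the model name, output directory, and hyperparameter values are placeholders, not recommended settings. Leaving lisa_layer_attr_name at its default presumably lets the trainer resolve the layer attribute via LISA_LAYER_NAME_MAPPING.

```python
from transformers import AutoModelForCausalLM, TrainingArguments
from lmflow.pipeline.utils.lisa_trainer_cache import LISATrainer

model = AutoModelForCausalLM.from_pretrained("gpt2")

trainer = LISATrainer(
    n_layers=2,         # body layers kept trainable at any one time
    interval_steps=20,  # re-sample the active layers every 20 steps
    model=model,
    args=TrainingArguments(output_dir="lisa_out"),
    # train_dataset=...  # supply a tokenized dataset before calling trainer.train()
)
```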

n_layers[source]#
interval_steps[source]#
num_body_layers[source]#
active_layers_indices = [][source]#
histroy_layers_indices = [][source]#
active_layers_names = [][source]#
_optimizer_param_group_initialized = False[source]#
_get_all_body_layers() List[torch.nn.Module][source]#

Fetch all the layers of the model, excluding the head.
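A hypothetical re-implementation of this lookup, not the trainer's actual code; it assumes lisa_layer_attr_name (or an entry in LISA_LAYER_NAME_MAPPING) stores a dotted attribute path to the model's layer list:

```python
import torch
from transformers import AutoModelForCausalLM

def get_body_layers(model: torch.nn.Module, attr_name: str) -> list:
    # Follow the dotted path to the container that holds the body layers,
    # e.g. "transformer.h" for GPT-2 or "model.layers" for LLaMA-style models.
    module = model
    for part in attr_name.split("."):
        module = getattr(module, part)
    return list(module)

model = AutoModelForCausalLM.from_pretrained("gpt2")
layers = get_body_layers(model, "transformer.h")
print(len(layers))  # 12 -- the LM head is not part of this list
```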

_get_active_layers_names() List[str][source]#
_update_active_layer_info()[source]#
_switch_active_layers()[source]#

Switch the active layers for the next interval. Objects that will be updated after calling:

1. self.active_layers_indices
2. self.active_layers_names
3. requires_grad of the parameters
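A minimal sketch of the freeze/unfreeze step, under the assumption that the n_layers active layers are sampled uniformly at random each interval; the function and variable names here are hypothetical:

```python
import random
import torch

def switch_active_layers(layers: list, n_layers: int) -> list:
    # Pick which body layers stay trainable for the next interval,
    # then freeze every other body layer.
    active = sorted(random.sample(range(len(layers)), n_layers))
    for idx, layer in enumerate(layers):
        trainable = idx in active
        for param in layer.parameters():
            param.requires_grad = trainable
    return active

# maybe_switch_active_layers-style guard: only re-sample at interval boundaries.
def maybe_switch(step: int, interval_steps: int, layers: list, n_layers: int):
    if step % interval_steps == 0:
        return switch_active_layers(layers, n_layers)
    return None
```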

maybe_switch_active_layers()[source]#
create_optimizer()[source]#

Set up the optimizer. Adapted from transformers.Trainer.create_optimizer.
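A sketch of how the four optimizer param groups might be populated from the model's named parameters, following the group-index convention above; the no-decay name patterns and the partitioning logic are assumptions, not the module's actual implementation:

```python
import torch

NO_DECAY_PATTERNS = ("bias", "norm")  # hypothetical: names exempt from weight decay

def build_param_groups(model: torch.nn.Module, active_layer_names: list,
                       weight_decay: float) -> list:
    buckets = {0: [], 1: [], 2: [], 3: []}
    for name, param in model.named_parameters():
        is_body = any(name.startswith(prefix) for prefix in active_layer_names)
        no_decay = any(pattern in name.lower() for pattern in NO_DECAY_PATTERNS)
        # 0/1: active body layers (decay / no decay); 2/3: everything else.
        index = (0 if is_body else 2) + (1 if no_decay else 0)
        buckets[index].append(param)
    return [
        {"params": buckets[0], "weight_decay": weight_decay},
        {"params": buckets[1], "weight_decay": 0.0},
        {"params": buckets[2], "weight_decay": weight_decay},
        {"params": buckets[3], "weight_decay": 0.0},
    ]
```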

_prepare_optimizer_param_group(opt_model: torch.nn.Module)[source]#
_post_init_deepspeed_zero_optimizer_params(optimizer: deepspeed.runtime.zero.stage_1_and_2.DeepSpeedZeroOptimizer)[source]#
lmflow.pipeline.utils.lisa_trainer_cache.tag(info='')[source]#