lmflow.pipeline.utils.lisa_trainer_fsdp#

Attributes#

Classes#

Module Contents#

lmflow.pipeline.utils.lisa_trainer_fsdp.logger[source]#
lmflow.pipeline.utils.lisa_trainer_fsdp.LISA_LAYER_NAME_MAPPING[source]#
lmflow.pipeline.utils.lisa_trainer_fsdp.LISA_BODY_LAYER_PARAM_GROUPS_IDX = [2, 3][source]#
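For orientation only: LISA_LAYER_NAME_MAPPING is presumably a dict from model class names to the dotted attribute path of their decoder-layer list, which _get_all_body_layers() falls back to when lisa_layer_attr_name is not given. The entries below are illustrative assumptions, not the module's actual contents:

    # Illustrative shape only; the real mapping is defined in this module and
    # may contain different keys and attribute paths.
    LISA_LAYER_NAME_MAPPING = {
        "LlamaForCausalLM": "model.layers",   # assumption: LLaMA-style decoder stack
        "GPT2LMHeadModel": "transformer.h",   # assumption: GPT-2-style block list
    }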
class lmflow.pipeline.utils.lisa_trainer_fsdp.LISATrainer(n_layers: int, interval_steps: int, lisa_layer_attr_name: str = None, *args, **kwargs)[source]#

Bases: transformers.Trainer
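A minimal usage sketch, not taken from the LMFlow documentation: it assumes a causal LM covered by LISA_LAYER_NAME_MAPPING and a tokenized train_dataset have already been prepared, and the hyperparameter values are placeholders rather than recommendations.

    from transformers import TrainingArguments
    from lmflow.pipeline.utils.lisa_trainer_fsdp import LISATrainer

    # `model` and `train_dataset` are assumed to be prepared elsewhere.
    trainer = LISATrainer(
        n_layers=2,                 # body layers kept trainable in each interval
        interval_steps=20,          # re-sample the active layers every 20 steps
        lisa_layer_attr_name=None,  # None -> fall back to LISA_LAYER_NAME_MAPPING
        model=model,
        args=TrainingArguments(output_dir="lisa_out", per_device_train_batch_size=1),
        train_dataset=train_dataset,
    )
    trainer.train()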

n_layers[source]#
interval_steps[source]#
num_body_layers[source]#
active_layers_indices = [][source]#
histroy_layers_indices = [][source]#
active_layers_names = [][source]#
_get_all_body_layers() List[torch.nn.Module][source]#

Fetch all body layers of the model, excluding the head.
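A sketch of how such a lookup could work, assuming lisa_layer_attr_name is a dotted attribute path to the decoder-layer ModuleList (e.g. "model.layers" for LLaMA-style models); this is not the library's implementation:

    from typing import List
    import torch.nn as nn

    def get_all_body_layers(model: nn.Module, lisa_layer_attr_name: str) -> List[nn.Module]:
        # Walk the dotted attribute path down to the ModuleList of decoder blocks;
        # the LM head is not part of this list.
        module = model
        for attr in lisa_layer_attr_name.split("."):
            module = getattr(module, attr)
        return list(module)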

_get_active_layers_names() List[str][source]#
_update_active_layer_info()[source]#
_switch_active_layers()[source]#

Switch the active layers for the next interval. After calling, the following are updated:

1. self.active_layers_indices
2. self.active_layers_names
3. requires_grad of the parameters
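A hedged sketch of the switching step described above, not the trainer's own code: sample n_layers body layers uniformly at random and freeze the rest.

    from typing import List
    import numpy as np
    import torch.nn as nn

    def switch_active_layers(body_layers: List[nn.Module], n_layers: int) -> List[int]:
        # Sample which body layers stay trainable for the next interval.
        active = np.random.choice(
            len(body_layers), size=min(n_layers, len(body_layers)), replace=False
        ).tolist()
        for idx, layer in enumerate(body_layers):
            trainable = idx in active
            for param in layer.parameters():
                param.requires_grad = trainable  # freeze every non-sampled layer
        return active  # would populate self.active_layers_indices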

maybe_switch_active_layers()[source]#
create_optimizer()[source]#

Set up the optimizer. Adapted from transformers.Trainer.create_optimizer.
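The constant LISA_BODY_LAYER_PARAM_GROUPS_IDX = [2, 3] above suggests that body-layer parameters occupy dedicated optimizer param groups so they can be swapped between intervals. As a rough sketch of the kind of setup the docstring describes (not the trainer's actual code), an AdamW optimizer can be built over only the currently trainable parameters, mirroring the weight-decay grouping used by transformers.Trainer.create_optimizer:

    import torch
    import torch.nn as nn

    def create_optimizer(model: nn.Module, lr: float = 1e-5, weight_decay: float = 0.0):
        decay, no_decay = [], []
        for name, param in model.named_parameters():
            if not param.requires_grad:        # frozen (inactive) layers are skipped
                continue
            if "bias" in name or "norm" in name.lower():
                no_decay.append(param)         # biases and norms get no weight decay
            else:
                decay.append(param)
        return torch.optim.AdamW(
            [{"params": decay, "weight_decay": weight_decay},
             {"params": no_decay, "weight_decay": 0.0}],
            lr=lr,
        )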