
lmflow.utils.flash_attention.gpt_neo_flash_attention

Module Contents

Functions

_attn(self, query, key, value[, attention_mask, head_mask])

forward(self, hidden_states[, attention_mask, ...])

replace_gpt_neo_attn_with_flash_attn()

lmflow.utils.flash_attention.gpt_neo_flash_attention._attn(self, query, key, value, attention_mask=None, head_mask=None)

lmflow.utils.flash_attention.gpt_neo_flash_attention.forward(self, hidden_states, attention_mask=None, layer_past=None, head_mask=None, use_cache=False, output_attentions=False)

lmflow.utils.flash_attention.gpt_neo_flash_attention.replace_gpt_neo_attn_with_flash_attn()
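
A minimal usage sketch, assuming that replace_gpt_neo_attn_with_flash_attn() monkey-patches the GPT-Neo attention in Hugging Face transformers with the flash-attention _attn/forward implementations listed above. The checkpoint name and the generation snippet are illustrative only and are not part of this module.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from lmflow.utils.flash_attention.gpt_neo_flash_attention import (
    replace_gpt_neo_attn_with_flash_attn,
)

# Apply the patch before the model is instantiated so the replaced
# attention methods are the ones actually used at runtime.
replace_gpt_neo_attn_with_flash_attn()

# "EleutherAI/gpt-neo-1.3B" is an illustrative checkpoint choice.
model_name = "EleutherAI/gpt-neo-1.3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # flash attention expects fp16/bf16 inputs
).to("cuda")

inputs = tokenizer(
    "Flash attention reduces memory use by", return_tensors="pt"
).to("cuda")
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```

Patching before model construction matters because the replacement is applied at the class level; modules created afterwards pick up the flash-attention code paths automatically.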