lmflow.utils.conversation_template#

Submodules#

Attributes#

Classes#

Functions#

is_package_version_at_least(package_name, min_version)

Package Contents#

lmflow.utils.conversation_template.is_package_version_at_least(package_name, min_version)[source]#
lmflow.utils.conversation_template.EMPTY_TEMPLATE[source]#
lmflow.utils.conversation_template.EMPTY_NO_SPECIAL_TOKENS_TEMPLATE[source]#
class lmflow.utils.conversation_template.ConversationTemplate[source]#
user_formatter: Formatter#
assistant_formatter: Formatter#
function_formatter: Formatter | None = None#
observation_formatter: Formatter | None = None#
system_formatter: Formatter | None = None#
force_system: bool = False#
tools_formatter: Formatter | None = None#
separator: TemplateComponent | None = None#
remove_last_sep: bool = False#
special_starter: TemplateComponent | None = None#
special_stopper: TemplateComponent | None = None#
template_name: str | None = None#
system_default: str | None = None#
__post_init__()[source]#
encode_conversation(tokenizer: transformers.PreTrainedTokenizer, messages: List[Dict[str, str]], system: str | None = None, tools: List[str] | None = None, **kwargs) Sequence[Tuple[List[int], List[int]]][source]#

Messages here should be guaranteed to be in pairs, with the first message being the user message and the second message being the assistant message. Data example:

```json
{
    "conversation_id": 2,
    "system": "sysinfo1",
    "tools": ["tool_1_desc"],
    "messages": [
        {
            "role": "user",
            "content": "hi"
        },
        {
            "role": "assistant",
            "content": "Hello!"
        }
    ]
}
```#

_encode(tokenizer: transformers.PreTrainedTokenizer, messages: List[Dict[str, str]], system: str | None = None, tools: str | None = None, **kwargs) Sequence[Tuple[List[int], List[int]]][source]#
_encode_template(template: List[TemplateComponent], tokenizer: transformers.PreTrainedTokenizer, **kwargs) List[int][source]#

Encode template components into token ids.

Parameters:
template : List[TemplateComponent]

Formatted template components.

tokenizer : PreTrainedTokenizer

Tokenizer to convert tokens into token ids.

Returns:
List[int]

Encoded token ids.

post_process_pairs(encoded_pairs, tokenizer)[source]#
remove_last_separator(encoded_pairs: Sequence[Tuple[List[int], List[int]]], tokenizer: transformers.PreTrainedTokenizer) Sequence[Tuple[List[int], List[int]]][source]#
add_special_starter(encoded_pairs: Sequence[Tuple[List[int], List[int]]], tokenizer: transformers.PreTrainedTokenizer) Sequence[Tuple[List[int], List[int]]][source]#
add_special_stopper(encoded_pairs: Sequence[Tuple[List[int], List[int]]], tokenizer: transformers.PreTrainedTokenizer) Sequence[Tuple[List[int], List[int]]][source]#
_ensure_id_list(obj: int | List[int]) List[int][source]#

Make sure the object is a list of integers. Useful for handling token ids.

class lmflow.utils.conversation_template.ConversationTemplateForTool[source]#

Bases: ConversationTemplate

encode_conversation(tokenizer: transformers.PreTrainedTokenizer, messages: List[Dict[str, str]], system: str | None = None, tools: List[str] | None = None, **kwargs) Sequence[Tuple[List[int], List[int]]][source]#

Messages here should be guaranteed to be in pairs, with the first message being the user message and the second message being the assistant message. Data example:

```json
{
    "conversation_id": 2,
    "system": "sysinfo1",
    "tools": ["tool_1_desc"],
    "messages": [
        {
            "role": "user",
            "content": "hi"
        },
        {
            "role": "assistant",
            "content": "Hello!"
        }
    ]
}
```#

_encode(tokenizer: transformers.PreTrainedTokenizer, messages: List[Dict[str, str]], system: str | None = None, tools: str | None = None, **kwargs) Sequence[Tuple[List[int], List[int]]][source]#
_encode_template(template: List[TemplateComponent], tokenizer: transformers.PreTrainedTokenizer, **kwargs) List[int][source]#

Encode template components into token ids.

Parameters:
template : List[TemplateComponent]

Formatted template components.

tokenizer : PreTrainedTokenizer

Tokenizer to convert tokens into token ids.

Returns:
List[int]

Encoded token ids.

_handle_tools(tools: List[str] | None) str[source]#
lmflow.utils.conversation_template.CHATGLM3_TEMPLATE[source]#
lmflow.utils.conversation_template.CHATML_TEMPLATE[source]#
lmflow.utils.conversation_template.DEEPSEEK_V2_TEMPLATE[source]#
lmflow.utils.conversation_template.DEEPSEEK_V3_TEMPLATE = Multiline-String[source]#
Show Value
"""{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '

' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|>'}}{% generation %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '
' + '```json' + '
' + tool['function']['arguments'] + '
' + '```' + '<|tool▁call▁end|>'}}{% endgeneration %}{%- set ns.is_first = true -%}{%- else %}{% generation %}{{'
' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '
' + '```json' + '
' + tool['function']['arguments'] + '
' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{% endgeneration %}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% generation %}{{ message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{% endgeneration %}{%- else %}{{'<|Assistant|>'}}{% generation %}{{ message['content'] + '<|end▁of▁sentence|>'}}{% endgeneration %}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'
<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}"""
lmflow.utils.conversation_template.DEEPSEEK_R1_TEMPLATE = "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif...[source]#
lmflow.utils.conversation_template.DEEPSEEK_R1_DISTILL_TEMPLATE = "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif...[source]#
lmflow.utils.conversation_template.GEMMA_TEMPLATE[source]#
lmflow.utils.conversation_template.HYMBA_TEMPLATE[source]#
lmflow.utils.conversation_template.INTERNLM2_TEMPLATE[source]#
lmflow.utils.conversation_template.LLAMA2_TEMPLATE[source]#
lmflow.utils.conversation_template.LLAMA3_TEMPLATE[source]#
lmflow.utils.conversation_template.LLAMA3_TEMPLATE_FOR_TOOL[source]#
lmflow.utils.conversation_template.PHI3_TEMPLATE[source]#
lmflow.utils.conversation_template.QWEN2_TEMPLATE[source]#
lmflow.utils.conversation_template.QWEN2_TEMPLATE_FOR_TOOL[source]#
lmflow.utils.conversation_template.QWEN2_5_TEMPLATE = '{%- if tools %}{{- \'<|im_start|>system\\n\' }}{%- if messages[0][\'role\'] == \'system\' %}{{-...[source]#
lmflow.utils.conversation_template.QWEN2_5_1M_TEMPLATE = '{%- if tools %}{{- \'<|im_start|>system\\n\' }}{%- if messages[0][\'role\'] == \'system\' %}{{-...[source]#
lmflow.utils.conversation_template.QWEN2_5_MATH_TEMPLATE = '{%- if tools %}{{- \'<|im_start|>system\\n\' }}{%- if messages[0][\'role\'] == \'system\' %}{{-...[source]#
lmflow.utils.conversation_template.QWEN_QWQ_TEMPLATE = '{%- if tools %}{{- \'<|im_start|>system\\n\' }}{%- if messages[0][\'role\'] == \'system\' %}{{-...[source]#
lmflow.utils.conversation_template.YI1_5_TEMPLATE[source]#
lmflow.utils.conversation_template.ZEPHYR_TEMPLATE[source]#
lmflow.utils.conversation_template.logger[source]#
lmflow.utils.conversation_template.PRESET_TEMPLATES[source]#
lmflow.utils.conversation_template.JINJA_TEMPLATES[source]#