Source code for lmflow.utils.conversation_template.gemma

#!/usr/bin/env python
# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved.
import logging
from dataclasses import dataclass

from .base import ConversationTemplate, StringFormatter, TemplateComponent

[docs] logger = logging.getLogger(__name__)
@dataclass
[docs] class GemmaConversationTemplate(ConversationTemplate):
[docs] def encode_conversation(self, *args, **kwargs): if kwargs.get("system"): logger.warning( "As of now, Gemma does not support system messages officially. " "ConversationTemplate will add your system messages right after " "the bos token and before the user message without any special formatting. " "For more details, please refer to the [official template]" "(https://huggingface.co/google/gemma-1.1-2b-it/blob/bf4924f313df5166dee1467161e886e55f2eb4d4/tokenizer_config.json#L1507)." ) return super().encode_conversation(*args, **kwargs)
[docs] GEMMA_TEMPLATE = GemmaConversationTemplate( template_name="gemma", user_formatter=StringFormatter( template=[TemplateComponent(type="string", content="<start_of_turn>user\n{{content}}<end_of_turn>\n")] ), assistant_formatter=StringFormatter( template=[TemplateComponent(type="string", content="<start_of_turn>model\n{{content}}<end_of_turn>\n")] ), system_formatter=StringFormatter(template=[TemplateComponent(type="string", content="{{content}}")]), special_starter=TemplateComponent(type="token", content="bos_token"), )
[docs] GEMMA3_TEMPLATE = """{{ bos_token }} {%- if messages[0]['role'] == 'system' -%} {%- if messages[0]['content'] is string -%} {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%} {%- else -%} {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%} {%- endif -%} {%- set loop_messages = messages[1:] -%}\n{%- else -%} {%- set first_user_prefix = \"\" -%} {%- set loop_messages = messages -%} {%- endif -%} {%- for message in loop_messages -%} {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%} {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }} {%- endif -%} {%- if (message['role'] == 'assistant') -%} {%- set role = \"model\" -%} {%- else -%} {%- set role = message['role'] -%} {%- endif -%} {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }} {%- if (message['role'] == 'assistant') -%} {% generation %} {%- if message['content'] is string -%} {{ message['content'] | trim }} {%- elif message['content'] is iterable -%} {%- for item in message['content'] -%} {%- if item['type'] == 'image' -%} {{ '<start_of_image>' }} {%- elif item['type'] == 'text' -%} {{ item['text'] | trim }} {%- endif -%} {%- endfor -%} {%- else -%} {{ raise_exception(\"Invalid content type\") }} {%- endif -%} {{ '<end_of_turn>\n' }} {% endgeneration %} {%- else -%} {%- if message['content'] is string -%} {{ message['content'] | trim }} {%- elif message['content'] is iterable -%} {%- for item in message['content'] -%} {%- if item['type'] == 'image' -%} {{ '<start_of_image>' }} {%- elif item['type'] == 'text' -%} {{ item['text'] | trim }} {%- endif -%} {%- endfor -%} {%- else -%} {{ raise_exception(\"Invalid content type\") }} {%- endif -%} {{ '<end_of_turn>\n' }} {%- endif -%} {%- endfor -%} {%- if add_generation_prompt -%} {{'<start_of_turn>model\n'}} {%- endif -%} """