Source code for lmflow.pipeline.utils.rm_dataprocessor
import logging
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Union
from datasets import load_dataset
from transformers import AutoTokenizer
from transformers.utils import PaddingStrategy
logger = logging.getLogger(__name__)
@dataclass
class RewardDataCollatorWithPadding:
    """Collates paired (chosen, rejected) examples for reward-model training,
    padding both responses of each pair into a single batch."""

    tokenizer: AutoTokenizer
    padding: Union[bool, str, PaddingStrategy] = True
    max_length: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None
    return_tensors: str = "pt"
    def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]:
        # Flatten each (chosen, rejected) pair into two consecutive rows so the
        # tokenizer can pad them together and the reward model scores both.
        merged_features = []
        for feature in features:
            merged_features.append(
                {
                    "input_ids": feature["input_ids_chosen"],
                    "attention_mask": feature["attention_mask_chosen"],
                }
            )
            merged_features.append(
                {
                    "input_ids": feature["input_ids_rejected"],
                    "attention_mask": feature["attention_mask_rejected"],
                }
            )
            logger.debug(f"Chosen: {self.tokenizer.decode(feature['input_ids_chosen'])}")
            logger.debug(f"Rejected: {self.tokenizer.decode(feature['input_ids_rejected'])}")
        # Pad all merged sequences to a common length in a single call.
        batch = self.tokenizer.pad(
            merged_features,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors=self.return_tensors,
        )
        batch = {
            "input_ids": batch["input_ids"],
            "attention_mask": batch["attention_mask"],
            # Signals the Trainer that the model's forward pass returns a loss.
            "return_loss": True,
        }
        return batch
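
For reference, a minimal usage sketch of the collator: the GPT-2 checkpoint and example texts below are illustrative assumptions, and any tokenizer with a pad token and a dataset whose examples already carry tokenized chosen/rejected fields would work the same way.

# Usage sketch (illustrative checkpoint and texts).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no pad token by default

collator = RewardDataCollatorWithPadding(tokenizer=tokenizer, max_length=512)

chosen = tokenizer("Preferred answer")
rejected = tokenizer("Worse answer")
features = [
    {
        "input_ids_chosen": chosen["input_ids"],
        "attention_mask_chosen": chosen["attention_mask"],
        "input_ids_rejected": rejected["input_ids"],
        "attention_mask_rejected": rejected["attention_mask"],
    }
]

batch = collator(features)
# Chosen and rejected rows are interleaved, so the batch holds
# 2 * len(features) padded sequences.
print(batch["input_ids"].shape)  # e.g. torch.Size([2, padded_len])
print(batch["return_loss"])      # True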