预训练范式

通过大模型预训练进行推荐

1、预训练的一般思路与方法

1.1 预训练数据的准备

直接利用用户行为序列
将用户行为序列 ID 转为相关文本
在用户行为序列中整合用户的特征

1.2 大模型架构的选择

BERT4Rec
PTUM
P5
M6-Rec

1.3 大模型预训练

Transformers 库
苹果 MLX 框架

1.4 大模型推理

Transformers 库
llama.cpp推理

2、案例

2.1 基于 PTUM 架构的预训练推荐系统

PTUM : Pre-Training User Model

掩码行为预测
后续行为预测

2.2 基于 P5 的预训练推荐系统

P5: Pretrain, Personalized Prompt, and Predict Paradigm

预训练数据准备
1. 评分、评论、解释
2. 序列推荐
3. 直接推荐
P5 模型架构
P5 预训练
P5 推理

3、基于 MIND 数据集的代码实战

3.1 预训练数据集准备

(1)生成预训练的序列数据
文件generate_user_sequence.py

import csv

user_sequence_list = []
with open('../data/mind/behaviors.tsv', 'r') as file:
    reader = csv.reader(file, delimiter='\t')
    for row in reader:
        uid = row[1]
        history = row[3]
        if len(history.split(" ")) >= 5:  # 用户至少要有5个点击历史
            r = uid + " " + history
            user_sequence_list.append(r)

user_sequence_path = "../data/mind/user_sequence.txt"
with open(user_sequence_path, 'a') as file:
    for r in user_sequence_list:
        file.write(r + "\n")

(2) 生成预训练数据
文件generate_dataset_train.py

import json
import os
import sys

import fire

sys.path.append('../')
from utils import sequential_indexing, load_prompt_template, check_task_prompt, get_info_from_prompt
from utils import construct_user_sequence_dict, read_line


def main(data_path: str, item_indexing: str, task: str, dataset: str, prompt_file: str, sequential_order: str,
         max_his: int, his_sep: str, his_prefix: int, skip_empty_his: int):
    file_data = dict()
    file_data['arguments'] = {
        "data_path": data_path, "item_indexing": item_indexing, "task": task,
        "dataset": dataset, "prompt_file": prompt_file, "sequential_order": sequential_order,
        "max_his": max_his, "his_sep": his_sep, "his_prefix": his_prefix, "skip_empty_his": skip_empty_his
    }
    file_data['data'] = []
    tasks = list(task)
    user_sequence = read_line(os.path.join(data_path, dataset, 'user_sequence.txt'))
    user_sequence_dict = construct_user_sequence_dict(user_sequence)

    reindex_user_seq_dict, item_map = sequential_indexing(data_path, dataset,
                                                          user_sequence_dict, sequential_order)

    # get prompt
    prompt = load_prompt_template(prompt_file, tasks)
    info = get_info_from_prompt(prompt)
    check_task_prompt(prompt, tasks)

    # Load training data samples
    training_data_samples = []
    for user in reindex_user_seq_dict:
        items = reindex_user_seq_dict[user][:-2]
        for i in range(len(items)):
            if i == 0:
                if skip_empty_his > 0:
                    continue
            one_sample = dict()
            one_sample['dataset'] = dataset
            one_sample['user_id'] = user
            if his_prefix > 0:
                one_sample['target'] = 'item_' + items[i]
            else:
                one_sample['target'] = items[i]
            if 'history' in info:
                history = items[:i]
                if max_his > 0:
                    history = history[-max_his:]
                if his_prefix > 0:
                    one_sample['history'] = his_sep.join(["item_" + item_idx for item_idx in history])
                else:
                    one_sample['history'] = his_sep.join(history)
            training_data_samples.append(one_sample)
    print("load training data")
    print(f'there are {len(training_data_samples)} samples in training data.')

    # construct sentences
    for i in range(len(training_data_samples)):
        one_sample = training_data_samples[i]
        for t in tasks:
            datapoint = {'task': dataset + t, 'data_id': i}
            for pid in prompt[t]['seen']:
                datapoint['instruction'] = prompt[t]['seen'][pid]['Input']
                datapoint['input'] = prompt[t]['seen'][pid]['Input'].format(**one_sample)
                datapoint['output'] = prompt[t]['seen'][pid]['Output'].format(**one_sample)
                file_data['data'].append(datapoint.copy())

    print("data constructed")
    print(f"there are {len(file_data['data'])} prompts in training data.")

    # save the data to json file
    output_path = f'{dataset}_{task}_{item_indexing}_train.json'

    with open(os.path.join(data_path, dataset, output_path), 'w') as openfile:
        json.dump(file_data, openfile)


if __name__ == "__main__":
    fire.Fire(main)

附录utils.py

import math
import os
import re
import numpy as np
import torch
from torch.utils.data import Dataset


def get_dict_from_lines(lines):
    """
    Used to get user or item map from lines loaded from txt file.
    """
    index_map = dict()
    for line in lines:
        info = line.split(" ")
        index_map[info[0]] = info[1]
    return index_map


def read_line(path):
    if not os.path.exists(path):
        raise FileNotFoundError
    lines = []
    with open(path, 'r') as fd:
        for line in fd:
            lines.append(line.rstrip('\n'))
    return lines


def write_dict_2_file(path, write_dict):
    with open(path, 'w') as out:
        for user, items in write_dict.items():
            if type(items) == list:
                out.write(user + ' ' + ' '.join(items) + '\n')
            else:
                out.write(user + ' ' + str(items) + '\n')


class EvaluationDataset(Dataset):
    def __init__(self, dataset, tokenizer, cutoff):
        super().__init__()
        self.input = tokenizer(
            dataset['input'], padding="longest", truncation=True, max_length=cutoff
        )
        self.output = tokenizer(
            dataset['output'], padding="longest", truncation=True, max_length=cutoff
        )

    def __len__(self):
        return len(self.input["input_ids"])

    def __getitem__(self, index):
        return {
            "input_ids": torch.tensor(self.input["input_ids"][index]),
            "attention_mask": torch.tensor(self.input["attention_mask"][index]),
            'label': torch.tensor(self.output["input_ids"][index])
        }


def load_prompt_template(path, task_list):
    """
    Load prompt template from the file. Keep training tasks only.
    Input:
    - path: The path for prompt template txt file.
    - task_list: A list of required tasks.
    Return:
    - prompt_templates: a dictionary of prompt templates. e.g., {task: {'seen': {'0': {'Input': template_input, 'Output': template_output}}}}

    """

    if not os.path.exists(path):
        raise FileNotFoundError
    prompt_info = read_line(path)
    prompt_templates = dict()
    for prompt in prompt_info:
        t = [sens.strip() for sens in prompt.split(';')]
        if t[0] not in task_list:
            continue
        if t[0] not in prompt_templates:
            prompt_templates[t[0]] = dict()
        if t[1] not in prompt_templates[t[0]]:
            prompt_templates[t[0]][t[1]] = dict()
        num = len(prompt_templates[t[0]][t[1]])
        prompt_templates[t[0]][t[1]][str(num)] = dict()
        prompt_templates[t[0]][t[1]][str(num)]['Input'] = t[2]
        prompt_templates[t[0]][t[1]][str(num)]['Output'] = t[3]
    return prompt_templates


def generate_user_map(user_sequence_dict):
    """
    generate user map based on user sequence dict.
    """
    user_map = dict()
    for user in user_sequence_dict.keys():
        user_map[user] = str(len(user_map) + 1)
    return user_map


def reindex(user_sequence_dict, user_map, item_map):
    """
    reindex the given user sequence dict by given user map and item map
    """
    reindex_user_sequence_dict = dict()
    for user in user_sequence_dict:
        uid = user_map[user]
        items = user_sequence_dict[user]
        reindex_user_sequence_dict[uid] = [item_map[i] for i in items]

    return reindex_user_sequence_dict


def construct_user_sequence_dict(user_sequence):
    """
    Convert a list of string to a user sequence dict. user as key, item list as value.
    """

    user_seq_dict = dict()
    for line in user_sequence:
        user_seq = line.split(" ")
        user_seq_dict[user_seq[0]] = user_seq[1:]
    return user_seq_dict


def sequential_indexing(data_path, dataset, user_sequence_dict, order):
    """
    Use sequential indexing method to index the given user seuqnece dict.
    """
    user_index_file = os.path.join(data_path, dataset, 'user_indexing.txt')
    item_index_file = os.path.join(data_path, dataset, f'item_sequential_indexing_{order}.txt')
    reindex_sequence_file = os.path.join(data_path, dataset, f'user_sequence_sequential_indexing_{order}.txt')

    if os.path.exists(reindex_sequence_file):
        user_sequence = read_line(reindex_sequence_file)

        item_info = read_line(item_index_file)
        item_map = get_dict_from_lines(item_info)

        return construct_user_sequence_dict(user_sequence), item_map

    # For user index, load from txt file if already exists, otherwise generate from user sequence and save.
    if os.path.exists(user_index_file):
        user_info = read_line(user_index_file)
        user_map = get_dict_from_lines(user_info)
    else:
        user_map = generate_user_map(user_sequence_dict)
        write_dict_2_file(user_index_file, user_map)

    # For item index, load from txt file if already exists, otherwise generate from user sequence and save.
    if os.path.exists(item_index_file):
        item_info = read_line(item_index_file)
        item_map = get_dict_from_lines(item_info)
    else:
        item_map = dict()
        if order == 'original':
            user_list = user_sequence_dict.keys()
        elif order == 'short2long':
            user_list = sorted(user_sequence_dict, key=lambda x: len(user_sequence_dict[x]), reverse=False)
        elif order == 'long2short':
            user_list = sorted(user_sequence_dict, key=lambda x: len(user_sequence_dict[x]), reverse=True)

        for user in user_list:
            items = user_sequence_dict[user][:-2]
            for item in items:
                if item not in item_map:
                    item_map[item] = str(len(item_map) + 1001)
        for user in user_list:
            items = user_sequence_dict[user][-2:]
            for item in items:
                if item not in item_map:
                    item_map[item] = str(len(item_map) + 1001)
        write_dict_2_file(item_index_file, item_map)

    reindex_user_sequence_dict = reindex(user_sequence_dict, user_map, item_map)
    write_dict_2_file(reindex_sequence_file, reindex_user_sequence_dict)
    return reindex_user_sequence_dict, item_map


def get_info_from_prompt(prompt_templates):
    """
    Extract the require information from the prompt templates.
    Input:
    - prompt_templates: a dictionary of prompt templates.
    Output:
    - info: a list of required information.
    """

    info = []
    for task in prompt_templates:
        for see in prompt_templates[task]:
            for i in prompt_templates[task][see]:
                info += re.findall(r'\{.*?\}', prompt_templates[task][see][i]['Input'])
                info += re.findall(r'\{.*?\}', prompt_templates[task][see][i]['Output'])
    info = [i[1:-1] for i in set(info)]
    return info


def check_task_prompt(prompt_templates, task_list):
    """
    Check if all tasks have prompt templates. Raise Error if training tasks have no prompt.
    Input:
    - prompt_templates: A dictionary of prompt templates.
    - task_list: A list of training tasks.
    """
    for task in task_list:
        assert task in prompt_templates, f"No prompt for {task} task"


def evaluation_results(predictions, targets, scores, k):
    results = []
    batch_length = len(targets)
    for b in range(batch_length):
        one_batch_sequence = predictions[
                             b * k: (b + 1) * k
                             ]
        one_batch_score = scores[
                          b * k: (b + 1) * k
                          ]
        pairs = [(a, b) for a, b in zip(one_batch_sequence, one_batch_score)]
        sorted_pairs = sorted(pairs, key=lambda x: x[1], reverse=True)
        gt = targets[b]
        one_results = []
        for sorted_pred in sorted_pairs:
            if sorted_pred[0] == gt:
                one_results.append(1)
            else:
                one_results.append(0)

        results.append(one_results)
    return results


def ndcg_at_k(relevance, k):
    """
    Since we apply leave-one-out, each user only have one ground truth item, so the idcg would be 1.0
    """
    ndcg = 0.0
    for row in relevance:
        rel = row[:k]
        one_ndcg = 0.0
        for i in range(len(rel)):
            one_ndcg += rel[i] / math.log(i + 2, 2)
        ndcg += one_ndcg
    return ndcg


def hit_at_k(relevance, k):
    correct = 0.0
    for row in relevance:
        rel = row[:k]
        if sum(rel) > 0:
            correct += 1
    return correct


def get_metrics_results(rel_results, metrics):
    res = []
    for m in metrics:
        if m.lower().startswith('hit'):
            k = int(m.split('@')[1])
            res.append(hit_at_k(rel_results, k))
        elif m.lower().startswith('ndcg'):
            k = int(m.split('@')[1])
            res.append(ndcg_at_k(rel_results, k))

    return np.array(res)


def load_test(reindex_user_seq_dict, info, dataset, his_prefix, max_his, his_sep):
    data_samples = []
    for user in reindex_user_seq_dict:
        items = reindex_user_seq_dict[user]
        one_sample = dict()
        one_sample['dataset'] = dataset
        one_sample['user_id'] = user
        if his_prefix > 0:
            one_sample['target'] = 'item_' + items[-1]
        else:
            one_sample['target'] = items[-1]
        if 'history' in info:
            history = items[:-1]
            if max_his > 0:
                history = history[-max_his:]
            if his_prefix > 0:
                one_sample['history'] = his_sep.join(["item_" + item_idx for item_idx in history])
            else:
                one_sample['history'] = his_sep.join(history)
        data_samples.append(one_sample)
    return data_samples


def load_validation(reindex_user_seq_dict, info, dataset, his_prefix, max_his, his_sep):
    data_samples = []
    for user in reindex_user_seq_dict:
        items = reindex_user_seq_dict[user]
        one_sample = dict()
        one_sample['dataset'] = dataset
        one_sample['user_id'] = user
        if his_prefix > 0:
            one_sample['target'] = 'item_' + items[-2]
        else:
            one_sample['target'] = items[-2]
        if 'history' in info:
            history = items[:-2]
            if max_his > 0:
                history = history[-max_his:]
            if his_prefix > 0:
                one_sample['history'] = his_sep.join(["item_" + item_idx for item_idx in history])
            else:
                one_sample['history'] = his_sep.join(history)
        data_samples.append(one_sample)
    return data_samples

(3)生成测试、验证数据
文件generate_dataset_eval.py

import json
import os
import sys

import fire

sys.path.append('../')
from utils import sequential_indexing, load_prompt_template, check_task_prompt, get_info_from_prompt
from utils import construct_user_sequence_dict, read_line, load_test, load_validation


def main(data_path: str, item_indexing: str, task: str, dataset: str, prompt_file: str, sequential_order: str,
         max_his: int, his_sep: str, his_prefix: int, skip_empty_his: int,
         mode: str, prompt: str):
    file_data = dict()
    file_data['arguments'] = {
        "data_path": data_path, "item_indexing": item_indexing, "task": task,
        "dataset": dataset, "prompt_file": prompt_file, "sequential_order": sequential_order,
        "max_his": max_his, "his_sep": his_sep, "his_prefix": his_prefix, "skip_empty_his": skip_empty_his,
        "mode": mode, "prompt": prompt
    }
    file_data['data'] = []
    tasks = list(task)

    user_sequence = read_line(os.path.join(data_path, dataset, 'user_sequence.txt'))
    user_sequence_dict = construct_user_sequence_dict(user_sequence)
    reindex_user_seq_dict, item_map = sequential_indexing(data_path, dataset,
                                                          user_sequence_dict, sequential_order)

    # get prompt
    prompt_ = load_prompt_template(prompt_file, tasks)
    info = get_info_from_prompt(prompt_)
    check_task_prompt(prompt_, tasks)

    # Load data samples
    if mode == 'validation':
        data_samples = load_validation(reindex_user_seq_dict, info, dataset, his_prefix, max_his, his_sep)
        prompt_info = prompt.split(':')
        output_path = f'{dataset}_{task}_{item_indexing}_validation_{prompt}.json'
    elif mode == 'test':
        data_samples = load_test(reindex_user_seq_dict, info, dataset, his_prefix, max_his, his_sep)
        prompt_info = prompt.split(':')
        output_path = f'{dataset}_{task}_{item_indexing}_test_{prompt}.json'
    else:
        raise NotImplementedError

    # construct sentences
    for i in range(len(data_samples)):
        one_sample = data_samples[i]
        for t in tasks:
            datapoint = {'task': dataset + t,
                         'instruction': prompt_[t][prompt_info[0]][prompt_info[1]]['Input'],
                         'input': prompt_[t][prompt_info[0]][prompt_info[1]]['Input'].format(**one_sample),
                         'output': prompt_[t][prompt_info[0]][prompt_info[1]]['Output'].format(**one_sample)}
            file_data['data'].append(datapoint.copy())

    with open(os.path.join(data_path, dataset, output_path), 'w') as openfile:
        json.dump(file_data, openfile)


if __name__ == "__main__":
    fire.Fire(main)

3.2 模型预训练

基于 T5 基地
文件t5_pre-train.py

import os
import fire
import transformers
from datasets import load_dataset
from transformers import (
    T5Config,
    T5ForConditionalGeneration,
    AutoTokenizer,
    Trainer,
    TrainingArguments
)


def main(backbone: str, data_path: str, item_indexing: str, task: str, dataset: str,
         valid_prompt: str, cutoff: int, model_dir: str, batch_size: int, valid_select: int,
         epochs: int, lr: float, warmup_steps: int, gradient_accumulation_steps: int,
         logging_steps: int, optim: str, eval_steps: int, save_steps: int, save_total_limit: int):

    config = T5Config.from_pretrained(backbone)
    model = T5ForConditionalGeneration.from_pretrained(backbone, config=config)
    tokenizer = AutoTokenizer.from_pretrained(backbone)

    train_data_file = os.path.join(data_path, dataset,
                                   f'{dataset}_{task}_{item_indexing}_train.json')
    valid_data_file = os.path.join(data_path, dataset,
                                   f'{dataset}_{task}_{item_indexing}_validation_{valid_prompt}.json')
    train_data = load_dataset("json", data_files=train_data_file, field='data')
    valid_data = load_dataset("json", data_files=valid_data_file, field='data')

    def tokenize(prompt, add_eos_token=True):
        result = tokenizer(
            prompt, truncation=True, max_length=cutoff, padding=False, return_tensors=None,
        )
        if (isinstance(result["input_ids"][-1], int) and result["input_ids"][-1] != tokenizer.eos_token_id
                and len(result["input_ids"]) < cutoff
                and add_eos_token
        ):
            result["input_ids"].append(tokenizer.eos_token_id)
            result["attention_mask"].append(1)
        elif isinstance(result["input_ids"][-1], list) and add_eos_token:
            for i in range(len(result['input_ids'])):
                if result["input_ids"][i][-1] != tokenizer.eos_token_id and len(result["input_ids"][i]) < cutoff:
                    result["input_ids"][i].append(tokenizer.eos_token_id)
                    result["attention_mask"][i].append(1)
        result["labels"] = result["input_ids"].copy()
        return result

    def process_func(datapoint):
        encoding = tokenize(datapoint['input'], add_eos_token=True)
        labels = tokenize(datapoint['output'], add_eos_token=True)
        encoding['labels'] = labels['input_ids'].copy()
        # return encoding
        return {**datapoint, **encoding}

    tokenizer.pad_token_id = (
        0  # unk. we want this to be different from the eos token
    )
    tokenizer.padding_side = "left"
    train_set = train_data['train'].shuffle().map(process_func, batched=True)
    valid_set = valid_data['train'].shuffle().map(process_func, batched=True)
    output_dir = os.path.join(model_dir, dataset, item_indexing, backbone)
    trainer = Trainer(
        model=model,
        train_dataset=train_set,
        eval_dataset=valid_set if valid_select > 0 else None,
        args=TrainingArguments(
            per_device_train_batch_size=batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps,
            warmup_steps=warmup_steps,
            num_train_epochs=epochs,
            learning_rate=lr,
            logging_steps=logging_steps,
            optim=optim,
            evaluation_strategy="steps" if valid_select > 0 else "no",
            save_strategy="steps",
            eval_steps=eval_steps if valid_select > 0 else None,
            save_steps=save_steps,
            output_dir=output_dir,
            save_total_limit=save_total_limit,
            load_best_model_at_end=True if valid_select > 0 else False,
            group_by_length=False,
        ),
        data_collator=transformers.DataCollatorForSeq2Seq(
            tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
        ),
    )
    trainer.train()  # 进行模型训练
    model.save_pretrained(output_dir)  # 保存预训练好的模型
    tokenizer.save_pretrained(output_dir)  # 保存token


if __name__ == "__main__":
    fire.Fire(main)

文件pre-train.sh

#!/bin/bash

dir_path="../logs/mind/"

if [ ! -d "$dir_path" ]; then
    mkdir -p "$dir_path"
fi

PYTORCH_ENABLE_MPS_FALLBACK=1 torchrun t5_pre-train.py --item_indexing sequential \
--task sequential,straightforward --dataset mind --epochs 1 --batch_size 1024 \
--backbone t5-small --cutoff 1024 --data_path /Users/liuqiang/Desktop/code/llm4rec/llm4rec_abc/src/basic_skills/train-llm/data \
--valid_prompt seen:0 --model_dir /Users/liuqiang/Desktop/code/llm4rec/llm4rec_abc/src/basic_skills/train-llm/models \
--lr 1e-3 --valid_select 1 --warmup_steps 100 --gradient_accumulation_steps 10 --logging_steps 10 --optim 'adamw_torch' \
--eval_steps 200 --save_steps 200 --save_total_limit 3

3.3 模型推理与验证

文件t5_evaluate.py

import logging
import os
import sys

import fire
import numpy as np
from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import (
    T5ForConditionalGeneration,
    AutoTokenizer,
)

sys.path.append('../')
from utils import EvaluationDataset, evaluation_results, get_metrics_results


def main(log_dir: str, checkpoint_path: str, data_path: str, item_indexing: str, task: str,
         dataset: str, cutoff: int, test_prompt: str, eval_batch_size: int, metrics: str):
    # setup
    log_file = os.path.join(log_dir, dataset,
                            checkpoint_path.replace('.', '').replace('/', '_') + '.log')

    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
    logging.basicConfig(filename=log_file, level=logging.INFO,
                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))

    model = T5ForConditionalGeneration.from_pretrained(checkpoint_path)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)
    tokenizer.pad_token_id = (
        0  # unk. we want this to be different from the eos token
    )
    tokenizer.padding_side = "left"
    # load test data
    test_data_file = os.path.join(data_path, dataset,
                                  f'{dataset}_{task}_{item_indexing}_test_{test_prompt}.json')
    logging.info("test_data_file=" + test_data_file)
    test_data = load_dataset("json", data_files=test_data_file, field='data')
    model.eval()
    metrics = list(metrics)
    generate_num = max([int(m.split('@')[1]) for m in metrics])
    task_list = np.unique(test_data['train']['task'])
    for t in task_list:
        logging.info(f'testing on {t}')
        subset_data = test_data.filter(lambda example: example['task'] == t)
        dataset = EvaluationDataset(subset_data['train'], tokenizer, cutoff)
        dataloader = DataLoader(dataset, batch_size=eval_batch_size, shuffle=False)
        test_total = 0
        metrics_res = np.array([0.0] * len(metrics))
        for batch in tqdm(dataloader):
            """
            下面是一个batch的案例：
                {'input_ids': tensor([[    3, 21419, 12587,  ...,     0,     0,     0],
                ...,
                [    3, 21419, 12587,  ...,     0,     0,     0]]), 
                
                'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
                ...,
                [1, 1, 1,  ..., 0, 0, 0]]), 
                
                'label': tensor([[12587,  2118,   834, 22504,  2577,     1,     0],
                [12587,  2118,   834, 19993,  4867,     1,     0],
                ...,
                [12587,  2118,   834, 19993,  5062,     1,     0]])}
            """

            prediction = model.generate(  # 大模型模型生成函数
                input_ids=batch["input_ids"],  # torch.LongTensor of shape (batch_size, sequence_length)
                attention_mask=batch["attention_mask"],  # torch.FloatTensor of shape (batch_size, sequence_length)
                max_length=30,
                num_beams=generate_num,
                num_return_sequences=generate_num,
                output_scores=True,
                return_dict_in_generate=True,
            )
            output_ids = batch['label']
            prediction_ids = prediction["sequences"]  # 利用大模型进行预测，输出的向量化的，需要解码
            prediction_scores = prediction["sequences_scores"]
            gold_sents = tokenizer.batch_decode(  # 用户真实的点击记录
                output_ids, skip_special_tokens=True
            )
            generated_sents = tokenizer.batch_decode(  # 大模型预测的点击记录
                prediction_ids, skip_special_tokens=True
            )
            rel_results = evaluation_results(generated_sents, gold_sents, prediction_scores, generate_num)
            test_total += len(rel_results)
            metrics_res += get_metrics_results(rel_results, metrics)

        metrics_res /= test_total
        for i in range(len(metrics)):
            logging.info(f'{metrics[i]}: {metrics_res[i]}')


if __name__ == "__main__":
    fire.Fire(main)

执行t5_evaluate.sh


python t5_evaluate.py --dataset mind --task sequential,straightforward --item_indexing sequential --backbone t5-small \
--checkpoint_path /Users/liuqiang/Desktop/code/llm4rec/llm4rec_abc/src/basic_skills/train-llm/models/mind/sequential/t5-small/ \
 --test_prompt seen:0 --log_dir '../logs' \
--data_path /Users/liuqiang/Desktop/code/llm4rec/llm4rec_abc/src/basic_skills/train-llm/data \
--cutoff 1024 --eval_batch_size 32 --metrics hit@5,hit@10,ndcg@5,ndcg@10


python t5_evaluate.py --dataset mind --task sequential,straightforward --item_indexing sequential --backbone t5-small \
--checkpoint_path /Users/liuqiang/Desktop/code/llm4rec/llm4rec_abc/src/basic_skills/train-llm/models/mind/sequential/t5-small/ \
--test_prompt unseen:0 --log_dir '../logs' \
--data_path /Users/liuqiang/Desktop/code/llm4rec/llm4rec_abc/src/basic_skills/train-llm/data \
--cutoff 1024 --eval_batch_size 32 --metrics hit@5,hit@10,ndcg@5,ndcg@10