专栏首页Soul Joy Hub深度推荐模型——BST [KDD 19][Alibaba]

深度推荐模型——BST [KDD 19][Alibaba]

受Transformer在自然语言处理领域取得巨大成功的启发，BST将Transformer用于提取用户行为序列背后的隐藏信息，同时考虑行为序列的先后顺序，从而能够更好地表达用户兴趣。

import tensorflow as tf
from tensorflow import keras
from utils import *
import numpy as np

# Training-loop settings passed to model.fit() in run().
EPOCH = 10
BATCH_SIZE = 32
# Width of every embedding vector; also the Transformer block's embed_dim.
VEC_DIM = 16
# Dropout rate shared by the Transformer block and the dense tower.
DROPOUT_RATE = 0.2
# Number of attention heads; VEC_DIM must be divisible by it.
HEAD_NUM = 8
# Hidden width (ff_dim) of the Transformer block's feed-forward sub-network.
HIDE_SIZE = 32
# NOTE(review): not referenced in the visible code; run() builds a single
# TransformerBlock.
LAYER_NUM = 3
# Unit counts of the dense layers stacked after the Transformer output.
DNN_LAYERS = [1024, 512, 256]
# Loaded once at import time; load_data presumably comes from the wildcard
# `utils` import -- TODO confirm.
data, max_user_id, max_item_id = load_data()
# Number of behavior features (length of the item/rating history inputs).
BEHAVIOR_FEAT_NUM = 32
# Shorthand for the Keras backend; not referenced in the visible code.
K = tf.keras.backend


class MultiHeadSelfAttention(keras.layers.Layer):
    """Multi-head scaled dot-product self-attention.

    The input is projected to queries, keys and values with three dense
    layers, split into ``num_heads`` heads of width
    ``embed_dim // num_heads``, attended per head, merged back and passed
    through a final dense layer.

    Args:
        embed_dim: Output width of the query/key/value/output projections.
        num_heads: Number of attention heads; must divide ``embed_dim``.

    Raises:
        ValueError: If ``embed_dim`` is not divisible by ``num_heads``.
    """

    def __init__(self, embed_dim, num_heads=8):
        super(MultiHeadSelfAttention, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        if embed_dim % num_heads != 0:
            raise ValueError(
                f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}"
            )
        # Width of each individual head.
        self.projection_dim = embed_dim // num_heads
        self.query_dense = keras.layers.Dense(embed_dim)
        self.key_dense = keras.layers.Dense(embed_dim)
        self.value_dense = keras.layers.Dense(embed_dim)
        self.combine_heads = keras.layers.Dense(embed_dim)

    def attention(self, query, key, value):
        """Return ``(output, weights)`` of scaled dot-product attention.

        ``weights`` is the softmax over the last axis of ``query @ key^T``
        divided by the square root of the size of ``key``'s last axis;
        ``output`` is ``weights @ value``.
        """
        score = tf.matmul(query, key, transpose_b=True)
        # Index the dynamic shape vector to get the size of the last axis
        # (the per-head key width). Taking `.shape[-1]` of the shape vector
        # would give the tensor's rank instead and mis-scale the scores.
        key_dim = tf.cast(tf.shape(key)[-1], tf.float32)
        scaled_score = score / tf.math.sqrt(key_dim)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, value)
        return output, weights

    def separate_heads(self, x, batch_size):
        """Reshape ``x`` to [batch_size, num_heads, seq_len, projection_dim]."""
        x = tf.reshape(x, shape=(batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs, **kwargs):
        """Apply self-attention to ``inputs``; the last axis of the result is ``embed_dim``."""
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)
        query = self.separate_heads(query, batch_size)
        key = self.key_dense(inputs)
        key = self.separate_heads(key, batch_size)
        value = self.value_dense(inputs)
        value = self.separate_heads(value, batch_size)
        # The attention weights are not needed by callers of this layer.
        attention, _ = self.attention(query, key, value)
        attention = tf.transpose(attention, perm=[0, 2, 1, 3])  # [batch_size,seq_len,num_heads,projection_dim]
        attention = tf.reshape(attention, shape=(batch_size, -1, self.embed_dim))
        return self.combine_heads(attention)  # [batch_size,seq_len,embed_dim]


class TransformerBlock(keras.layers.Layer):
    """Encoder-style block: self-attention then a feed-forward network.

    Each of the two sub-layers is followed by dropout (rate taken from the
    module-level ``DROPOUT_RATE``), a residual connection and layer
    normalization.

    Args:
        embed_dim: Width of the block's input and output vectors.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
    """

    def __init__(self, embed_dim, num_heads, ff_dim):
        super().__init__()
        # Sub-layers are created in a fixed order: attention, feed-forward,
        # the two normalizations, then the two dropouts.
        self.att = MultiHeadSelfAttention(embed_dim, num_heads)
        self.ffn = keras.Sequential(
            [
                keras.layers.Dense(ff_dim, activation="relu"),
                keras.layers.Dense(embed_dim),
            ]
        )
        self.layernorm1 = keras.layers.LayerNormalization()
        self.layernorm2 = keras.layers.LayerNormalization()
        self.dropout1 = keras.layers.Dropout(DROPOUT_RATE)
        self.dropout2 = keras.layers.Dropout(DROPOUT_RATE)

    def call(self, inputs, **kwargs):
        """Run ``inputs`` through the attention and feed-forward sub-layers."""
        # Attention sub-layer with residual connection.
        attended = self.dropout1(self.att(inputs))
        normed_attention = self.layernorm1(inputs + attended)
        # Feed-forward sub-layer with residual connection.
        fed_forward = self.dropout2(self.ffn(normed_attention))
        return self.layernorm2(normed_attention + fed_forward)


def run():
    """Build, compile and fit the BST model on the module-level ``data``.

    The model takes four inputs (user id, current item id, history item
    ids, history rating ids), embeds them, runs the behavior sequence plus
    the current item through one ``TransformerBlock``, concatenates the
    flattened result with the user embedding and feeds it to a dense tower
    ending in a sigmoid unit. Training logs go to ``./logs`` for
    TensorBoard.
    """
    # Per the original note: every categorical feature value has been mapped
    # to an integer id, and each row holds those ids. get_all_data is
    # presumably provided by the wildcard `utils` import -- TODO confirm.
    (train_users, train_targets, train_hist_items, train_hist_ratings, train_labels,
     test_users, test_targets, test_hist_items, test_hist_ratings, test_labels) = get_all_data(data)

    seq_len = BEHAVIOR_FEAT_NUM + 1  # history items plus the current item

    user_in = keras.Input((1,))
    target_in = keras.Input((1,))
    hist_items_in = keras.Input((BEHAVIOR_FEAT_NUM,))
    hist_ratings_in = keras.Input((BEHAVIOR_FEAT_NUM,))

    user_vec = keras.layers.Embedding(max_user_id + 1, VEC_DIM, input_length=1)(user_in)  # [-1,1,vec_dim]
    user_vec = keras.layers.Flatten()(user_vec)  # [-1,vec_dim]
    target_vec = keras.layers.Embedding(max_item_id + 1, VEC_DIM, input_length=1)(target_in)  # [-1,1,vec_dim]

    # Token embedding: each history step is the element-wise product of its
    # item embedding and its rating embedding; the current item is appended
    # as the last step of the sequence.
    hist_item_vecs = keras.layers.Embedding(
        max_item_id + 1, VEC_DIM, input_length=BEHAVIOR_FEAT_NUM
    )(hist_items_in)  # [-1,BEA_FEAT_NUM,vec_dim]
    hist_rating_vecs = keras.layers.Embedding(
        6, VEC_DIM, input_length=BEHAVIOR_FEAT_NUM
    )(hist_ratings_in)  # [-1,BEA_FEAT_NUM,vec_dim]
    tokens = hist_item_vecs * hist_rating_vecs  # [-1,BEA_FEAT_NUM,vec_dim]
    tokens = keras.layers.concatenate([tokens, target_vec], axis=1)  # [-1,BEA_FEAT_NUM + 1,vec_dim]

    # Position embedding: one vector per sequence index, added to the tokens.
    position_ids = tf.range(start=0, limit=seq_len, delta=1)
    position_vecs = keras.layers.Embedding(seq_len, VEC_DIM, input_length=seq_len)(position_ids)
    sequence = tokens + position_vecs

    encoded = TransformerBlock(
        embed_dim=VEC_DIM, num_heads=HEAD_NUM, ff_dim=HIDE_SIZE
    )(sequence)  # [-1,BEA_FEAT_NUM + 1,vec_dim]
    print(np.shape(encoded))
    encoded = keras.layers.Flatten()(encoded)

    # Dense tower over [user embedding, flattened Transformer output].
    hidden = keras.layers.concatenate([user_vec, encoded])
    hidden = keras.layers.Dropout(DROPOUT_RATE)(hidden)
    for width in DNN_LAYERS:
        hidden = keras.layers.Dense(width)(hidden)
        hidden = keras.layers.LeakyReLU()(hidden)
        hidden = keras.layers.Dropout(DROPOUT_RATE)(hidden)

    prediction = keras.layers.Dense(1, activation='sigmoid')(hidden)

    model = keras.Model(
        inputs=[user_in, target_in, hist_items_in, hist_ratings_in],
        outputs=prediction,
    )
    model.compile(
        loss='binary_crossentropy',
        optimizer=tf.train.AdamOptimizer(0.001),
        metrics=[keras.metrics.AUC()],
    )

    tensorboard_cb = keras.callbacks.TensorBoard(
        log_dir='./logs',
        histogram_freq=0,
        write_graph=True,
        write_grads=True,
        write_images=True,
        embeddings_freq=0,
        embeddings_layer_names=None,
        embeddings_metadata=None,
    )

    train_inputs = [train_users, train_targets, train_hist_items, train_hist_ratings]
    test_inputs = [test_users, test_targets, test_hist_items, test_hist_ratings]
    model.fit(
        train_inputs,
        train_labels,
        batch_size=BATCH_SIZE,
        epochs=EPOCH,
        verbose=2,
        validation_data=(test_inputs, test_labels),
        callbacks=[tensorboard_cb],
        workers=4,
    )


# Script entry point: builds and trains the model as soon as this file is
# executed (or imported -- there is no __main__ guard).
run()

本文参与腾讯云自媒体分享计划,欢迎正在阅读的你也加入,一起分享。

我来说两句

0 条评论
登录 后参与评论

相关文章

  • 20篇最值得一读的深度推荐系统与CTR预估论文

    微博上近日流传一个段子,“2020年曾是各大科幻片中遥远的未来,但是现在离这个遥远的未来也只有6个月时间了”。只是借此感慨一下2019年转瞬之间半年的时间已经过...

    AI科技大本营
  • 知道吗?BAT去年在KDD上作为第一单位发表了12篇文章!(内附每篇文章解读)

    如今,全世界每天都有几十亿人在使用计算机、平板电脑、手机和其它数字设备产生海量数据。各个行业和领域都已经被数据给渗透,数据已成为非常重要的生产因素的大数据时代,...

    量子位
  • Embedding从入门到专家必读的十篇论文

    今天我们不分析论文,而是总结一下Embedding方法的学习路径,这也是我三四年前从接触word2vec,到在推荐系统中应用Embedding,再到现在逐渐从传...

    zenRRan
  • KDD 2019高维稀疏数据上的深度学习Workshop论文汇总

    【导读】本文是“深度推荐系统”专栏的第九篇文章,这个系列将介绍在深度学习的强力驱动下,给推荐系统工业界所带来的最前沿的变化。本文简要总结一下阿里妈妈在 KDD ...

    AI科技大本营
  • 深度推荐模型——PNN [TOIS 16][交大]

    视频讲解:https://www.yuque.com/chudi/tzqav9/ny150b#aalY8

    用户1621453
  • 深度推荐模型——FNN [ECIR 16]

    视频讲解:https://www.yuque.com/chudi/tzqav9/ny150b#aalY8

    用户1621453
  • 深度推荐模型——FFM

    视频讲解:https://www.yuque.com/chudi/tzqav9/ny150b#aalY8

    用户1621453
  • 深度推荐模型——FM

    视频讲解:https://www.yuque.com/chudi/tzqav9/ny150b#aalY8

    用户1621453
  • 深度推荐模型——FiBiNet[RecSys 19][Weibo]

    微博提出的FiBiNet相当于对FNN进行了两部分的改进: 1、SENET Layer。作者认为模型需要学习不同特征的一个重要程度,对重要特征加权,对蕴含信息...

    用户1621453

扫码关注云+社区

领取腾讯云代金券