# Video walkthrough (视频讲解): https://www.yuque.com/chudi/tzqav9/ny150b#aalY8
import tensorflow as tf
from tensorflow import keras
from utils import *
# Training hyper-parameters consumed by run().
# Number of epochs passed to model.fit.
EPOCH = 10
# Mini-batch size passed to model.fit.
BATCH_SIZE = 32
# Output dimension of the embedding layer.
VEC_DIM = 10
# Unit count of each Dense layer in the deep tower, in order.
DNN_LAYERS = [64, 128, 64]
# Rate used by every Dropout layer in the model.
DROPOUT_RATE = 0.5
# Loaded once at import time; run() trains on `base` and validates on `test`.
# loadData is presumably provided by the star-import of utils -- confirm.
base, test = loadData()
# Sum, over all features, of the number of distinct category values.
# Used as the embedding vocabulary size (input_dim) in run().
# NOTE(review): the "- 1" presumably drops a label column -- confirm against loadData.
FEAT_CATE_NUM = base.shape[1] - 1
# Short alias for the Keras backend ops used by CrossLayer.
K = tf.keras.backend
class CrossLayer(keras.layers.Layer):
    """Second-order feature-interaction (FM-style cross) layer.

    Owns a trainable factor matrix ``V`` of shape ``(feat_num, vec_dim)`` and
    reduces its input to a single interaction score per row using the
    "square of the sum minus sum of the squares" identity.

    Args:
        feat_num: Number of rows of ``V``; must equal the size of the input's
            last axis so that ``K.dot(inputs, V)`` is defined.
        vec_dim: Number of columns of ``V`` (latent factors per feature).
        **kwargs: Forwarded unchanged to ``keras.layers.Layer``.
    """

    def __init__(self, feat_num, vec_dim, **kwargs):
        # Initialise the Keras base class before assigning any attribute so
        # that the base layer's own state exists when attributes are set.
        super(CrossLayer, self).__init__(**kwargs)
        self.feat_num = feat_num
        self.vec_dim = vec_dim

    def build(self, input_shape):
        """Create the trainable factor matrix ``V`` (uniform initialiser)."""
        self.V = self.add_weight(
            name='V',
            shape=(self.feat_num, self.vec_dim),
            initializer='uniform',
            trainable=True,
        )
        super(CrossLayer, self).build(input_shape)

    def call(self, inputs, **kwargs):
        """Return the pairwise-interaction score of ``inputs``.

        Args:
            inputs: Tensor whose last axis has ``feat_num`` entries
                (presumably 2-D, ``(batch, feat_num)`` -- confirm with callers).
            **kwargs: Accepted for Keras compatibility; not used.

        Returns:
            ``0.5 * mean((inputs . V)^2 - (inputs^2 . V^2))`` reduced over
            axis 1 with ``keepdims=True``.
        """
        # (x . V)^2: square of the weighted sum, per latent factor.
        square_of_sum = K.pow(K.dot(inputs, self.V), 2)
        # (x^2 . V^2): sum of the squared terms, per latent factor.
        sum_of_squares = K.dot(K.pow(inputs, 2), K.pow(self.V, 2))
        # Their difference is twice the sum of pairwise products; note that
        # the reduction over axis 1 is a mean, not a sum.
        return 0.5 * K.mean(square_of_sum - sum_of_squares, 1, keepdims=True)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(CrossLayer, self).get_config()
        config.update({'feat_num': self.feat_num, 'vec_dim': self.vec_dim})
        return config
def run():
    """Build, compile and train the DeepFM-style model.

    Uses the module-level ``base`` data for training and ``test`` for
    validation. The network combines three parts before a single sigmoid
    output unit:

    * deep part: embedding -> flatten -> dropout -> Dense/Dropout stack
      sized by ``DNN_LAYERS``;
    * FM part: the dot product of every pair of per-field embeddings;
    * wide part: the one-hot features, fed in unchanged.

    Training progress is written to ``./logs`` through a TensorBoard
    callback. Nothing is returned.
    """
    # getAllData returns the id-encoded features, the one-hot features and
    # the labels (per the original note on this call).
    valid_ids, valid_onehot, valid_labels = getAllData(test)
    train_ids, train_onehot, train_labels = getAllData(base)

    # Input widths are read from the first validation sample.
    cate_num = valid_ids[0].shape[0]
    hot_num = valid_onehot[0].shape[0]

    # ---- Deep part ----
    id_input = keras.Input(shape=(cate_num,))
    embedded = keras.layers.Embedding(
        input_dim=FEAT_CATE_NUM,
        output_dim=VEC_DIM,
        input_length=cate_num,
    )(id_input)
    hidden = keras.layers.Flatten()(embedded)
    hidden = keras.layers.Dropout(DROPOUT_RATE)(hidden)
    for width in DNN_LAYERS:
        hidden = keras.layers.Dense(width, activation='relu')(hidden)
        hidden = keras.layers.Dropout(DROPOUT_RATE)(hidden)

    # ---- FM part ----
    # Cut the embedding tensor along axis 1 into one small embedding per
    # field and flatten each piece.
    field_vectors = [
        keras.layers.Flatten()(piece)
        for piece in tf.split(embedded, cate_num, axis=1)
    ]
    # Inner product of every unordered pair of field embeddings (i < j).
    pair_scores = [
        keras.layers.Dot(axes=1)([left, right])
        for pos, left in enumerate(field_vectors)
        for right in field_vectors[pos + 1:]
    ]

    # ---- Wide part and output head ----
    wide = keras.Input(shape=(hot_num,))
    merged = keras.layers.concatenate(pair_scores + [wide, hidden])
    merged = keras.layers.Dropout(DROPOUT_RATE)(merged)
    output_head = keras.layers.Dense(
        1,
        activation='sigmoid',
        kernel_regularizer=keras.regularizers.l2(0.001),
    )
    outputs = output_head(merged)

    model = keras.Model(inputs=[id_input, wide], outputs=outputs)
    # NOTE(review): tf.train.AdamOptimizer looks like the TF 1.x optimizer
    # API -- confirm the TensorFlow version this script targets.
    model.compile(
        optimizer=tf.train.AdamOptimizer(0.001),
        loss='binary_crossentropy',
        metrics=[keras.metrics.AUC()],
    )

    tensorboard = keras.callbacks.TensorBoard(
        log_dir='./logs',
        histogram_freq=0,
        write_graph=True,
        write_grads=True,
        write_images=True,
        embeddings_freq=0,
        embeddings_layer_names=None,
        embeddings_metadata=None,
    )

    model.fit(
        x=[train_ids, train_onehot],
        y=train_labels,
        batch_size=BATCH_SIZE,
        epochs=EPOCH,
        verbose=2,
        validation_data=([valid_ids, valid_onehot], valid_labels),
        callbacks=[tensorboard],
    )
# Script entry point: called unconditionally at module level, so the model is
# built and trained whenever this file is executed or imported.
run()