# -*- coding:utf-8 -*-
"""
Author:
    Zichao Li, 2843656167@qq.com

Reference:
    Qiwei Chen, Huan Zhao, Wei Li, Pipei Huang, and Wenwu Ou. 2019. Behavior sequence transformer for e-commerce recommendation in Alibaba. In Proceedings of the 1st International Workshop on Deep Learning Practice for High-Dimensional Sparse Data (DLP-KDD '19). Association for Computing Machinery, New York, NY, USA, Article 12, 1–4. DOI: https://doi.org/10.1145/3326937.3341261
"""
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import (Dense, Flatten)
from ...feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, build_input_features
from ...inputs import get_varlen_pooling_list, create_embedding_matrix, embedding_lookup, varlen_embedding_lookup, \
    get_dense_input
from ...layers.core import DNN, PredictionLayer
from ...layers.sequence import Transformer, AttentionSequencePoolingLayer
from ...layers.utils import concat_func, combined_dnn_input


def BST(dnn_feature_columns, history_feature_list, transformer_num=1, att_head_num=8,
        use_bn=False, dnn_hidden_units=(256, 128, 64), dnn_activation='relu', l2_reg_dnn=0,
        l2_reg_embedding=1e-6, dnn_dropout=0.0, seed=1024, task='binary'):
"""Instantiates the BST architecture.
:param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
:param history_feature_list: list, to indicate sequence sparse field.
:param transformer_num: int, the number of transformer layer.
:param att_head_num: int, the number of heads in multi-head self attention.
:param use_bn: bool. Whether use BatchNormalization before activation or not in deep net
:param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN
:param dnn_activation: Activation function to use in DNN
:param l2_reg_dnn: float. L2 regularizer strength applied to DNN
:param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
:param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
:param seed: integer ,to use as random seed.
:param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
:return: A Keras model instance.
"""
    features = build_input_features(dnn_feature_columns)
    inputs_list = list(features.values())

    user_behavior_length = features["seq_length"]

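    # Partition the feature columns by type: plain sparse, dense, and
    # variable-length sparse (sequence) features.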
    sparse_feature_columns = list(
        filter(lambda x: isinstance(x, SparseFeat), dnn_feature_columns)) if dnn_feature_columns else []
    dense_feature_columns = list(
        filter(lambda x: isinstance(x, DenseFeat), dnn_feature_columns)) if dnn_feature_columns else []
    varlen_sparse_feature_columns = list(
        filter(lambda x: isinstance(x, VarLenSparseFeat), dnn_feature_columns)) if dnn_feature_columns else []

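    # Variable-length columns named "hist_" + <field> (for each field in
    # history_feature_list) form the behavior sequence fed to the Transformer;
    # all other sequence features are pooled separately for the DNN.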
    history_feature_columns = []
    sparse_varlen_feature_columns = []
    history_fc_names = list(map(lambda x: "hist_" + x, history_feature_list))

    for fc in varlen_sparse_feature_columns:
        feature_name = fc.name
        if feature_name in history_fc_names:
            history_feature_columns.append(fc)
        else:
            sparse_varlen_feature_columns.append(fc)

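    # Create one embedding table per sparse feature; seq_mask_zero=True makes
    # index 0 act as padding in the sequence embeddings so downstream layers
    # can mask it out.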
    embedding_dict = create_embedding_matrix(dnn_feature_columns, l2_reg_embedding, seed, prefix="",
                                             seq_mask_zero=True)
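    # Look up three groups of embeddings: the target item (query), the
    # behavior history (keys/values for attention), and the remaining sparse
    # features for the DNN (the raw history fields are masked out of this
    # last group to avoid feeding the sequence in twice).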
    query_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns,
                                      return_feat_list=history_feature_list, to_list=True)
    hist_emb_list = embedding_lookup(embedding_dict, features, history_feature_columns,
                                     return_feat_list=history_fc_names, to_list=True)
    dnn_input_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns,
                                          mask_feat_list=history_feature_list, to_list=True)
    dense_value_list = get_dense_input(features, dense_feature_columns)

    sequence_embed_dict = varlen_embedding_lookup(embedding_dict, features, sparse_varlen_feature_columns)
    sequence_embed_list = get_varlen_pooling_list(sequence_embed_dict, features, sparse_varlen_feature_columns,
                                                  to_list=True)
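    # Pooled non-history sequence features join the DNN inputs; the query and
    # history embeddings are each concatenated along the embedding axis.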
    dnn_input_emb_list += sequence_embed_list

    query_emb = concat_func(query_emb_list)
    deep_input_emb = concat_func(dnn_input_emb_list)
    hist_emb = concat_func(hist_emb_list)
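    # Stack transformer_num Transformer blocks over the behavior sequence.
    # The per-head size is derived so that att_head_num * att_embedding_size
    # equals the input embedding dimension, which must therefore be divisible
    # by att_head_num (otherwise the residual connection breaks on shape).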
    transformer_output = hist_emb

    for _ in range(transformer_num):
        att_embedding_size = transformer_output.get_shape().as_list()[-1] // att_head_num
        transformer_layer = Transformer(att_embedding_size=att_embedding_size, head_num=att_head_num,
                                        dropout_rate=dnn_dropout, use_positional_encoding=True, use_res=True,
                                        use_feed_forward=True, use_layer_norm=True, blinding=False, seed=seed,
                                        supports_masking=False, output_type=None)
        transformer_output = transformer_layer([transformer_output, transformer_output,
                                                user_behavior_length, user_behavior_length])
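    # Pool the transformed sequence into a fixed-length vector with DIN-style
    # attention: the target-item embedding acts as the query over the sequence,
    # with softmax-normalized weights and padding masked via seq_length.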
    attn_output = AttentionSequencePoolingLayer(att_hidden_units=(64, 16), weight_normalization=True,
                                                supports_masking=False)([query_emb, transformer_output,
                                                                         user_behavior_length])
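    # Concatenate the pooled interest vector with the other embeddings,
    # flatten, append the dense features, and score with an MLP followed by a
    # single-unit logit and the task-specific prediction layer.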
    deep_input_emb = concat_func([deep_input_emb, attn_output], axis=-1)
    deep_input_emb = Flatten()(deep_input_emb)

    dnn_input = combined_dnn_input([deep_input_emb], dense_value_list)
    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, use_bn, seed=seed)(dnn_input)
    final_logit = Dense(1, use_bias=False)(output)
    output = PredictionLayer(task)(final_logit)

    model = Model(inputs=inputs_list, outputs=output)
    return model
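

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative, not part of the library). The feature
# names, vocabulary sizes, and toy batch below are invented for demonstration;
# because this module uses package-relative imports, the sketch assumes it is
# executed inside its package (e.g. via ``python -m``), not as a bare script.
# Behavior fields must be named "hist_" + <field>, and the sequence-length
# input must be named "seq_length" to match the lookup in BST above.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import numpy as np

    # Hypothetical schema: target item id/category, a user profile field, one
    # dense feature, and length-4 behavior sequences that share the item and
    # category embedding tables via embedding_name.
    feature_columns = [
        SparseFeat('item_id', vocabulary_size=100, embedding_dim=8),
        SparseFeat('cate_id', vocabulary_size=20, embedding_dim=8),
        SparseFeat('gender', vocabulary_size=2, embedding_dim=4),
        DenseFeat('pay_score', 1),
        VarLenSparseFeat(SparseFeat('hist_item_id', vocabulary_size=100, embedding_dim=8,
                                    embedding_name='item_id'), maxlen=4, length_name='seq_length'),
        VarLenSparseFeat(SparseFeat('hist_cate_id', vocabulary_size=20, embedding_dim=8,
                                    embedding_name='cate_id'), maxlen=4, length_name='seq_length'),
    ]

    # The concatenated history embedding is 16-dimensional, so att_head_num
    # must divide 16 (here: 4 heads of size 4).
    model = BST(feature_columns, history_feature_list=['item_id', 'cate_id'],
                transformer_num=1, att_head_num=4)
    model.compile('adam', 'binary_crossentropy')

    # Toy batch of two samples; 0 is the padding index in the sequences.
    x = {
        'item_id': np.array([1, 2]), 'cate_id': np.array([1, 2]),
        'gender': np.array([0, 1]), 'pay_score': np.array([0.1, 0.2]),
        'hist_item_id': np.array([[1, 2, 3, 0], [4, 5, 0, 0]]),
        'hist_cate_id': np.array([[1, 2, 2, 0], [3, 4, 0, 0]]),
        'seq_length': np.array([3, 2]),
    }
    y = np.array([1, 0])
    model.fit(x, y, batch_size=2, epochs=1)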