# -*- coding:utf-8 -*-
"""
Author:
    Weichen Shen,wcshen1994@163.com

Reference:
    [1] Zhou G, Mou N, Fan Y, et al. Deep Interest Evolution Network for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1809.03672, 2018. (https://arxiv.org/pdf/1809.03672.pdf)
"""

import tensorflow as tf
from tensorflow.python.keras.layers import (Concatenate, Dense, Input, Permute, multiply)

from ..feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, build_input_features
from ..inputs import get_varlen_pooling_list, create_embedding_matrix, embedding_lookup, varlen_embedding_lookup, \
    get_dense_input
from ..layers.core import DNN, PredictionLayer
from ..layers.sequence import AttentionSequencePoolingLayer, DynamicGRU
from ..layers.utils import concat_func, reduce_mean, combined_dnn_input


def auxiliary_loss(h_states, click_seq, noclick_seq, mask, stag=None):
    """Compute the auxiliary loss used by DIEN when negative sampling is enabled.

    :param h_states: GRU hidden states, shape [B, T-1, E]
    :param click_seq: embeddings of the items actually clicked next, shape [B, T-1, E]
    :param noclick_seq: embeddings of sampled negative items, shape [B, T-1, E]
    :param mask: valid behavior length per sample, shape [B, 1]
    :param stag: name suffix passed to the shared auxiliary net
    :return: scalar auxiliary loss tensor
    """
    hist_len, _ = click_seq.get_shape().as_list()[1:]
    # Turn the per-sample lengths into a [B, T-1] float mask over valid positions.
    mask = tf.sequence_mask(mask, hist_len)
    mask = mask[:, 0, :]
    mask = tf.cast(mask, tf.float32)

    click_input_ = tf.concat([h_states, click_seq], -1)
    noclick_input_ = tf.concat([h_states, noclick_seq], -1)

    click_prop_ = auxiliary_net(click_input_, stag=stag)[:, :, 0]  # [B, T-1]
    noclick_prop_ = auxiliary_net(noclick_input_, stag=stag)[:, :, 0]  # [B, T-1]

    # tf.log was removed in TF 2.x; fall back to tf.compat.v1.log when it is missing.
    try:
        click_loss_ = - tf.reshape(tf.log(click_prop_),
                                   [-1, tf.shape(click_seq)[1]]) * mask
    except AttributeError:
        click_loss_ = - tf.reshape(tf.compat.v1.log(click_prop_),
                                   [-1, tf.shape(click_seq)[1]]) * mask
    try:
        noclick_loss_ = - tf.reshape(tf.log(1.0 - noclick_prop_),
                                     [-1, tf.shape(noclick_seq)[1]]) * mask
    except AttributeError:
        noclick_loss_ = - tf.reshape(tf.compat.v1.log(1.0 - noclick_prop_),
                                     [-1, tf.shape(noclick_seq)[1]]) * mask

    loss_ = reduce_mean(click_loss_ + noclick_loss_)

    return loss_
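
# Illustrative call (hedged; it mirrors how interest_evolution below invokes this helper):
#
#     aux_loss_1 = auxiliary_loss(rnn_outputs[:, :-1, :],         # hidden states, [B, T-1, E]
#                                 concat_behavior[:, 1:, :],      # next clicked items, [B, T-1, E]
#                                 neg_concat_behavior[:, 1:, :],  # sampled negatives, [B, T-1, E]
#                                 tf.subtract(user_behavior_length, 1),  # usable length, [B, 1]
#                                 stag="gru")
#
# Each hidden state is trained to score the item actually clicked at the next step higher
# than a randomly sampled negative item, which supervises the interest extractor layer.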


def auxiliary_net(in_, stag='auxiliary_net'):
    """Small MLP that scores (hidden state, item embedding) pairs for the auxiliary loss.

    :param in_: input tensor of shape [B, T-1, 2*E]
    :param stag: name suffix so that weights are shared across calls via reuse
    :return: per-step click probabilities of shape [B, T-1, 1]
    """
    # tf.layers was removed in TF 2.x; fall back to tf.compat.v1.layers when it is missing.
    try:
        bn1 = tf.layers.batch_normalization(
            inputs=in_, name='bn1' + stag, reuse=tf.AUTO_REUSE)
    except AttributeError:
        bn1 = tf.compat.v1.layers.batch_normalization(
            inputs=in_, name='bn1' + stag, reuse=tf.compat.v1.AUTO_REUSE)

    try:
        dnn1 = tf.layers.dense(bn1, 100, activation=None,
                               name='f1' + stag, reuse=tf.AUTO_REUSE)
    except AttributeError:
        dnn1 = tf.compat.v1.layers.dense(bn1, 100, activation=None,
                                         name='f1' + stag, reuse=tf.compat.v1.AUTO_REUSE)
    dnn1 = tf.nn.sigmoid(dnn1)

    try:
        dnn2 = tf.layers.dense(dnn1, 50, activation=None,
                               name='f2' + stag, reuse=tf.AUTO_REUSE)
    except AttributeError:
        dnn2 = tf.compat.v1.layers.dense(dnn1, 50, activation=None,
                                         name='f2' + stag, reuse=tf.compat.v1.AUTO_REUSE)
    dnn2 = tf.nn.sigmoid(dnn2)

    try:
        dnn3 = tf.layers.dense(dnn2, 1, activation=None,
                               name='f3' + stag, reuse=tf.AUTO_REUSE)
    except AttributeError:
        dnn3 = tf.compat.v1.layers.dense(dnn2, 1, activation=None,
                                         name='f3' + stag, reuse=tf.compat.v1.AUTO_REUSE)

    y_hat = tf.nn.sigmoid(dnn3)

    return y_hat
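
# Shape sketch (illustrative only, not part of the library API):
#
#     pair = tf.concat([h_states, click_seq], -1)    # [B, T-1, 2*E]
#     prob = auxiliary_net(pair, stag="gru")          # [B, T-1, 1]
#     prob = prob[:, :, 0]                            # [B, T-1]
#
# The net is BatchNorm -> Dense(100)+sigmoid -> Dense(50)+sigmoid -> Dense(1)+sigmoid,
# and the reuse flag on the "stag"-suffixed scopes shares weights between the click
# and no-click branches of auxiliary_loss.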


def interest_evolution(concat_behavior, deep_input_item, user_behavior_length, gru_type="GRU", use_neg=False,
                       neg_concat_behavior=None, att_hidden_size=(64, 16), att_activation='sigmoid',
                       att_weight_normalization=False):
    if gru_type not in ["GRU", "AIGRU", "AGRU", "AUGRU"]:
        raise ValueError("gru_type must be one of 'GRU', 'AIGRU', 'AGRU', 'AUGRU', got %r" % (gru_type,))
    aux_loss_1 = None
    embedding_size = None  # None lets DynamicGRU infer its hidden size from the input embeddings
    rnn_outputs = DynamicGRU(embedding_size, return_sequence=True,
                             name="gru1")([concat_behavior, user_behavior_length])

    if gru_type == "AUGRU" and use_neg:
        aux_loss_1 = auxiliary_loss(rnn_outputs[:, :-1, :], concat_behavior[:, 1:, :],

                                    neg_concat_behavior[:, 1:, :],

                                    tf.subtract(user_behavior_length, 1), stag="gru")  # [:, 1:]

    if gru_type == "GRU":
        rnn_outputs2 = DynamicGRU(embedding_size, return_sequence=True,
                                  name="gru2")([rnn_outputs, user_behavior_length])
        hist = AttentionSequencePoolingLayer(att_hidden_units=att_hidden_size, att_activation=att_activation,
                                             weight_normalization=att_weight_normalization, return_score=False)([
            deep_input_item, rnn_outputs2, user_behavior_length])

    else:  # AIGRU AGRU AUGRU

        scores = AttentionSequencePoolingLayer(att_hidden_units=att_hidden_size, att_activation=att_activation,
                                               weight_normalization=att_weight_normalization, return_score=True)([
            deep_input_item, rnn_outputs, user_behavior_length])

        if gru_type == "AIGRU":
            hist = multiply([rnn_outputs, Permute([2, 1])(scores)])
            final_state2 = DynamicGRU(embedding_size, gru_type="GRU", return_sequence=False, name='gru2')(
                [hist, user_behavior_length])
        else:  # AGRU AUGRU
            final_state2 = DynamicGRU(embedding_size, gru_type=gru_type, return_sequence=False,
                                      name='gru2')([rnn_outputs, user_behavior_length, Permute([2, 1])(scores)])
        hist = final_state2
    return hist, aux_loss_1
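
# Illustrative call (hedged; the variable names are placeholders for the tensors DIEN builds below):
#
#     hist, aux_loss_1 = interest_evolution(keys_emb,              # behavior sequence, [B, T, E]
#                                           query_emb,             # candidate item, [B, 1, E]
#                                           user_behavior_length,  # [B, 1]
#                                           gru_type="AUGRU",
#                                           use_neg=True,
#                                           neg_concat_behavior=neg_concat_behavior)
#
# "hist" is a [B, 1, units] interest summary that gets concatenated with the other
# embeddings before the DNN; "aux_loss_1" is non-None only for AUGRU with use_neg=True
# and is attached to the model through model.add_loss(alpha * aux_loss_1).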


def DIEN(dnn_feature_columns, history_feature_list, gru_type="GRU", use_negsampling=False, alpha=1.0,
         use_bn=False, dnn_hidden_units=(200, 80), dnn_activation='relu', att_hidden_units=(64, 16),
         att_activation="dice", att_weight_normalization=True, l2_reg_dnn=0, l2_reg_embedding=1e-6,
         dnn_dropout=0, seed=1024, task='binary'):
    """Instantiates the Deep Interest Evolution Network architecture.

    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
    :param history_feature_list: list, to indicate the sequence sparse fields.
    :param gru_type: str, can be GRU, AIGRU, AUGRU or AGRU.
    :param use_negsampling: bool, whether or not to use negative sampling.
    :param alpha: float, weight of the auxiliary loss.
    :param use_bn: bool, whether to use BatchNormalization before activation in the deep net.
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the DNN.
    :param dnn_activation: Activation function to use in the DNN.
    :param att_hidden_units: list of positive integers, the layer number and units in each layer of the attention net.
    :param att_activation: Activation function to use in the attention net.
    :param att_weight_normalization: bool, whether to normalize the attention score of the local activation unit.
    :param l2_reg_dnn: float, L2 regularizer strength applied to the DNN.
    :param l2_reg_embedding: float, L2 regularizer strength applied to the embedding vectors.
    :param dnn_dropout: float in [0,1), the probability of dropping a given DNN coordinate.
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :return: A Keras model instance.
    """
    features = build_input_features(dnn_feature_columns)

    user_behavior_length = Input(shape=(1,), name='seq_length')

    sparse_feature_columns = list(
        filter(lambda x: isinstance(x, SparseFeat), dnn_feature_columns)) if dnn_feature_columns else []
    dense_feature_columns = list(
        filter(lambda x: isinstance(x, DenseFeat), dnn_feature_columns)) if dnn_feature_columns else []
    varlen_sparse_feature_columns = list(
        filter(lambda x: isinstance(x, VarLenSparseFeat), dnn_feature_columns)) if dnn_feature_columns else []

    history_feature_columns = []
    neg_history_feature_columns = []
    sparse_varlen_feature_columns = []
    history_fc_names = list(map(lambda x: "hist_" + x, history_feature_list))
    neg_history_fc_names = list(map(lambda x: "neg_" + x, history_fc_names))
    for fc in varlen_sparse_feature_columns:
        feature_name = fc.name
        if feature_name in history_fc_names:
            history_feature_columns.append(fc)
        elif feature_name in neg_history_fc_names:
            neg_history_feature_columns.append(fc)
        else:
            sparse_varlen_feature_columns.append(fc)

    inputs_list = list(features.values())

    embedding_dict = create_embedding_matrix(dnn_feature_columns, l2_reg_embedding, seed, prefix="",
                                             seq_mask_zero=False)

    query_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns,
                                      return_feat_list=history_feature_list, to_list=True)
    keys_emb_list = embedding_lookup(embedding_dict, features, history_feature_columns,
                                     return_feat_list=history_fc_names, to_list=True)
    dnn_input_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns,
                                          mask_feat_list=history_feature_list, to_list=True)
    dense_value_list = get_dense_input(features, dense_feature_columns)

    sequence_embed_dict = varlen_embedding_lookup(embedding_dict, features, sparse_varlen_feature_columns)
    sequence_embed_list = get_varlen_pooling_list(sequence_embed_dict, features, sparse_varlen_feature_columns,
                                                  to_list=True)
    dnn_input_emb_list += sequence_embed_list

    keys_emb = concat_func(keys_emb_list)
    deep_input_emb = concat_func(dnn_input_emb_list)
    query_emb = concat_func(query_emb_list)

    if use_negsampling:
        neg_uiseq_embed_list = embedding_lookup(embedding_dict, features, neg_history_feature_columns,
                                                neg_history_fc_names, to_list=True)
        neg_concat_behavior = concat_func(neg_uiseq_embed_list)
    else:
        neg_concat_behavior = None

    hist, aux_loss_1 = interest_evolution(keys_emb, query_emb, user_behavior_length, gru_type=gru_type,
                                          use_neg=use_negsampling, neg_concat_behavior=neg_concat_behavior,
                                          att_hidden_size=att_hidden_units, att_activation=att_activation,
                                          att_weight_normalization=att_weight_normalization)

    deep_input_emb = Concatenate()([deep_input_emb, hist])
    deep_input_emb = tf.keras.layers.Flatten()(deep_input_emb)

    dnn_input = combined_dnn_input([deep_input_emb], dense_value_list)
    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, use_bn, seed)(dnn_input)
    final_logit = Dense(1, use_bias=False)(output)
    output = PredictionLayer(task)(final_logit)

    # model_input_list = get_inputs_list(
    #     [sparse_input, dense_input, user_behavior_input])
    model_input_list = inputs_list

    # if use_negsampling:
    #     model_input_list += list(neg_user_behavior_input.values())

    model_input_list += [user_behavior_length]

    model = tf.keras.models.Model(inputs=model_input_list, outputs=output)

    if use_negsampling:
        model.add_loss(alpha * aux_loss_1)
    # tf.global_variables_initializer was removed in TF 2.x; fall back to tf.compat.v1.
    try:
        tf.keras.backend.get_session().run(tf.global_variables_initializer())
    except AttributeError:
        tf.compat.v1.keras.backend.get_session().run(tf.compat.v1.global_variables_initializer())
    return model
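
# Minimal usage sketch (hedged): the feature names, vocabulary sizes and the exact
# VarLenSparseFeat signature below are assumptions that depend on the installed deepctr
# version, not part of this module. The behavior sequence column must be named
# "hist_" + the corresponding entry of history_feature_list, and a "seq_length" input
# holding the true behavior lengths is expected at fit/predict time.
#
#     from deepctr.feature_column import SparseFeat, VarLenSparseFeat
#
#     feature_columns = [
#         SparseFeat('user', vocabulary_size=10, embedding_dim=8),
#         SparseFeat('item_id', vocabulary_size=50, embedding_dim=8),
#         VarLenSparseFeat(SparseFeat('hist_item_id', vocabulary_size=50, embedding_dim=8),
#                          maxlen=4),
#     ]
#     model = DIEN(feature_columns, history_feature_list=['item_id'],
#                  gru_type="AUGRU", use_negsampling=False)
#     model.compile('adam', 'binary_crossentropy')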