# Source code for kaggle_metrics.order_based

import numpy as np
from sklearn.preprocessing import binarize
from kaggle_metrics.utils import check_shapes, \
    confusion_binary, align_shape, check_binary


def average_precision_at_k(true_positive):
    # TODO: accept several types of input
    '''
    Average precision at every cut-off position k

    For each position k the score is the sum of precision@i over the
    relevant positions i <= k, divided by the number of relevant items
    seen up to k. Positions that precede the first relevant item have no
    relevant items to average over and are scored 0 (instead of the NaN a
    plain 0/0 division would produce).

    Parameters
    ----------
    true_positive: numpy.ndarray
        1-D sequence of 0/1 relevance flags, ordered by rank in the query

    Returns
    -------
    score: numpy.ndarray
        A vector of average precision scores, one for every k-th position

    References
    ----------
    .. [1] https://towardsdatascience.com/breaking-down-mean-average-precision-map-ae462f623a52
    .. [2] https://medium.com/@jonathan_hui/map-mean-average-precision-for-object-detection-45c121a31173

    '''
    relevance = np.asarray(true_positive, dtype=float)
    # Number of relevant items seen up to and including each position.
    hits = np.cumsum(relevance)
    # 1-based rank of each position.
    ranks = np.arange(1, relevance.size + 1)
    # precision@i counted only at relevant positions, accumulated over i.
    precision_sum = np.cumsum(hits * relevance / ranks)
    # Divide only where at least one relevant item has been seen; the
    # remaining positions keep the 0 they were initialised with.
    scores = np.zeros_like(precision_sum)
    np.divide(precision_sum, hits, out=scores, where=hits > 0)
    return scores


def average_precision(true_positive):
    # TODO: find columnwise version of Average Precision
    '''
    Average precision of a single ranked query

    This is the last element of ``average_precision_at_k``, i.e. the
    average precision computed over the whole ranking.

    Parameters
    ----------
    true_positive: numpy.ndarray
        True positive flags for the ordered values in the query

    Returns
    -------
    score: float
        Average precision score; 0.0 when the query is empty or contains
        no true positives

    References
    ----------
    .. [1] https://towardsdatascience.com/breaking-down-mean-average-precision-map-ae462f623a52
    .. [2] https://medium.com/@jonathan_hui/map-mean-average-precision-for-object-detection-45c121a31173

    '''
    # With no relevant item there is nothing to average: indexing [-1] would
    # fail on an empty vector and an all-zero vector would divide 0 by 0.
    if not np.any(true_positive):
        return 0.0
    return average_precision_at_k(true_positive)[-1]


def mean_average_precision(true_positive):
    '''
    Mean average precision

    Applies ``average_precision`` to every row and returns the mean of
    the per-query scores.

    Parameters
    ----------
    true_positive: numpy.ndarray
        True positive values for n queries (n_queries, answers). A 1-D
        vector is treated as a single query.

    Returns
    -------
    score: float
        Mean average precision score

    References
    ----------
    .. [1] https://towardsdatascience.com/breaking-down-mean-average-precision-map-ae462f623a52
    .. [2] https://en.wikipedia.org/wiki/Information_retrieval#Mean_average_precision
    .. [3] https://medium.com/@jonathan_hui/map-mean-average-precision-for-object-detection-45c121a31173

    '''
    # Promote a single 1-D query to a one-row batch so axis 1 always exists.
    queries = np.atleast_2d(true_positive)
    ap_per_query = np.apply_along_axis(average_precision, 1, queries)
    return ap_per_query.mean()



# Aliases: short names bound to the metric functions defined above.
ap_at_k = average_precision_at_k
ap = average_precision
# NOTE(review): this rebinds the builtin name `map` at module level (and for
# anyone doing `from ... import *`); kept as-is for backward compatibility.
map = mean_average_precision

# TODO: try to find Average Among Top P (formerly described on one of the Kaggle sites)

if __name__ == "__main__":
    # Smoke demo for the ranking metrics defined in this module. The earlier
    # version called `roc_auc`, which is neither defined nor imported here
    # and raised NameError when the file was run as a script.

    # One query: relevance flags in ranked order.
    y_true2 = np.array([1, 1, 1, 0, 0, 0, 0, 0, 0])
    # Five queries, one per row.
    y_true3 = np.array([[1, 1, 1, 0, 0, 0, 0, 0, 0],
                        [1, 1, 0, 1, 0, 1, 0, 0, 0],
                        [1, 1, 0, 0, 1, 1, 1, 0, 0],
                        [1, 1, 1, 1, 0, 1, 0, 0, 0],
                        [0, 1, 1, 1, 0, 1, 0, 0, 0]])

    print(average_precision_at_k(y_true2))
    print(mean_average_precision(y_true3))