Source code for atml.measure

"""
The :mod:`atml.measure` module contains a set of common evaluation measures for predictive machine learning tasks.
"""
# Author: Hao Song (nuacesh@gmail.com)
# License: BSD-3

import numpy
import scipy.integrate

tiny = numpy.finfo('float64').tiny


class Measure:
    """
    Base class for evaluation measures.

    It only records the type of predictive task (e.g. classification) that the measure applies to.
    """

    def __init__(self, task):
        """
        Store the task type of the measure.

        Parameters
        ----------
        task: string
            Name of the task type the measure is meant for (e.g. ``'classification'``).

        """
        self.task = task
        
        
class AUC(Measure):
    """
    Area under the ROC curve, computed for one target class against all the others.

    """

    def __init__(self, target_positive=0):
        """
        Parameters
        ----------
        target_positive: int
            Column index (0-based) of the class treated as the positive class.

        """

        super().__init__(task='classification')
        self.target_positive = target_positive
        # NOTE(review): the emitted name has no space before "vs"; left unchanged in case callers match on it.
        self.name = 'area under the curve (class ' + str(self.target_positive+1) + 'vs rest)'

    def get_measure(self, s, y):
        """
        Compute the area under the ROC curve of the target class.

        Parameters
        ----------
        s: numpy.ndarray
            2-D array of scores; column ``target_positive`` is used as the score of the positive class
            (presumably a probability in [0, 1], one row per instance).

        y: numpy.ndarray
            2-D array of labels with the same row order as ``s``; a row is positive when its entry in
            column ``target_positive`` equals 1.

        Returns
        -------
        auc: float
            Area under the curve of the positive-class cumulative distribution plotted against the
            negative-class cumulative distribution. Tied scores are handled by the trapezoid rule.

        """

        # Work with 1 - score so that sorting ascending walks from the most to the least confident prediction.
        flipped = 1 - numpy.asarray(s)[:, self.target_positive]
        is_positive = numpy.asarray(y)[:, self.target_positive] == 1

        # One count per distinct threshold. A histogram over the unique values would merge the two
        # largest thresholds into a single (closed) last bin and bias the result.
        thresholds, level = numpy.unique(flipped, return_inverse=True)
        level = numpy.ravel(level)
        n_levels = thresholds.shape[0]
        count_pos = numpy.bincount(level[is_positive], minlength=n_levels)
        count_neg = numpy.bincount(level[~is_positive], minlength=n_levels)

        # If a class is absent, fall back to uniform counts so the normalisation below stays finite.
        if numpy.sum(count_pos) == 0:
            count_pos[:] = 1
        if numpy.sum(count_neg) == 0:
            count_neg[:] = 1

        cdf_pos = numpy.hstack([0.0, numpy.cumsum(count_pos) / numpy.sum(count_pos)])
        cdf_neg = numpy.hstack([0.0, numpy.cumsum(count_neg) / numpy.sum(count_neg)])

        # Trapezoid rule written with plain numpy; scipy.integrate.trapz is no longer available in recent SciPy.
        auc = numpy.sum(numpy.diff(cdf_neg) * (cdf_pos[1:] + cdf_pos[:-1]) / 2.0)
        return auc

    @staticmethod
    def transform(m):
        """
        Return the measure value unchanged.

        Parameters
        ----------
        m: float
            Value of the measure.

        Returns
        -------
        m_hat: float
            The same value as ``m``.

        """
        m_hat = m
        return m_hat


class BAcc(Measure):
    """
    Binary accuracy of one target class against all the others.

    """

    def __init__(self, target_positive=0):
        """
        Parameters
        ----------
        target_positive: int
            Column index (0-based) of the class treated as the positive class.

        """

        # The base class only accepts ``task``; the target class is stored on this instance.
        super().__init__(task='classification')
        self.target_positive = target_positive
        # NOTE(review): the emitted name has no space before "vs"; left unchanged in case callers match on it.
        self.name = 'accuracy (class ' + str(self.target_positive+1) + 'vs rest)'

    def get_measure(self, s, y):
        """
        Compute the accuracy of the "target class vs rest" binary problem.

        Parameters
        ----------
        s: numpy.ndarray
            2-D array of scores; column ``target_positive`` is the positive-class score and
            ``1 - score`` is used for the rest.

        y: numpy.ndarray
            2-D array of labels with the same row order as ``s``; column ``target_positive`` is the
            positive-class indicator and ``1 - indicator`` is used for the rest.

        Returns
        -------
        bacc: float
            Fraction of rows where the binarised prediction agrees with the binarised label.

        """

        score_pos = numpy.asarray(s)[:, self.target_positive]
        label_pos = numpy.asarray(y)[:, self.target_positive]
        # Column 0 is the target class, column 1 is "everything else".
        s_bin = numpy.column_stack([score_pos, 1.0 - score_pos])
        y_bin = numpy.column_stack([label_pos, 1.0 - label_pos])
        bacc = numpy.mean(numpy.argmax(s_bin, axis=1) == numpy.argmax(y_bin, axis=1))
        return bacc


class F1(Measure):
    """

    F1 score of one target class against all the others.

    """

    def __init__(self, target_positive=0):
        """
        Parameters
        ----------
        target_positive: int
            Column index (0-based) of the class treated as the positive class.

        """

        # The base class only accepts ``task``; the target class is stored on this instance.
        super().__init__(task='classification')
        self.target_positive = target_positive
        # NOTE(review): the emitted name has no space before "vs"; left unchanged in case callers match on it.
        self.name = 'F1 score (class ' + str(self.target_positive+1) + 'vs rest)'

    def get_measure(self, s, y):
        """
        Compute the F1 score of the target class.

        Parameters
        ----------
        s: numpy.ndarray
            2-D array of scores; the predicted class of a row is the column with the largest score.

        y: numpy.ndarray
            2-D array of labels; the true class of a row is the column with the largest entry.

        Returns
        -------
        f1: float
            ``2 * TP / (2 * TP + FP + FN)`` for the target class. When the target class is neither
            predicted nor present, all three counts are set to 1, which yields 0.5.

        """

        predicted_target = numpy.argmax(s, axis=1) == self.target_positive
        actual_target = numpy.argmax(y, axis=1) == self.target_positive
        TP = numpy.sum(predicted_target & actual_target)
        FP = numpy.sum(predicted_target & ~actual_target)
        FN = numpy.sum(~predicted_target & actual_target)
        if (TP + FP + FN) == 0:
            # Avoid a 0 / 0 division when the target class never occurs.
            TP = 1
            FP = 1
            FN = 1
        f1 = 2 * TP / (2 * TP + FP + FN)
        return f1


class Acc(Measure):
    """

    Multi-class accuracy.

    """

    def __init__(self):
        """
        Register the measure as a classification measure named ``'accuracy'``.

        """

        super().__init__(task='classification')
        self.name = 'accuracy'

    @staticmethod
    def get_measure(s, y):
        """
        Compute the fraction of rows whose predicted class matches the true class.

        Parameters
        ----------
        s: numpy.ndarray
            2-D array of scores; the predicted class of a row is the column with the largest score.

        y: numpy.ndarray
            2-D array of labels; the true class of a row is the column with the largest entry.

        Returns
        -------
        acc: float
            Mean of the row-wise agreement between predicted and true class.

        """

        predicted = numpy.argmax(s, axis=1)
        actual = numpy.argmax(y, axis=1)
        acc = numpy.mean(predicted == actual)

        return acc


class BS(Measure):
    """

    Brier score.

    """

    def __init__(self):
        """
        Register the measure as a classification measure named ``'Brier score'``.

        """

        super().__init__(task='classification')
        self.name = 'Brier score'

    @staticmethod
    def get_measure(s, y):
        """
        Compute the mean, over rows, of the squared differences between scores and labels summed per row.

        Parameters
        ----------
        s: numpy.ndarray
            2-D array of scores (array-likes are converted with ``numpy.asarray``).

        y: numpy.ndarray
            2-D array of labels with the same shape as ``s``.

        Returns
        -------
        bs: float
            Mean over rows of ``sum((s - y) ** 2)``.

        """
        # asarray lets plain nested lists be passed as well as arrays.
        residual = numpy.asarray(s) - numpy.asarray(y)
        bs = numpy.mean(numpy.sum(residual ** 2, axis=1))
        return bs

    @staticmethod
    def transform(m):
        """
        Rescale a score as ``1 - m / 2``.

        Parameters
        ----------
        m: float
            Value of the measure.

        Returns
        -------
        m_hat: float
            ``1 - m / 2`` (presumably so that larger values mean better predictions).

        """
        m_hat = 1 - (m/2)
        return m_hat


class LL(Measure):
    """

    Logarithm loss (cross entropy)

    """

    def __init__(self):
        """
        Register the measure as a classification measure named ``'Log loss (cross entropy)'``.

        """

        super().__init__(task='classification')
        self.name = 'Log loss (cross entropy)'

    @staticmethod
    def get_measure(s, y):
        """
        Compute the mean, over rows, of ``sum(-log(s) * y)``.

        Parameters
        ----------
        s: numpy.ndarray
            2-D array of scores. Values at or below the smallest positive normal float64 are raised
            to that value before taking the logarithm. The input array is not modified.

        y: numpy.ndarray
            2-D array of labels with the same shape as ``s``.

        Returns
        -------
        ll: float
            Mean over rows of the label-weighted negative log scores.

        """
        # Clip into a new array: writing into ``s`` would silently alter the caller's data.
        clipped = numpy.maximum(numpy.asarray(s), tiny)
        ll = numpy.mean(numpy.sum(-numpy.log(clipped) * y, axis=1))
        return ll
Source code for atml.measure

@ML

Navigation

Related Topics