Source code for statspy.hypotest

"""module hypotest.py

This module contains functions to perform hypothesis tests.

"""

import logging
import math
import scipy.special
import scipy.stats
import statspy as sp

__all__ =['Result','pvalue_to_Zvalue','Zvalue_to_pvalue','pllr']

# Logging system
logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)
_ch = logging.StreamHandler() # Console handler
_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
_ch.setFormatter(_formatter)
logger.addHandler(_ch)
logger.debug('logger has been set to DEBUG mode')

[docs]class Result(dict):
    """Class to store results from an hypothesis test.

    Among the variables stored in this class, there are:

    * the ``pvalue`` which is defined as Prob(t >= tobs | H0) with t the test
      statistics. If this value is lower than the predefined type I error 
      rate, then the null hypothesis is rejected.
    * the ``Zvalue``, the standard score (or Z-score), is the p-value expressed
      in the number of standard deviations.

    """
    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

    def __repr__(self):
        if self.keys():
            m = max(map(len, list(self.keys()))) + 1
            return '\n'.join([k.rjust(m) + ': ' + repr(v)
                              for k, v in self.items()])
        else:
            return self.__class__.__name__ + "()"

[docs]def pvalue_to_Zvalue(pvalue, mode='two-sided'):
    """Convert a p-value to a Z-value.

    Definition::

        >>> math.sqrt(2.) * scipy.special.erfcinv(mode * pvalue)

    mode is equal to 2 for a two-sided Z-value and to 1 for a one-sided
    Z-value.

    Parameters
    ----------
    pvalue : float
        p-value defined as Prob(t >= tobs | H0)
    mode : str
        'two-sided' (default) or 'one-sided'

    Returns
    -------
    Zvalue : float
        Z-value corresponding to the number of standard deviations

    """
    try:
        mode = 2. if mode == 'two-sided' else 1.
        if pvalue < 1.24e-15:
            logger.warning('when pvalue is lower than 1.24e-15, Z-value is set to 8.')
            Zvalue = 8.
        else:
            Zvalue = math.sqrt(2.) * scipy.special.erfcinv(mode * pvalue)
    except:
        raise
    return Zvalue

[docs]def Zvalue_to_pvalue(Zvalue, mode='two-sided'):
    """Convert a two-sided Z-value to a p-value.

    Definition::

        >>> scipy.special.erfc(Zvalue / math.sqrt(2.)) / mode

    mode is equal to 2 for a two-sided Z-value and to 1 for a one-sided
    Z-value.

    Parameters
    ----------
    Zvalue : float
        Z-value corresponding to the number of standard deviations
    mode : str
        'two-sided' (default) or 'one-sided'

    Returns
    -------
    pvalue : float
        p-value defined as Prob(t >= tobs | H0)

    """
    try:
        mode = 2. if mode == 'two-sided' else 1.
        pvalue = scipy.special.erfc(Zvalue / math.sqrt(2.)) / mode
    except:
        raise
    return pvalue

[docs]def pllr(pf, data, **kw):
    """Profile likelihood ratio test.

    For the likelihood ratio test, the likelihood is maximized separately for
    the null and the alternative hypothesis. The word "profile" means that
    in addition, the likelihood is maximized wrt the nuisance parameters.
    The test statistics is then defined as::

        l = L(x|theta_r,\hat{\hat{theta_s}}) / L(x|\hat{theta_r},\hat{theta_s})
        q_obs = -2 * log(l)

    and is distributed asymptotically as a chi2 distribution.
    q_obs can be used to compute a p-value = Pr(q >= q_obs). 

    Parameters
    ----------
    pf : statspy.core.PF
        Probability function used in the computed of the likelihood
    data : ndarray, tuple
        x - variates used in the computation of the likelihood 
    kw : keyword arguments (optional)

    Returns
    -------
    result : statspy.hypotest.Result
        All information about the test is stored in the Result class.

    """
    # Get initial values of parameters
    params = pf.get_list_free_params()
    popt = [None] * len(params)
    for ipar,par in enumerate(params):
        popt[ipar] = par.value

    result = Result()
    result.pf = pf
    result.data = data
    result.pllr = pf.pllr(data)
    result.Zvalue = math.sqrt(result.pllr)
    result.pvalue = scipy.stats.chi2.sf(result.pllr, 1)
    all_discrete = True
    for rv in pf._rvs:
        if rv[3] == sp.RV.DISCRETE: continue
        all_discrete = False
        break
    if all_discrete:
        result.pvalue = result.pvalue + scipy.stats.chi2.pdf(result.pllr, 1)
    logger.debug('PLLR test: pvalue = %f, Zvalue = %f' % (result.pvalue, 
                                                          result.Zvalue))

    # Restore initial value of parameters
    for ipar,par in enumerate(params):
        par.value = popt[ipar]

    return result
Navigation

Source code for statspy.hypotest

Quick search

Navigation