Source code for statspy.hypotest

"""module hypotest.py

This module contains functions to perform hypothesis tests.

"""

import logging
import math
import scipy.special
import scipy.stats
import statspy as sp

__all__ =['Result','pvalue_to_Zvalue','Zvalue_to_pvalue','pllr']

# Logging system
logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)
_ch = logging.StreamHandler() # Console handler
_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
_ch.setFormatter(_formatter)
logger.addHandler(_ch)
logger.debug('logger has been set to DEBUG mode')

[docs]class Result(dict): """Class to store results from an hypothesis test. Among the variables stored in this class, there are: * the ``pvalue`` which is defined as Prob(t >= tobs | H0) with t the test statistics. If this value is lower than the predefined type I error rate, then the null hypothesis is rejected. * the ``Zvalue``, the standard score (or Z-score), is the p-value expressed in the number of standard deviations. """ def __getattr__(self, name): try: return self[name] except KeyError: raise AttributeError(name) __setattr__ = dict.__setitem__ __delattr__ = dict.__delitem__ def __repr__(self): if self.keys(): m = max(map(len, list(self.keys()))) + 1 return '\n'.join([k.rjust(m) + ': ' + repr(v) for k, v in self.items()]) else: return self.__class__.__name__ + "()"
[docs]def pvalue_to_Zvalue(pvalue, mode='two-sided'): """Convert a p-value to a Z-value. Definition:: >>> math.sqrt(2.) * scipy.special.erfcinv(mode * pvalue) mode is equal to 2 for a two-sided Z-value and to 1 for a one-sided Z-value. Parameters ---------- pvalue : float p-value defined as Prob(t >= tobs | H0) mode : str 'two-sided' (default) or 'one-sided' Returns ------- Zvalue : float Z-value corresponding to the number of standard deviations """ try: mode = 2. if mode == 'two-sided' else 1. if pvalue < 1.24e-15: logger.warning('when pvalue is lower than 1.24e-15, Z-value is set to 8.') Zvalue = 8. else: Zvalue = math.sqrt(2.) * scipy.special.erfcinv(mode * pvalue) except: raise return Zvalue
[docs]def Zvalue_to_pvalue(Zvalue, mode='two-sided'): """Convert a two-sided Z-value to a p-value. Definition:: >>> scipy.special.erfc(Zvalue / math.sqrt(2.)) / mode mode is equal to 2 for a two-sided Z-value and to 1 for a one-sided Z-value. Parameters ---------- Zvalue : float Z-value corresponding to the number of standard deviations mode : str 'two-sided' (default) or 'one-sided' Returns ------- pvalue : float p-value defined as Prob(t >= tobs | H0) """ try: mode = 2. if mode == 'two-sided' else 1. pvalue = scipy.special.erfc(Zvalue / math.sqrt(2.)) / mode except: raise return pvalue
[docs]def pllr(pf, data, **kw): """Profile likelihood ratio test. For the likelihood ratio test, the likelihood is maximized separately for the null and the alternative hypothesis. The word "profile" means that in addition, the likelihood is maximized wrt the nuisance parameters. The test statistics is then defined as:: l = L(x|theta_r,\hat{\hat{theta_s}}) / L(x|\hat{theta_r},\hat{theta_s}) q_obs = -2 * log(l) and is distributed asymptotically as a chi2 distribution. q_obs can be used to compute a p-value = Pr(q >= q_obs). Parameters ---------- pf : statspy.core.PF Probability function used in the computed of the likelihood data : ndarray, tuple x - variates used in the computation of the likelihood kw : keyword arguments (optional) Returns ------- result : statspy.hypotest.Result All information about the test is stored in the Result class. """ # Get initial values of parameters params = pf.get_list_free_params() popt = [None] * len(params) for ipar,par in enumerate(params): popt[ipar] = par.value result = Result() result.pf = pf result.data = data result.pllr = pf.pllr(data) result.Zvalue = math.sqrt(result.pllr) result.pvalue = scipy.stats.chi2.sf(result.pllr, 1) all_discrete = True for rv in pf._rvs: if rv[3] == sp.RV.DISCRETE: continue all_discrete = False break if all_discrete: result.pvalue = result.pvalue + scipy.stats.chi2.pdf(result.pllr, 1) logger.debug('PLLR test: pvalue = %f, Zvalue = %f' % (result.pvalue, result.Zvalue)) # Restore initial value of parameters for ipar,par in enumerate(params): par.value = popt[ipar] return result