Source code for statspy.interval

"""module interval.py

This module contains functions to estimate confidence or credible intervals.

"""

import logging
import math
import scipy.optimize
import scipy.stats

# Logging system
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
_ch = logging.StreamHandler() # Console handler
_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
_ch.setFormatter(_formatter)
logger.addHandler(_ch)
logger.debug('logger has been set to DEBUG mode')

[docs]def pllr(pf, data, **kw): """Compute confidence intervals using a profile likelihood ratio method. Interval estimation is done through steps: * The (minus log-)likelihood is computed from ``pf`` and ``data`` via the ``PF.nllf`` method. * Best estimates \hat{theta_i} for each parameter theta_i are computed with the ``PF.maxlikelihood_fit`` method. * The confidence interval of theta_i around its best estimate is computed from a profile log-likelihood ratio function q defined as:: l = L(x|theta_i,\hat{\hat{theta_s}}) / L(x|\hat{theta_i},\hat{theta_s}) q(theta_i) = -2 * log(l) where L is the likelihood function and theta_s are the nuisance parameters. * q(theta_i) is assumed to be described as a chi2 distribution (Wilks' theorem). Bounds corresponding to a given confidence level (CL) are found by searching values for which q(theta_i) is equal to the chi2 quantile of CL:: quantile = scipy.stats.chi2.ppf(cl, 1) Parameters ---------- pf : statspy.core.PF Probability function used in the computed of the likelihood data : ndarray, tuple x - variates used in the computation of the likelihood kw : keyword arguments (optional) Possible keyword arguments are: cl : float Confidence level (0.6827 by default) ndf : int Number of degrees of freedom (1 by default) root_finder : scipy.optimize function Root finder algorithm (scipy.optimize.brentq by default) Returns ------- params : statspy.core.Param list List of the parameters for which a confidence interval has been extracted including updated 'value', 'neg_unc' and 'pos_unc' arguments. corr : ndarray Correlation matrix quantile : float Quantile used in the computation of bounds """ # Confidence level and corresponding quantile cl = kw.get('cl', 0.6827) ndf = kw.get('ndf', 1) quantile = scipy.stats.chi2.ppf(cl, ndf) logger.debug('confidence level = %f, ndf = %d, quantile = %f' % (cl, ndf, quantile)) # Algorithm used to find roots of (q - quantile = 0) root_finder = kw.get('root_finder', scipy.optimize.brentq) # Maximum likelihood estimates params, nllfmin = pf.maxlikelihood_fit(data) print pf._pcov popt = [] [ popt.append([par.value, par.unc]) for par in params ] corr = pf.corr() print popt,corr # Loop over parameters maxiter = kw.get('maxiter', 20) for ipar,par in enumerate(params): # set par as the only parameter of interest for ipar2,par2 in enumerate(params): par2.poi = False par2.value = popt[ipar2][0] par2.unc = popt[ipar2][1] par.poi = True logger.debug('%s max likelihood estimate = %f' % (par.name, par.value)) logger.debug('%s quadratic uncertainty = %f' % (par.name, par.unc)) # upper bound iter = 0 range = [popt[ipar][0], popt[ipar][0] + math.sqrt(quantile) * popt[ipar][1]] par.value = range[1] while (pf.pllr(data, uncond_nllf=nllfmin) < quantile and iter < maxiter): range[1] = range[1] + math.sqrt(quantile) * popt[ipar][1] par.value = range[1] iter += 1 par.value = range[0] logger.debug('%s upper bound root finding range is %s' % (par.name, range)) upper_bound = root_finder(_pllr_root_finding, range[0], range[1], args=(pf, data, par, quantile, nllfmin)) logger.debug('%s upper bound = %f' % (par.name, upper_bound)) # lower bound iter = 0 range = [popt[ipar][0] - math.sqrt(quantile) * popt[ipar][1], popt[ipar][0]] par.value = range[0] while (pf.pllr(data, uncond_nllf=nllfmin) < quantile and iter < maxiter): range[0] = range[0] - math.sqrt(quantile) * popt[ipar][1] par.value = range[0] iter += 1 par.value = range[1] logger.debug('%s lower bound root finding range is %s' % (par.name, range)) lower_bound = root_finder(_pllr_root_finding, range[0], range[1], args=(pf, data, par, quantile, nllfmin)) logger.debug('%s lower bound = %f' % (par.name, lower_bound)) # par.neg_unc = lower_bound - popt[ipar][0] par.pos_unc = upper_bound - popt[ipar][0] for ipar,par in enumerate(params): par.value = popt[ipar][0] par.unc = popt[ipar][1] return params, corr, quantile
def _pllr_root_finding(x, pf, data, par, quantile, nllfmin): par.value = x return (pf.pllr(data, uncond_nllf=nllfmin) - quantile)