# TODO: implement test for this class
[docs]class Chi2Mixture(object):
r"""A class for continuous random variable following a chi2 mixture.
Class for evaluation of P values for a test statistic that follows a
two-component mixture of chi2
.. math::
(1-\pi)\chi^2(0) + \pi a \chi^2(d).
Here :math:`\pi` is the probability being in the first component and
:math:`a` and :math:`d` are the scale parameter and the number of
degrees of freedom of the second component.
Parameters
----------
scale_min : float
Minimum value used for fitting the scale parameter.
scale_max : float
Maximum value used for fitting the scale parameter.
dofmin : float
Minimum value used for fitting the dof parameter.
dofmax : float
Maximum value used for fitting the dof parameter.
qmax : float
Only the top qmax quantile is used for the fit.
n_interval : int
Number of intervals when performing gridsearch.
tol : float
Tolerance of being zero.
Examples
--------
.. doctest::
>>> from numpy.random import RandomState
>>> import scipy as sp
>>> from limix.stats import Chi2Mixture
>>>
>>> scale = 0.3
>>> dof = 2
>>> mixture = 0.2
>>> n = 100
>>>
>>> random = RandomState(1)
>>> x = random.chisquare(dof, n)
>>> n0 = int( (1-mixture) * n)
>>> idxs = random.choice(n, n0, replace=False)
>>> x[idxs] = 0
>>>
>>> chi2mix = Chi2Mixture(scale_min=0.1, scale_max=5.0,
... dof_min=0.1, dof_max=5.0,
... qmax=0.1, tol=4e-3)
>>> chi2mix.estimate_chi2mixture(x)
>>> pv = chi2mix.sf(x)
>>> print(pv[:4]) # doctest: +FLOAT_CMP
[0.2 0.2 0.2 0.2]
>>>
>>> print('%.2f' % chi2mix.scale)
1.98
>>> print('%.2f' % chi2mix.dof)
0.89
>>> print('%.2f' % chi2mix.mixture)
0.20
"""
[docs] def __init__(
self,
scale_min=0.1,
scale_max=5.0,
dof_min=0.1,
dof_max=5.0,
n_intervals=100,
qmax=0.1,
tol=0,
):
self.scale_min = scale_min
self.scale_max = scale_max
self.dof_min = dof_min
self.dof_max = dof_max
self.qmax = qmax
self.n_intervals = n_intervals
self.tol = tol
[docs] def estimate_chi2mixture(self, lrt):
r"""Estimates the parameters of a chi2 mixture.
Estimates the parameters of a chi2 mixture by fitting the empirical
distribution of null test statistic.
Parameters
----------
lrt : array_like
Null test statistcs.
"""
import scipy as sp
import scipy.stats as st
# step 1: estimate the probability of being in component one
self.mixture = 1 - (lrt <= self.tol).mean()
n_false = sp.sum(lrt > self.tol)
# step 2: only use the largest qmax fraction of test statistics to
# estimate the remaining parameters
n_fitting = int(sp.ceil(self.qmax * n_false))
lrt_sorted = -sp.sort(-lrt)[:n_fitting]
q = sp.linspace(0, 1, n_false)[1 : n_fitting + 1]
log_q = sp.log10(q)
# step 3: fitting scale and dof by minimizing the squared error
# of the log10 p-values with their theorietical values
# [uniform distribution]
MSE_opt = sp.inf
MSE = sp.zeros((self.n_intervals, self.n_intervals))
for i, scale in enumerate(
sp.linspace(self.scale_min, self.scale_max, self.n_intervals)
):
for j, dof in enumerate(
sp.linspace(self.dof_min, self.dof_max, self.n_intervals)
):
p = st.chi2.sf(lrt_sorted / scale, dof)
log_p = sp.log10(p)
MSE[i, j] = sp.mean((log_q - log_p) ** 2)
if MSE[i, j] < MSE_opt:
MSE_opt = MSE[i, j]
self.scale = scale
self.dof = dof
[docs] def sf(self, lrt):
r"""Computes the p-values from test statistics lrt.
Parameters
----------
lrt : array_like
Test statistics.
Returns
-------
array_like
P-values.
"""
import scipy as sp
import scipy.stats as st
_lrt = sp.copy(lrt)
_lrt[lrt < self.tol] = 0
pv = self.mixture * st.chi2.sf(_lrt / self.scale, self.dof)
return pv