Source code for limix.her._estimate

from __future__ import division

from numpy import ones, pi, var

from ..display import session_text, timer_text
from .._dataset import _normalise_dataset
from .._likelihood import assert_likelihood_name, normalise_extreme_values
from ..qc import normalise_covariance


[docs]def estimate(y, lik, K, M=None, verbose=True):
    r"""Estimate the so-called narrow-sense heritability.

    It supports Normal, Bernoulli, Probit, Binomial, and Poisson phenotypes.
    Let :math:`N` be the sample size and :math:`S` the number of covariates.

    Parameters
    ----------
    y : array_like
        Either a tuple of two arrays of `N` individuals each (Binomial
        phenotypes) or an array of `N` individuals (Normal, Poisson, or
        Bernoulli phenotypes). If a continuous phenotype is provided (i.e., a Normal
        one), make sure they have been normalised in such a way that its values are
        not extremely large; it might cause numerical errors otherwise. For example,
        by using :func:`limix.qc.mean_standardize` or
        :func:`limix.qc.quantile_gaussianize`.
    lik : "normal", "bernoulli", "probit", binomial", "poisson"
        Sample likelihood describing the residual distribution.
    K : array_like
        :math:`N`-by-:math:`N` covariance matrix. It might be, for example, the
        estimated kinship relationship between the individuals. The provided matrix will
        be normalised via the function :func:`limix.qc.normalise_covariance`.
    M : array_like, optional
        :math:`N` individuals by :math:`S` covariates.
        It will create a :math:`N`-by-:math:`1` matrix ``M`` of ones representing the offset
        covariate if ``None`` is passed. If an array is passed, it will used as is.
        Defaults to ``None``.
    verbose : bool, optional
        ``True`` to display progress and summary; ``False`` otherwise.

    Returns
    -------
    float
        Estimated heritability.

    Examples
    --------
    .. doctest::

        >>> from numpy import dot, exp, sqrt
        >>> from numpy.random import RandomState
        >>> from limix.her import estimate
        >>>
        >>> random = RandomState(0)
        >>>
        >>> G = random.randn(150, 200) / sqrt(200)
        >>> K = dot(G, G.T)
        >>> z = dot(G, random.randn(200)) + random.randn(150)
        >>> y = random.poisson(exp(z))
        >>>
        >>> print('%.3f' % estimate(y, 'poisson', K, verbose=False))  # doctest: +FLOAT_CMP
        0.183

    Notes
    -----
    It will raise a ``ValueError`` exception if non-finite values are passed. Please,
    refer to the :func:`limix.qc.mean_impute` function for missing value imputation.
    """
    from numpy_sugar import is_all_finite
    from numpy_sugar.linalg import economic_qs
    from glimix_core.glmm import GLMMExpFam
    from glimix_core.lmm import LMM

    if not isinstance(lik, (tuple, list)):
        lik = (lik,)

    lik_name = lik[0].lower()
    assert_likelihood_name(lik_name)

    with session_text("heritability analysis", disable=not verbose):

        if M is None:
            M = ones((len(y), 1))

        with timer_text("Normalising input...", disable=not verbose):
            data = _normalise_dataset(y, M=M, K=K)

        y = data["y"]
        M = data["M"]
        K = data["K"]

        if not is_all_finite(y):
            raise ValueError("Outcome must have finite values only.")

        if not is_all_finite(M):
            raise ValueError("Covariates must have finite values only.")

        if K is not None:
            if not is_all_finite(K):
                raise ValueError("Covariate matrix must have finite values only.")

            K = normalise_covariance(K)

        y = normalise_extreme_values(y, lik)

        if K is not None:
            QS = economic_qs(K)
        else:
            QS = None

        if lik_name == "normal":
            method = LMM(y.values, M.values, QS)
            method.fit(verbose=verbose)
        else:
            method = GLMMExpFam(y, lik, M.values, QS, n_int=500)
            method.fit(verbose=verbose, factr=1e6, pgtol=1e-3)

        g = method.scale * (1 - method.delta)
        e = method.scale * method.delta
        if lik_name == "bernoulli":
            e += pi * pi / 3

        if lik_name == "normal":
            v = method.fixed_effects_variance
        else:
            v = var(method.mean())

        return g / (v + g + e)