Source code for limix.qc.trans

from __future__ import division

from numpy import inf

import numpy as np
from brent_search import brent


[docs]def mean_standardize(X, axis=None, out=None):
    r"""Zero-mean and one-deviation normalisation.

    Normalise in such a way that the mean and variance are equal to zero and
    one.
    This transformation is taken over the flattened array by default, otherwise
    over the specified axis.
    Missing values represented by ``NaN`` are ignored.

    It works well with Dask array.

    Parameters
    ----------
    X : array_like
        Array to be normalised.
    axis : None or int, optional
        Axis along which the normalisation will take place. The default is to
        normalise the flattened array.

    Returns
    -------
    array_like
        Normalized array.

    Examples
    --------

    .. doctest::

        >>> import limix
        >>> from numpy import arange, array_str
        >>>
        >>> X = arange(15).reshape((5, 3))
        >>> print(X)
        [[ 0  1  2]
         [ 3  4  5]
         [ 6  7  8]
         [ 9 10 11]
         [12 13 14]]
        >>> X = limix.qc.mean_standardize(X, axis=0)
        >>> print(array_str(X, precision=4))
        [[-1.4142 -1.4142 -1.4142]
         [-0.7071 -0.7071 -0.7071]
         [ 0.      0.      0.    ]
         [ 0.7071  0.7071  0.7071]
         [ 1.4142  1.4142  1.4142]]
    """
    import dask.array as da

    if isinstance(X, da.Array):
        return _mean_standardize(da, X, axis=axis, out=out)
    return _mean_standardize(np, X, axis=axis, out=out)


[docs]def quantile_gaussianize(x):
    r"""Normalize a sequence of values via rank and Normal c.d.f.

    Parameters
    ----------
    x : array_like
        Sequence of values.

    Returns
    -------
    array_like
        Gaussian-normalized values.

    Examples
    --------
    .. doctest::

        >>> from limix.qc import quantile_gaussianize
        >>> from numpy import array_str
        >>>
        >>> qg = quantile_gaussianize([-1, 0, 2])
        >>> print(array_str(qg, precision=4))
        [-0.6745  0.      0.6745]
    """

    from scipy_sugar.stats import quantile_gaussianize as qg

    return qg(x)


[docs]def boxcox(x):
    r"""Box Cox transformation for normality conformance.

    It applies the power transformation

    .. math::

        f(x) = \begin{cases}
            \frac{x^{\lambda} - 1}{\lambda}, & \text{if } \lambda > 0; \\
            \log(x), & \text{if } \lambda = 0.
        \end{cases}

    to the provided data, hopefully making it more normal distribution-like.
    The :math:`\lambda` parameter is fit by maximum likelihood estimation.

    Parameters
    ----------
    X : array_like
        Data to be transformed.

    Returns
    -------
    array_like
        Box Cox transformed data.

    Examples
    --------
    .. plot::

        import limix
        from matplotlib import pyplot as plt
        import numpy as np
        import scipy.stats as stats

        np.random.seed(0)

        x = stats.loggamma.rvs(0.1, size=100)
        y = limix.qc.boxcox(x)

        fig = plt.figure()

        ax1 = fig.add_subplot(211)
        stats.probplot(x, dist=stats.norm, plot=ax1)

        ax2 = fig.add_subplot(212)
        stats.probplot(y, dist=stats.norm, plot=ax2)
    """
    import dask.array as da

    if isinstance(x, da.Array):
        return _boxcox(da, x)
    return _boxcox(np, x)


def _boxcox(lib, x):
    from numpy_sugar import epsilon
    from scipy.stats import boxcox_llf
    from scipy.special import boxcox as bc

    x = lib.asarray(x).astype(float)

    m = x.min()
    if m <= 0:
        m = max(lib.abs(m), epsilon.small)
        x = x + m + m / 2

    lmb = brent(lambda lmb: -boxcox_llf(lmb, x), -5, +5)[0]
    return bc(x, lmb)


def _mean_standardize(lib, X, axis=None, out=None):
    from numpy_sugar import epsilon

    X = lib.asarray(X).astype(float)

    if axis is None:
        X = X.ravel()
        axis = 0

    shape = X.shape
    nshape = shape[:axis] + (1,) + shape[axis + 1 :]

    X = X - lib.nanmean(X, axis=axis).reshape(nshape)
    d = lib.nanstd(X, axis=axis).reshape(nshape)
    d = lib.clip(d, epsilon.tiny, inf)
    X /= d

    return X