Source code for limix.qc.trans

from __future__ import division

from numpy import inf

import numpy as np
from brent_search import brent


[docs]def mean_standardize(X, axis=None, out=None): r"""Zero-mean and one-deviation normalisation. Normalise in such a way that the mean and variance are equal to zero and one. This transformation is taken over the flattened array by default, otherwise over the specified axis. Missing values represented by ``NaN`` are ignored. It works well with Dask array. Parameters ---------- X : array_like Array to be normalised. axis : None or int, optional Axis along which the normalisation will take place. The default is to normalise the flattened array. Returns ------- array_like Normalized array. Examples -------- .. doctest:: >>> import limix >>> from numpy import arange, array_str >>> >>> X = arange(15).reshape((5, 3)) >>> print(X) [[ 0 1 2] [ 3 4 5] [ 6 7 8] [ 9 10 11] [12 13 14]] >>> X = limix.qc.mean_standardize(X, axis=0) >>> print(array_str(X, precision=4)) [[-1.4142 -1.4142 -1.4142] [-0.7071 -0.7071 -0.7071] [ 0. 0. 0. ] [ 0.7071 0.7071 0.7071] [ 1.4142 1.4142 1.4142]] """ import dask.array as da if isinstance(X, da.Array): return _mean_standardize(da, X, axis=axis, out=out) return _mean_standardize(np, X, axis=axis, out=out)
[docs]def quantile_gaussianize(x): r"""Normalize a sequence of values via rank and Normal c.d.f. Parameters ---------- x : array_like Sequence of values. Returns ------- array_like Gaussian-normalized values. Examples -------- .. doctest:: >>> from limix.qc import quantile_gaussianize >>> from numpy import array_str >>> >>> qg = quantile_gaussianize([-1, 0, 2]) >>> print(array_str(qg, precision=4)) [-0.6745 0. 0.6745] """ from scipy_sugar.stats import quantile_gaussianize as qg return qg(x)
[docs]def boxcox(x): r"""Box Cox transformation for normality conformance. It applies the power transformation .. math:: f(x) = \begin{cases} \frac{x^{\lambda} - 1}{\lambda}, & \text{if } \lambda > 0; \\ \log(x), & \text{if } \lambda = 0. \end{cases} to the provided data, hopefully making it more normal distribution-like. The :math:`\lambda` parameter is fit by maximum likelihood estimation. Parameters ---------- X : array_like Data to be transformed. Returns ------- array_like Box Cox transformed data. Examples -------- .. plot:: import limix from matplotlib import pyplot as plt import numpy as np import scipy.stats as stats np.random.seed(0) x = stats.loggamma.rvs(0.1, size=100) y = limix.qc.boxcox(x) fig = plt.figure() ax1 = fig.add_subplot(211) stats.probplot(x, dist=stats.norm, plot=ax1) ax2 = fig.add_subplot(212) stats.probplot(y, dist=stats.norm, plot=ax2) """ import dask.array as da if isinstance(x, da.Array): return _boxcox(da, x) return _boxcox(np, x)
def _boxcox(lib, x): from numpy_sugar import epsilon from scipy.stats import boxcox_llf from scipy.special import boxcox as bc x = lib.asarray(x).astype(float) m = x.min() if m <= 0: m = max(lib.abs(m), epsilon.small) x = x + m + m / 2 lmb = brent(lambda lmb: -boxcox_llf(lmb, x), -5, +5)[0] return bc(x, lmb) def _mean_standardize(lib, X, axis=None, out=None): from numpy_sugar import epsilon X = lib.asarray(X).astype(float) if axis is None: X = X.ravel() axis = 0 shape = X.shape nshape = shape[:axis] + (1,) + shape[axis + 1 :] X = X - lib.nanmean(X, axis=axis).reshape(nshape) d = lib.nanstd(X, axis=axis).reshape(nshape) d = lib.clip(d, epsilon.tiny, inf) X /= d return X