Source code for limix.qc._allele

from __future__ import division

from numpy import isnan, logical_not, minimum, nansum


[docs]def compute_maf(X): r"""Compute minor allele frequencies. It assumes that ``X`` encodes 0, 1, and 2 representing the number of alleles (or dosage), or ``NaN`` to represent missing values. Parameters ---------- X : array_like Genotype matrix. Returns ------- array_like Minor allele frequencies. Examples -------- .. doctest:: >>> from numpy.random import RandomState >>> from limix.qc import compute_maf >>> >>> random = RandomState(0) >>> X = random.randint(0, 3, size=(100, 10)) >>> >>> print(compute_maf(X)) # doctest: +FLOAT_CMP [0.49 0.49 0.445 0.495 0.5 0.45 0.48 0.48 0.47 0.435] """ import dask.array as da if isinstance(X, da.Array): s0 = da.nansum(X, axis=0).compute() denom = 2 * (X.shape[0] - da.isnan(X).sum(axis=0)).compute() else: s0 = nansum(X, axis=0) denom = 2 * logical_not(isnan(X)).sum(axis=0) s0 = s0 / denom s1 = 1 - s0 return minimum(s0, s1)