Source code for limix.stats._allele

from .._dask import array_shape_reveal


def allele_frequency(expec):
    r"""Compute allele frequency from its expectation.

    The expectations are summed over the second-to-last axis and divided by
    the length of the last axis (used as the ploidy) times the number of
    samples.

    Parameters
    ----------
    expec : array_like
        Allele expectations encoded as a variants-by-samples-by-alleles
        matrix. Must expose ``shape``, ``ndim`` and ``sum``.

    Returns
    -------
    :class:`numpy.ndarray`
        Allele frequencies encoded as a variants-by-alleles matrix.

    Notes
    -----
    - For inputs with fewer than three dimensions the number of samples is
      taken to be one, i.e. the sum over axis ``-2`` is divided by the length
      of the last axis only.
    - NOTE(review): the ploidy is read from the allele axis, so it matches the
      real ploidy only when the number of alleles equals it (e.g. biallelic,
      diploid data) -- confirm this is the intended convention.
    """
    # Length of the last axis plays the role of the ploidy in the divisor.
    last_axis_len = expec.shape[-1]
    # Only a (variants, samples, alleles) input carries a sample axis to
    # normalise over; anything smaller is divided by the ploidy alone.
    n_samples = expec.shape[1] if expec.ndim >= 3 else 1
    divisor = last_axis_len * n_samples
    return expec.sum(-2) / divisor
def compute_dosage(expec, alt=None):
    r"""Compute dosage from allele expectation.

    Parameters
    ----------
    expec : array_like
        Allele expectations encoded as a variants-by-samples-by-alleles matrix.
    alt : array_like, optional
        Index of the allele whose expectation is reported as the dosage of
        each locus: either one index per variant, or a single index that is
        applied to every variant. The last allele of every variant is used if
        `None`. Defaults to `None`.

    Returns
    -------
    :class:`numpy.ndarray`
        Dosage encoded as a variants-by-samples matrix. When ``alt`` is `None`
        the result is a plain slice of ``expec`` and therefore has whatever
        array type ``expec`` has.

    Examples
    --------
    .. doctest::

        >>> from bgen_reader import read_bgen, allele_expectation, example_files
        >>> from bgen_reader import compute_dosage
        >>>
        >>> with example_files("example.32bits.bgen") as filepath:
        ...     bgen = read_bgen(filepath, verbose=False)
        ...     e = allele_expectation(bgen["genotype"], nalleles=2, ploidy=2)
        ...     dosage = compute_dosage(e).compute()
        ...     print(dosage.shape)
        ...     print(dosage)
        (199, 500)
        [[       nan 1.93575854 1.91558579 ... 1.94351192 0.10894776 1.01101689]
         [1.98779296 1.97802735 0.02111815 ... 1.95492412 1.00897216 1.02255316]
         [0.01550294 0.99383543 1.97933958 ... 1.98681641 1.99041748 1.99603272]
         ...
         [1.99319479 1.980896   1.98767124 ... 1.9943846  1.99716186 1.98712159]
         [0.01263467 0.09661863 0.00869752 ... 0.00643921 0.00494384 0.01504517]
         [0.99185182 1.94860838 0.99734497 ... 0.02914425 1.97827146 0.9515991 ]]
    """
    from numpy import arange, asarray

    if alt is None:
        return expec[..., -1]

    alt = asarray(alt, int)
    # Pair every variant with its own allele index. Using ``alt`` for the
    # variant axis as well would pick variant ``alt[i]`` instead of variant
    # ``i``. A scalar ``alt`` broadcasts against ``variants``.
    variants = arange(expec.shape[0])
    try:
        return expec[variants, :, alt]
    except NotImplementedError:
        # Some array types cannot index with two integer arrays at once;
        # fall back to a concrete floating-point numpy array.
        return asarray(expec, float)[variants, :, alt]
def allele_expectation(p, nalleles, ploidy):
    r"""Allele expectation.

    Compute the expectation of each allele from the given genotype
    probabilities. Three shapes of input are accepted:

    - unidimensional array of probabilities;
    - bidimensional samples-by-alleles probabilities array;
    - and three dimensional variants-by-samples-by-alleles array.

    Parameters
    ----------
    p : array_like
        Allele probabilities. Its last axis is multiplied against the
        per-genotype allele counts, one entry per genotype.
    nalleles : int
        Number of alleles.
    ploidy : int
        Number of complete sets of chromosomes.

    Returns
    -------
    :class:`numpy.ndarray`
        Last dimension will contain the expectation of each allele.

    Examples
    --------
    .. doctest::

        >>> from texttable import Texttable
        >>> from bgen_reader import read_bgen, allele_expectation, example_files
        >>>
        >>> sampleid = "sample_005"
        >>> rsid = "RSID_6"
        >>>
        >>> with example_files("example.32bits.bgen") as filepath:
        ...     bgen = read_bgen(filepath, verbose=False)
        ...
        ...     locus = bgen["variants"].query("rsid == '{}'".format(rsid)).index
        ...     sample = bgen["samples"].query("id == '{}'".format(sampleid)).index
        ...
        ...     nalleles = bgen["variants"].loc[locus, "nalleles"].item()
        ...     ploidy = 2
        ...
        ...     p = bgen["genotype"][locus[0], sample[0]].compute()
        ...     # For unphased genotypes only.
        ...     e = allele_expectation(bgen["genotype"][locus[0], sample[0]], nalleles, ploidy)
        ...
        ...     alleles = bgen["variants"].loc[locus, "allele_ids"].item().split(",")
        ...
        ...     tab = Texttable()
        ...
        ...     tab.add_rows(
        ...         [
        ...             ["", "AA", "AG", "GG", "E[.]"],
        ...             ["p"] + list(p) + [1.0],
        ...             ["#" + alleles[0], 2, 1, 0, e[0]],
        ...             ["#" + alleles[1], 0, 1, 2, e[1]],
        ...         ]
        ...     )
        >>> print(tab.draw())
        +----+-------+-------+-------+-------+
        |    |  AA   |  AG   |  GG   | E[.]  |
        +====+=======+=======+=======+=======+
        | p  | 0.012 | 0.987 | 0.001 | 1     |
        +----+-------+-------+-------+-------+
        | #A | 2     | 1     | 0     | 1.011 |
        +----+-------+-------+-------+-------+
        | #G | 0     | 1     | 2     | 0.989 |
        +----+-------+-------+-------+-------+
        >>> print("variant: {}".format(rsid))
        variant: RSID_6
        >>> print("sample : {}".format(sampleid))
        sample : sample_005

    Note
    ----
    This function supports unphased genotypes only.
    """
    from numpy import asarray, newaxis

    # One row per genotype, one column per allele: how many copies of each
    # allele the genotype carries.
    genotypes = _get_genotypes(ploidy, nalleles)
    counts = asarray(_genotypes_to_allele_counts(genotypes), float)

    # Lay the counts out as (1, ..., 1, alleles, genotypes) so they broadcast
    # against every leading axis of ``p``.
    leading_ones = (1,) * (p.ndim - 1)
    transposed = counts.T
    weights = transposed.reshape(leading_ones + transposed.shape)

    probabilities = array_shape_reveal(p)
    # Insert an allele axis before the genotype axis, weight every genotype
    # probability by its allele count and sum the genotypes out.
    weighted = weights * probabilities[..., newaxis, :]
    return weighted.sum(-1)
def _get_genotypes(ploidy, nalleles):
    """Return every unphased genotype for the given ploidy and allele count.

    Alleles are numbered from 1 to ``nalleles``. Each genotype is a
    non-decreasing list of ``ploidy`` allele numbers, and the genotypes are
    ordered by comparing their entries from the last one to the first one.
    """
    candidates = _make_genotypes(ploidy, 1, nalleles)
    # Sorting on the reversed genotype makes the last entry the most
    # significant one while leaving each genotype itself untouched.
    return sorted(candidates, key=lambda genotype: genotype[::-1])


def _make_genotypes(ploidy, start, end):
    """List the non-decreasing allele sequences of length ``ploidy``.

    Entries are drawn from ``start`` to ``end`` (both inclusive) and the
    sequences are produced in lexicographic order. A ploidy of zero yields an
    empty list.
    """
    if ploidy == 0:
        return []

    alleles = range(start, end + 1)
    if ploidy == 1:
        return [[allele] for allele in alleles]

    # Fix the first entry, then extend it with every shorter sequence that
    # does not go below it.
    return [
        [allele] + tail
        for allele in alleles
        for tail in _make_genotypes(ploidy - 1, allele, end)
    ]


def _genotypes_to_allele_counts(genotypes):
    """Count how many copies of each allele every genotype carries.

    The number of alleles is read from the first entry of the last genotype,
    so that genotype is expected to start with the highest allele number.
    Returns one list of counts per genotype, indexed by allele number minus
    one.
    """
    width = genotypes[-1][0]
    table = []
    for genotype in genotypes:
        row = [0] * width
        for allele in genotype:
            row[allele - 1] += 1
        table.append(row)
    return table