Java Code Examples for htsjdk.variant.variantcontext.Genotype#getPloidy()
The following examples show how to use
htsjdk.variant.variantcontext.Genotype#getPloidy() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SelectVariants.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Initialize cache of allele anyploid indices * * Initialize the cache of PL index to a list of alleles for each ploidy. * * @param vc Variant Context */ private void initalizeAlleleAnyploidIndicesCache(final VariantContext vc) { if (vc.getType() != VariantContext.Type.NO_VARIATION) { // Bypass if not a variant for (final Genotype g : vc.getGenotypes()) { // Make a new entry if the we have not yet cached a PL to allele indices map for this ploidy and allele count // skip if there are no PLs -- this avoids hanging on high-allelic somatic samples, for example, where // there's no need for the PL indices since they don't exist if (g.getPloidy() != 0 && (!ploidyToNumberOfAlleles.containsKey(g.getPloidy()) || ploidyToNumberOfAlleles.get(g.getPloidy()) < vc.getNAlleles())) { if (vc.getGenotypes().stream().anyMatch(Genotype::hasLikelihoods)) { GenotypeLikelihoods.initializeAnyploidPLIndexToAlleleIndices(vc.getNAlleles() - 1, g.getPloidy()); ploidyToNumberOfAlleles.put(g.getPloidy(), vc.getNAlleles()); } } } } }
Example 2
Source File: GVCFBlockCombiner.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Determines whether the given genotype can be folded into the hom-ref block currently
 * being built: the block must exist, the genotype's (capped) GQ must fall within the
 * block's GQ band, ploidies must match, and any PL arrays present on both sides must
 * agree in length.
 *
 * @param g the genotype under consideration
 * @return true if {@code g} is mergeable into the current block
 */
boolean genotypeCanBeMergedInCurrentBlock(final Genotype g) {
    final HomRefBlock block = (HomRefBlock) currentBlock;
    if (block == null) {
        return false;
    }
    // GQ values above the cap are treated as the cap when testing band membership.
    if (!block.withinBounds(Math.min(g.getGQ(), MAX_GENOTYPE_QUAL))) {
        return false;
    }
    if (block.getPloidy() != g.getPloidy()) {
        return false;
    }
    // Mergeable unless both sides carry PLs of differing lengths.
    return block.getMinPLs() == null
            || !g.hasPL()
            || block.getMinPLs().length == g.getPL().length;
}
Example 3
Source File: VariantAFEvaluator.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
public void update1(VariantContext vc, final ReferenceContext referenceContext, final ReadsContext readsContext, final FeatureContext featureContext) { vc.getStart(); if (vc == null || !vc.isSNP() || (getWalker().ignoreAC0Sites() && vc.isMonomorphicInSamples())) { return; } for (final Genotype genotype : vc.getGenotypes()) { // eval array if (!genotype.isNoCall()) { if (genotype.getPloidy() != PLOIDY) { throw new UserException.BadInput("This tool only works with ploidy 2"); } // add AF at this site this.totalCalledSites += 1; int numReferenceAlleles= genotype.countAllele(vc.getReference()); double varAFHere = (PLOIDY - numReferenceAlleles)/PLOIDY; this.sumVariantAFs += varAFHere; totalHetSites += numReferenceAlleles == 1 ? 1 : 0; totalHomVarSites += numReferenceAlleles == 0 ? 1 : 0; totalHomRefSites += numReferenceAlleles == 2 ? 1 : 0; } } if (!vc.hasGenotypes()) { // comp ( sites only thousand genomes ) this.totalCalledSites += 1; this.sumVariantAFs += vc.getAttributeAsDouble("AF", 0.0); } }
Example 4
Source File: StatisticsTask.java From imputationserver with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Checks per-sample ploidy consistency on a chrX record: once a sample has been observed
 * haploid, any later non-haploid genotype for that sample is written to the chrX info file
 * and flags {@code chrXPloidyError}. Samples observed haploid here are added to
 * {@code hapSamples}.
 *
 * @param samples        sample names to inspect
 * @param snp            the variant record providing genotypes and position
 * @param isPhased       unused here
 * @param chrXInfoWriter destination for "sample\tcontig:start" error lines
 * @param hapSamples     running set of samples seen with haploid genotypes (mutated)
 * @throws IOException if writing an error line fails
 */
public void checkPloidy(List<String> samples, VariantContext snp, boolean isPhased,
        LineWriter chrXInfoWriter, HashSet<String> hapSamples) throws IOException {

    for (final String sample : samples) {
        final Genotype genotype = snp.getGenotype(sample);
        final int observedPloidy = genotype.getPloidy();

        // A sample previously recorded as haploid must stay haploid.
        if (hapSamples.contains(sample) && observedPloidy != 1) {
            chrXInfoWriter.write(sample + "\t" + snp.getContig() + ":" + snp.getStart());
            this.chrXPloidyError = true;
        }

        if (observedPloidy == 1) {
            hapSamples.add(sample);
        }
    }
}
Example 5
Source File: VcfToVariant.java From genomewarp with Apache License 2.0 | 4 votes |
/**
 * Builds one VariantCall per sample in the header that has a genotype in {@code vc}.
 *
 * For each sample: records the call-set name, the genotype allele indices (via the
 * allele-to-index map), a phase-set marker for phased non-haploid calls, genotype-level
 * filters, and all remaining per-genotype FORMAT fields (inline-accessor fields and
 * header-typed fields handled by separate parsers).
 *
 * @param vc     the variant context supplying genotypes
 * @param header the VCF header supplying the sample list and FORMAT field metadata
 * @return the list of constructed VariantCall messages, in header sample order
 */
@VisibleForTesting
static List<VariantCall> getCalls(VariantContext vc, VCFHeader header) {
    List<VariantCall> toReturn = new ArrayList<>();

    for (String currSample : header.getGenotypeSamples()) {
        // Samples in the header but absent from this record produce no call.
        if (!vc.hasGenotype(currSample)) {
            continue;
        }

        Genotype currGenotype = vc.getGenotype(currSample);
        VariantCall.Builder vcBuilder = VariantCall.newBuilder();
        vcBuilder.setCallSetName(currSample);

        // Get GT info.
        // NOTE(review): allele 0 is added unconditionally before the loop -- assumes
        // ploidy >= 1 for every present genotype; confirm callers never pass ploidy-0 calls.
        final Map<Allele, Integer> alleleStrings = buildAlleleMap(vc);
        vcBuilder.addGenotype(alleleStrings.get(currGenotype.getAllele(0)));
        for (int i = 1; i < currGenotype.getPloidy(); i++) {
            vcBuilder.addGenotype(alleleStrings.get(currGenotype.getAllele(i)));
        }

        // Set phasing (not applicable to haploid).
        if (currGenotype.isPhased() && currGenotype.getPloidy() > 1) {
            vcBuilder.setPhaseset("*");
        }

        // Get rest of the genotype info.
        Map<String, ListValue> genotypeInfo = new HashMap<>();

        // Set filters
        if (currGenotype.isFiltered()) {
            genotypeInfo.put(VCFConstants.GENOTYPE_FILTER_KEY, ListValue.newBuilder()
                .addValues(Value.newBuilder().setStringValue(currGenotype.getFilters()).build())
                .build());
        }

        for (final String field : vc.calcVCFGenotypeKeys(header)) {
            // We've already handled genotype
            if (field.equals(VCFConstants.GENOTYPE_KEY)) {
                continue;
            }

            ListValue.Builder listValueBuilder = ListValue.newBuilder();
            if (field.equals(VCFConstants.GENOTYPE_FILTER_KEY)) {
                // This field has already been dealt with
                continue;
            } else {
                final IntGenotypeFieldAccessors.Accessor accessor = GENOTYPE_FIELD_ACCESSORS.getAccessor(field);
                if (accessor != null) {
                    // The field is a default inline field.
                    // A false return means the field produced no values for this genotype.
                    if (!parseInlineGenotypeFields(field, vcBuilder, listValueBuilder, accessor, currGenotype)) {
                        continue;
                    }
                } else {
                    // Other field, we'll get type/other info from header.
                    if (!parseOtherGenotypeFields(field, vc, listValueBuilder, currGenotype, header)) {
                        continue;
                    }
                }
            }

            genotypeInfo.put(field, listValueBuilder.build());
        }

        vcBuilder.putAllInfo(genotypeInfo);
        toReturn.add(vcBuilder.build());
    }
    return toReturn;
}
Example 6
Source File: HomRefBlock.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
/** * Add a homRef block to the current block * * @param pos current genomic position * @param newEnd new calculated block end position * @param genotype A non-null Genotype with GQ and DP attributes */ @Override public void add(final int pos, final int newEnd, final Genotype genotype) { Utils.nonNull(genotype, "genotype cannot be null"); if ( ! genotype.hasPL() ) { throw new IllegalArgumentException("genotype must have PL field");} if ( pos != end + 1 ) { throw new IllegalArgumentException("adding genotype at pos " + pos + " isn't contiguous with previous end " + end); } if ( genotype.getPloidy() != ploidy) { throw new IllegalArgumentException("cannot add a genotype with a different ploidy: " + genotype.getPloidy() + " != " + ploidy); } // Make sure the GQ is within the bounds of this band. Treat GQs > 99 as 99. if ( !withinBounds(Math.min(genotype.getGQ(), VCFConstants.MAX_GENOTYPE_QUAL))) { throw new IllegalArgumentException("cannot add a genotype with GQ=" + genotype.getGQ() + " because it's not within bounds [" + this.getGQLowerBound() + ',' + this.getGQUpperBound() + ')'); } if( minPLs == null ) { minPLs = genotype.getPL(); } else { // otherwise take the min with the provided genotype's PLs final int[] pls = genotype.getPL(); if (pls.length != minPLs.length) { throw new GATKException("trying to merge different PL array sizes: " + pls.length + " != " + minPLs.length); } for (int i = 0; i < pls.length; i++) { minPLs[i] = Math.min(minPLs[i], pls[i]); } } if( genotype.hasExtendedAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY)) { if (minPPs == null ) { minPPs = PosteriorProbabilitiesUtils.parsePosteriorsIntoPhredSpace(genotype); } else { // otherwise take the min with the provided genotype's PLs final int[] pps = PosteriorProbabilitiesUtils.parsePosteriorsIntoPhredSpace(genotype); if (pps.length != minPPs.length) { throw new GATKException("trying to merge different PP array sizes: " + pps.length + " != " + minPPs.length); } for (int i = 0; i < pps.length; 
i++) { minPPs[i] = Math.min(minPPs[i], pps[i]); } } } end = newEnd; DPs.add(Math.max(genotype.getDP(), 0)); // DP must be >= 0 }
Example 7
Source File: AlleleFrequencyCalculator.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
/**
 * Compute the probability of the alleles segregating given the genotype likelihoods of the samples in vc
 *
 * @param vc the VariantContext holding the alleles and sample information. The VariantContext
 *           must have at least 1 alternative allele
 * @param defaultPloidy ploidy to assume for genotypes reporting ploidy 0
 * @return result (for programming convenience)
 */
public AFCalculationResult calculate(final VariantContext vc, final int defaultPloidy) {
    Utils.nonNull(vc, "VariantContext cannot be null");
    final int numAlleles = vc.getNAlleles();
    final List<Allele> alleles = vc.getAlleles();
    Utils.validateArg( numAlleles > 1, () -> "VariantContext has only a single reference allele, but getLog10PNonRef requires at least one at all " + vc);

    // Dirichlet prior pseudocounts: one per allele, chosen by allele class (ref / SNP / indel).
    final double[] priorPseudocounts = alleles.stream()
            .mapToDouble(a -> a.isReference() ? refPseudocount : (a.length() == vc.getReference().length() ? snpPseudocount : indelPseudocount)).toArray();

    double[] alleleCounts = new double[numAlleles];
    final double flatLog10AlleleFrequency = -MathUtils.log10(numAlleles); // log10(1/numAlleles)
    double[] log10AlleleFrequencies = new IndexRange(0, numAlleles).mapToDouble(n -> flatLog10AlleleFrequency);

    // EM loop: iterate expected allele counts <-> posterior allele frequencies until the
    // max per-allele change falls below the convergence threshold.
    for (double alleleCountsMaximumDifference = Double.POSITIVE_INFINITY; alleleCountsMaximumDifference > THRESHOLD_FOR_ALLELE_COUNT_CONVERGENCE; ) {
        final double[] newAlleleCounts = effectiveAlleleCounts(vc, log10AlleleFrequencies);
        alleleCountsMaximumDifference = Arrays.stream(MathArrays.ebeSubtract(alleleCounts, newAlleleCounts)).map(Math::abs).max().getAsDouble();
        alleleCounts = newAlleleCounts;
        final double[] posteriorPseudocounts = MathArrays.ebeAdd(priorPseudocounts, alleleCounts);

        // first iteration uses flat prior in order to avoid local minimum where the prior + no pseudocounts gives such a low
        // effective allele frequency that it overwhelms the genotype likelihood of a real variant
        // basically, we want a chance to get non-zero pseudocounts before using a prior that's biased against a variant
        log10AlleleFrequencies = new Dirichlet(posteriorPseudocounts).log10MeanWeights();
    }

    double[] log10POfZeroCountsByAllele = new double[numAlleles];
    double log10PNoVariant = 0;

    final boolean spanningDeletionPresent = alleles.contains(Allele.SPAN_DEL);
    // Cache of hom-ref/span-del-only genotype indices, keyed by ploidy.
    final Map<Integer, int[]> nonVariantIndicesByPloidy = new Int2ObjectArrayMap<>();

    // re-usable buffers of the log10 genotype posteriors of genotypes missing each allele
    final List<DoubleArrayList> log10AbsentPosteriors = IntStream.range(0,numAlleles).mapToObj(n -> new DoubleArrayList()).collect(Collectors.toList());

    for (final Genotype g : vc.getGenotypes()) {
        if (!g.hasLikelihoods()) {
            continue;
        }
        // Ploidy-0 genotypes fall back to the caller-supplied default.
        final int ploidy = g.getPloidy() == 0 ? defaultPloidy : g.getPloidy();
        final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(ploidy, numAlleles);

        final double[] log10GenotypePosteriors = log10NormalizedGenotypePosteriors(g, glCalc, log10AlleleFrequencies);

        //the total probability
        if (!spanningDeletionPresent) {
            log10PNoVariant += log10GenotypePosteriors[HOM_REF_GENOTYPE_INDEX];
        } else {
            nonVariantIndicesByPloidy.computeIfAbsent(ploidy, p -> genotypeIndicesWithOnlyRefAndSpanDel(p, alleles));
            final int[] nonVariantIndices = nonVariantIndicesByPloidy.get(ploidy);
            final double[] nonVariantLog10Posteriors = MathUtils.applyToArray(nonVariantIndices, n -> log10GenotypePosteriors[n]);
            // when the only alt allele is the spanning deletion the probability that the site is non-variant
            // may be so close to 1 that finite precision error in log10SumLog10 yields a positive value,
            // which is bogus. Thus we cap it at 0.
            log10PNoVariant += Math.min(0,MathUtils.log10SumLog10(nonVariantLog10Posteriors));
        }

        // if the VC is biallelic the allele-specific qual equals the variant qual
        if (numAlleles == 2) {
            continue;
        }

        // for each allele, we collect the log10 probabilities of genotypes in which the allele is absent, then add (in log space)
        // to get the log10 probability that the allele is absent in this sample
        log10AbsentPosteriors.forEach(DoubleArrayList::clear); // clear the buffers. Note that this is O(1) due to the primitive backing array
        for (int genotype = 0; genotype < glCalc.genotypeCount(); genotype++) {
            final double log10GenotypePosterior = log10GenotypePosteriors[genotype];
            glCalc.genotypeAlleleCountsAt(genotype).forEachAbsentAlleleIndex(a -> log10AbsentPosteriors.get(a).add(log10GenotypePosterior), numAlleles);
        }

        final double[] log10PNoAllele = log10AbsentPosteriors.stream()
                .mapToDouble(buffer -> MathUtils.log10SumLog10(buffer.toDoubleArray()))
                .map(x -> Math.min(0, x)).toArray(); // if prob of non hom ref > 1 due to finite precision, short-circuit to avoid NaN

        // multiply the cumulative probabilities of alleles being absent, which is addition of logs
        MathUtils.addToArrayInPlace(log10POfZeroCountsByAllele, log10PNoAllele);
    }

    // for biallelic the allele-specific qual equals the variant qual, and we short-circuited the calculation above
    if (numAlleles == 2) {
        log10POfZeroCountsByAllele[1] = log10PNoVariant;
    }

    // unfortunately AFCalculationResult expects integers for the MLE. We really should emit the EM no-integer values
    // which are valuable (eg in CombineGVCFs) as the sufficient statistics of the Dirichlet posterior on allele frequencies
    final int[] integerAlleleCounts = Arrays.stream(alleleCounts).mapToInt(x -> (int) Math.round(x)).toArray();
    final int[] integerAltAlleleCounts = Arrays.copyOfRange(integerAlleleCounts, 1, numAlleles); //skip the ref allele (index 0)

    final Map<Allele, Double> log10PRefByAllele = IntStream.range(1, numAlleles).boxed()
            .collect(Collectors.toMap(alleles::get, a -> log10POfZeroCountsByAllele[a]));

    return new AFCalculationResult(integerAltAlleleCounts, alleles, log10PNoVariant, log10PRefByAllele);
}