Java Code Examples for htsjdk.variant.variantcontext.VariantContext#getGenotype()
The following examples show how to use
htsjdk.variant.variantcontext.VariantContext#getGenotype() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: OrientationBiasFiltererUnitTest.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testAnnotateVariantContextWithPreprocessingValues() { final FeatureDataSource<VariantContext> featureDataSource = new FeatureDataSource<>(new File(smallM2Vcf)); SortedSet<Transition> relevantTransitions = new TreeSet<>(); relevantTransitions.add(Transition.transitionOf('G', 'T')); final Map<Transition, Double> preAdapterQFakeScoreMap = new HashMap<>(); final double amGTPreAdapterQ = 20.0; preAdapterQFakeScoreMap.put(Transition.transitionOf('G', 'T'), amGTPreAdapterQ); // preAdapterQ suppression will do nothing. for (final VariantContext vc : featureDataSource) { final VariantContext updatedVariantContext = OrientationBiasFilterer.annotateVariantContextWithPreprocessingValues(vc, relevantTransitions, preAdapterQFakeScoreMap); final Genotype genotypeTumor = updatedVariantContext.getGenotype("TUMOR"); final Genotype genotypeNormal = updatedVariantContext.getGenotype("NORMAL"); Assert.assertNotEquals(genotypeTumor.getExtendedAttribute(OrientationBiasFilterConstants.FOB, VCFConstants.EMPTY_ALLELE), VCFConstants.EMPTY_ALLELE); Assert.assertNotEquals(genotypeTumor.getExtendedAttribute(OrientationBiasFilterConstants.P_ARTIFACT_FIELD_NAME, VCFConstants.EMPTY_ALLELE), VCFConstants.EMPTY_ALLELE); assertArtifact(amGTPreAdapterQ, genotypeTumor, Transition.transitionOf('G', 'T')); // The NORMAL is always ref/ref in the example file. assertNormal(genotypeNormal); } }
Example 2
Source File: Mutect2IntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test @SuppressWarnings("deprecation") public void testAFAtHighDP() { Utils.resetRandomGenerator(); final File unfilteredVcf = createTempFile("unfiltered", ".vcf"); runMutect2(DEEP_MITO_BAM, unfilteredVcf, "chrM:1-1018", MITO_REF.getAbsolutePath(), Optional.empty(), args -> args.add(IntervalArgumentCollection.INTERVAL_PADDING_LONG_NAME, 300)); final List<VariantContext> variants = VariantContextTestUtils.streamVcf(unfilteredVcf).collect(Collectors.toList()); for (final VariantContext vc : variants) { Assert.assertTrue(vc.isBiallelic()); //I do some lazy parsing below that won't hold for multiple alternate alleles final Genotype g = vc.getGenotype(DEEP_MITO_SAMPLE_NAME); Assert.assertTrue(g.hasAD()); final int[] ADs = g.getAD(); Assert.assertTrue(g.hasExtendedAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY)); //Assert.assertEquals(Double.parseDouble(String.valueOf(vc.getGenotype(DEEP_MITO_SAMPLE_NAME).getExtendedAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY,"0"))), (double)ADs[1]/(ADs[0]+ADs[1]), 1e-6); Assert.assertEquals(Double.parseDouble(String.valueOf(vc.getGenotype(DEEP_MITO_SAMPLE_NAME).getAttributeAsString(GATKVCFConstants.ALLELE_FRACTION_KEY, "0"))), (double) ADs[1] / (ADs[0] + ADs[1]), 2e-3); } }
Example 3
Source File: Mutect2FilteringEngine.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
private void applyStrandArtifactFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final Collection<String> filters) { Genotype tumorGenotype = vc.getGenotype(tumorSample); final double[] posteriorProbabilities = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray( tumorGenotype, (StrandArtifact.POSTERIOR_PROBABILITIES_KEY), () -> null, -1); final double[] mapAlleleFractionEstimates = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray( tumorGenotype, (StrandArtifact.MAP_ALLELE_FRACTIONS_KEY), () -> null, -1); if (posteriorProbabilities == null || mapAlleleFractionEstimates == null){ return; } final int maxZIndex = MathUtils.maxElementIndex(posteriorProbabilities); if (maxZIndex == StrandArtifact.StrandArtifactZ.NO_ARTIFACT.ordinal()){ return; } if (posteriorProbabilities[maxZIndex] > MTFAC.STRAND_ARTIFACT_POSTERIOR_PROB_THRESHOLD && mapAlleleFractionEstimates[maxZIndex] < MTFAC.STRAND_ARTIFACT_ALLELE_FRACTION_THRESHOLD){ filters.add(GATKVCFConstants.STRAND_ARTIFACT_FILTER_NAME); } }
Example 4
Source File: PositiveAllelicDepth.java From hmftools with GNU General Public License v3.0 | 5 votes |
private void run() { fileWriter.writeHeader(fileReader.getFileHeader()); for (final VariantContext context : fileReader) { final VariantContextBuilder builder = new VariantContextBuilder(context); final List<Genotype> genotypes = Lists.newArrayList(); for (int genotypeIndex = 0; genotypeIndex < context.getGenotypes().size(); genotypeIndex++) { Genotype genotype = context.getGenotype(genotypeIndex); if (genotype.hasAD() && genotype.hasDP()) { int[] ad = genotype.getAD(); int total = 0; boolean updateRecord = false; for (int i = 0; i < ad.length; i++) { int adValue = ad[i]; if (adValue < 0) { updateRecord = true; ad[i] = Math.abs(adValue); } total += Math.abs(adValue); } if (updateRecord) { LOGGER.info("Updated entry at {}:{}", context.getContig(), context.getStart()); Genotype updated = new GenotypeBuilder(genotype).AD(ad).DP(total).make(); genotypes.add(updated); } else { genotypes.add(genotype); } } } builder.genotypes(genotypes); fileWriter.add(builder.make()); } }
Example 5
Source File: PurityAdjustedSomaticVariantFactory.java From hmftools with GNU General Public License v3.0 | 5 votes |
@NotNull public VariantContext enrich(@NotNull final VariantContext variant) { final Genotype genotype = variant.getGenotype(tumorSample); if (genotype != null && genotype.hasAD() && HumanChromosome.contains(variant.getContig())) { final GenomePosition position = GenomePositions.create(variant.getContig(), variant.getStart()); final AllelicDepth depth = AllelicDepth.fromGenotype(genotype); enrich(position, depth, PurityAdjustedSomaticVariantBuilder.fromVariantContex(variant)); } return variant; }
Example 6
Source File: HaplotypeCallerIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test public void testGVCFModeGenotypePosteriors() throws Exception { Utils.resetRandomGenerator(); final String inputFileName = NA12878_20_21_WGS_bam; final String referenceFileName =b37_reference_20_21; final File output = createTempFile("testGVCFModeIsConsistentWithPastResults", ".g.vcf"); final String[] args = { "-I", inputFileName, "-R", referenceFileName, "-L", "20:10000000-10100000", "-O", output.getAbsolutePath(), "--" + AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_LONG_NAME, ReferenceConfidenceMode.GVCF.toString(), "--" + GenotypeCalculationArgumentCollection.SUPPORTING_CALLSET_LONG_NAME, largeFileTestDir + "1000G.phase3.broad.withGenotypes.chr20.10100000.vcf", "--" + GenotypeCalculationArgumentCollection.NUM_REF_SAMPLES_LONG_NAME, "2500", "-pairHMM", "AVX_LOGLESS_CACHING", "--" + AssemblyBasedCallerArgumentCollection.ALLELE_EXTENSION_LONG_NAME, "2", "--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false" }; runCommandLine(args); Pair<VCFHeader, List<VariantContext>> results = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()); for (final VariantContext vc : results.getRight()) { final Genotype g = vc.getGenotype(0); if (g.hasDP() && g.getDP() > 0 && g.hasGQ() && g.getGQ() > 0) { Assert.assertTrue(g.hasExtendedAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY)); } if (isGVCFReferenceBlock(vc) ) { Assert.assertTrue(!vc.hasAttribute(GATKVCFConstants.GENOTYPE_PRIOR_KEY)); } else if (!vc.getAlternateAllele(0).equals(Allele.NON_REF_ALLELE)){ //there are some variants that don't have non-symbolic alts Assert.assertTrue(vc.hasAttribute(GATKVCFConstants.GENOTYPE_PRIOR_KEY)); } } }
Example 7
Source File: CreateSnpIntervalFromVcf.java From Drop-seq with MIT License | 5 votes |
/** Are all the samples listed heterozygous for this variant, and pass GQ threshold?*/ private boolean sitePassesFilters(final VariantContext ctx, final Set<String> samples, final int GQThreshold, final boolean hetSNPsOnly) { boolean flag = true; for (String sample : samples) { Genotype g = ctx.getGenotype(sample); if (g.getGQ()<GQThreshold || (!g.isHet() && hetSNPsOnly)) return (false); } return (flag); }
Example 8
Source File: SelectVariants.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Checks if the two variants have the same genotypes for the selected samples * * @param vc the variant rod VariantContext. * @param compVCs the comparison VariantContext * @return true if VariantContexts are concordant, false otherwise */ private boolean isConcordant (final VariantContext vc, final Collection<VariantContext> compVCs) { if (vc == null || compVCs == null || compVCs.isEmpty()) { return false; } // if we're not looking for specific samples then the fact that we have both VCs is enough to call it concordant. if (noSamplesSpecified) { return true; } // make a list of all samples contained in this variant VC that are being tracked by the user command line arguments. final Set<String> variantSamples = vc.getSampleNames(); variantSamples.retainAll(samples); // check if we can find all samples from the variant rod in the comp rod. for (final String sample : variantSamples) { boolean foundSample = false; for (final VariantContext compVC : compVCs) { final Genotype varG = vc.getGenotype(sample); final Genotype compG = compVC.getGenotype(sample); if (haveSameGenotypes(varG, compG)) { foundSample = true; break; } } // if at least one sample doesn't have the same genotype, we don't have concordance if (!foundSample) { return false; } } return true; }
Example 9
Source File: HaplotypeCallerIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private static String keyForVariant( final VariantContext variant ) { Genotype genotype = variant.getGenotype(0); if (genotype.isPhased()) { // unphase it for comparisons, since we rely on comparing the genotype string below genotype = new GenotypeBuilder(genotype).phased(false).make(); } return String.format("%s:%d-%d %s %s", variant.getContig(), variant.getStart(), variant.getEnd(), variant.getAlleles(), genotype.getGenotypeString(false)); }
Example 10
Source File: XHMMSegmentGenotyperIntegrationTest.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 5 votes |
private void assertVariantSegmentsAreCovered(final List<VariantContext> variants, final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> variantSegments) { for (final HiddenStateSegmentRecord<CopyNumberTriState, Target> variantSegment : variantSegments) { final Optional<VariantContext> match = variants.stream() .filter(vc -> new SimpleInterval(vc).equals(variantSegment.getSegment().getInterval())) .findFirst(); Assert.assertTrue(match.isPresent()); final VariantContext matchedVariant = match.get(); final Genotype genotype = matchedVariant.getGenotype(variantSegment.getSampleName()); final String discovery = genotype.getAnyAttribute(XHMMSegmentGenotyper.DISCOVERY_KEY).toString(); Assert.assertTrue(discovery.equals(XHMMSegmentGenotyper.DISCOVERY_TRUE)); final CopyNumberTriState call = variantSegment.getSegment().getCall(); final List<Allele> gt = genotype.getAlleles(); Assert.assertEquals(gt.size(), 1); // The call may not be the same for case where the event-quality is relatively low. if (variantSegment.getSegment().getEventQuality() > 10) { Assert.assertEquals(CopyNumberTriStateAllele.valueOf(gt.get(0)).state, call, genotype.toString()); } final String[] SQ = genotype.getAnyAttribute(XHMMSegmentGenotyper.SOME_QUALITY_KEY).toString().split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR); final double someQual = variantSegment.getSegment().getSomeQuality(); Assert.assertEquals(Double.parseDouble(SQ[call == CopyNumberTriState.DELETION ? 0 : 1]), someQual, XHMMSegmentGenotyper.PHRED_SCORE_PRECISION, variantSegment.getSampleName() + " => " + genotype.toString()); final String[] LQ = genotype.getAnyAttribute(XHMMSegmentGenotyper.START_QUALITY_KEY).toString().split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR); final double startQuality = variantSegment.getSegment().getStartQuality(); Assert.assertEquals(Double.parseDouble(LQ[call == CopyNumberTriState.DELETION ? 0 : 1]), startQuality, XHMMSegmentGenotyper.PHRED_SCORE_PRECISION, variantSegment.getSampleName() + " => " + genotype.toString()); final String[] RQ = genotype.getAnyAttribute(XHMMSegmentGenotyper.END_QUALITY_KEY).toString().split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR); final double endQuality = variantSegment.getSegment().getEndQuality(); Assert.assertEquals(Double.parseDouble(RQ[call == CopyNumberTriState.DELETION ? 0 : 1]), endQuality, XHMMSegmentGenotyper.PHRED_SCORE_PRECISION, variantSegment.getSampleName() + " => " + genotype.toString()); // Check the PL. final int[] PL = genotype.getPL(); final int observedGQFromPL = Math.min(XHMMSegmentGenotyper.MAX_GQ, PL[CopyNumberTriStateAllele.REF.index()] - PL[CopyNumberTriStateAllele.valueOf(call).index()]); final double expectedCallPL = GATKProtectedMathUtils.roundPhred(QualityUtils.phredScaleErrorRate(QualityUtils.qualToProb(variantSegment.getSegment().getExactQuality())), HMMPostProcessor.PHRED_SCORE_PRECISION); final double expectedRefPL = GATKProtectedMathUtils.roundPhred(QualityUtils.phredScaleCorrectRate(QualityUtils.qualToProb(variantSegment.getSegment().getEventQuality())), HMMPostProcessor.PHRED_SCORE_PRECISION); final int expectedGQFromPL = Math.min(XHMMSegmentGenotyper.MAX_GQ, (int) Math.round(expectedRefPL - expectedCallPL)); Assert.assertTrue(Math.abs(observedGQFromPL - expectedGQFromPL) <= 1, genotype.toString() + " " + variantSegment.getSegment().getEventQuality()); } }
Example 11
Source File: Mutect2FilteringEngine.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 5 votes |
private void applyContaminationFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final Collection<String> filters) { final Genotype tumorGenotype = vc.getGenotype(tumorSample); final double[] alleleFractions = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(tumorGenotype, VCFConstants.ALLELE_FREQUENCY_KEY, () -> new double[] {1.0}, 1.0); final double maxFraction = MathUtils.arrayMax(alleleFractions); if (maxFraction < contamination) { filters.add(CONTAMINATION_FILTER_NAME); } }
Example 12
Source File: MergePedIntoVcf.java From picard with MIT License | 4 votes |
/** * Writes out the VariantContext objects in the order presented to the supplied output file * in VCF format. */ private void writeVcf(final CloseableIterator<VariantContext> variants, final File output, final SAMSequenceDictionary dict, final VCFHeader vcfHeader, ZCallPedFile zCallPedFile) { try(VariantContextWriter writer = new VariantContextWriterBuilder() .setOutputFile(output) .setReferenceDictionary(dict) .setOptions(VariantContextWriterBuilder.DEFAULT_OPTIONS) .build()) { writer.writeHeader(vcfHeader); while (variants.hasNext()) { final VariantContext context = variants.next(); final VariantContextBuilder builder = new VariantContextBuilder(context); if (zCallThresholds.containsKey(context.getID())) { final String[] zThresh = zCallThresholds.get(context.getID()); builder.attribute(InfiniumVcfFields.ZTHRESH_X, zThresh[0]); builder.attribute(InfiniumVcfFields.ZTHRESH_Y, zThresh[1]); } final Genotype originalGenotype = context.getGenotype(0); final Map<String, Object> newAttributes = originalGenotype.getExtendedAttributes(); final VCFEncoder vcfEncoder = new VCFEncoder(vcfHeader, false, false); final Map<Allele, String> alleleMap = vcfEncoder.buildAlleleStrings(context); final String zCallAlleles = zCallPedFile.getAlleles(context.getID()); if (zCallAlleles == null) { throw new PicardException("No zCall alleles found for snp " + context.getID()); } final List<Allele> zCallPedFileAlleles = buildNewAllelesFromZCall(zCallAlleles, context.getAttributes()); newAttributes.put(InfiniumVcfFields.GTA, alleleMap.get(originalGenotype.getAllele(0)) + UNPHASED + alleleMap.get(originalGenotype.getAllele(1))); newAttributes.put(InfiniumVcfFields.GTZ, alleleMap.get(zCallPedFileAlleles.get(0)) + UNPHASED + alleleMap.get(zCallPedFileAlleles.get(1))); final Genotype newGenotype = GenotypeBuilder.create(originalGenotype.getSampleName(), zCallPedFileAlleles, newAttributes); builder.genotypes(newGenotype); logger.record("0", 0); // AC, AF, and AN are recalculated here VariantContextUtils.calculateChromosomeCounts(builder, false); final VariantContext newContext = builder.make(); writer.add(newContext); } } }
Example 13
Source File: EvaluateCopyNumberTriStateCallsIntegrationTest.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 4 votes |
private void checkOutputTruthConcordance(final File truthFile, final File targetsFile, final File vcfOutput, final EvaluationFiltersArgumentCollection filteringOptions) { final List<VariantContext> truthVariants = readVCFFile(truthFile); final List<VariantContext> outputVariants = readVCFFile(vcfOutput); final Set<String> outputSamples = outputVariants.get(0).getSampleNames(); final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(targetsFile); for (final VariantContext truth : truthVariants) { final List<Target> overlappingTargets = targets.targets(truth); final List<VariantContext> overlappingOutput = outputVariants.stream() .filter(vc -> new SimpleInterval(vc).overlaps(truth)) .collect(Collectors.toList()); if (overlappingTargets.isEmpty()) { Assert.assertTrue(overlappingOutput.isEmpty()); continue; } Assert.assertFalse(overlappingOutput.isEmpty()); Assert.assertEquals(overlappingOutput.stream().filter(vc -> new SimpleInterval(truth).equals(new SimpleInterval(vc))).count(), 1); @SuppressWarnings("all") final Optional<VariantContext> prospectiveMatchingOutput = overlappingOutput.stream() .filter(vc -> new SimpleInterval(truth).equals(new SimpleInterval(vc))) .findFirst(); Assert.assertTrue(prospectiveMatchingOutput.isPresent()); final VariantContext matchingOutput = prospectiveMatchingOutput.get(); final int[] truthAC = calculateACFromTruth(truth); final long truthAN = MathUtils.sum(truthAC); final double[] truthAF = IntStream.of(Arrays.copyOfRange(truthAC, 1, truthAC.length)).mapToDouble(d -> d / (double) truthAN).toArray(); Assert.assertEquals(matchingOutput.getAttributeAsInt(VariantEvaluationContext.TRUTH_ALLELE_NUMBER_KEY, -1), truthAN); assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(matchingOutput, VariantEvaluationContext.TRUTH_ALLELE_FREQUENCY_KEY, () -> new double[2], 0.0), truthAF, 0.001); assertOutputVariantFilters(filteringOptions, overlappingTargets, matchingOutput, truthAF); for (final String sample : outputSamples) { final Genotype outputGenotype = matchingOutput.getGenotype(sample); final Genotype truthGenotype = truth.getGenotype(sample); final int truthCN = GATKProtectedVariantContextUtils.getAttributeAsInt(truthGenotype, ConvertGSVariantsToSegments.GS_COPY_NUMBER_FORMAT, -1); final int truthGT = GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.TRUTH_GENOTYPE_KEY, -1); final Object truthQualObject = outputGenotype.getAnyAttribute(VariantEvaluationContext.TRUTH_QUALITY_KEY); Assert.assertNotNull(truthQualObject, "" + truthGenotype); final double truthQual = Double.parseDouble(String.valueOf(truthQualObject)); if (truthQual < filteringOptions.minimumTruthSegmentQuality) { Assert.assertEquals(outputGenotype.getFilters(),EvaluationFilter.LowQuality.acronym); } else { Assert.assertTrue(outputGenotype.getFilters() == null || outputGenotype.getFilters().equals(VCFConstants.PASSES_FILTERS_v4)); } final double[] truthPosteriors = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(truthGenotype, ConvertGSVariantsToSegments.GS_COPY_NUMBER_POSTERIOR, () -> new double[0], Double.NEGATIVE_INFINITY); final double truthDelPosterior = MathUtils.log10SumLog10(truthPosteriors, 0, EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT); final double truthRefPosterior = truthPosteriors[EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT]; final double truthDupPosterior = truthPosteriors.length < EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT ? Double.NEGATIVE_INFINITY : MathUtils.log10SumLog10(truthPosteriors, EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT + 1, truthPosteriors.length); final CopyNumberTriStateAllele truthAllele = truthGT == -1 ? null : CopyNumberTriStateAllele.ALL_ALLELES.get(truthGT); if (truthCN < EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT) { Assert.assertEquals(truthAllele, CopyNumberTriStateAllele.DEL); Assert.assertEquals(truthQual * -.1, MathUtils.log10SumLog10(new double[] {truthRefPosterior, truthDupPosterior}), 0.01); } else if (truthCN > EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT) { Assert.assertEquals(truthAllele, CopyNumberTriStateAllele.DUP); Assert.assertEquals(truthQual * -.1, MathUtils.log10SumLog10(new double[] {truthDelPosterior, truthRefPosterior}), 0.01, "" + truthGenotype + " " + outputGenotype); } else { Assert.assertEquals(truthAllele, CopyNumberTriStateAllele.REF); Assert.assertEquals(truthQual * -.1, MathUtils.log10SumLog10(new double[] {truthDelPosterior, truthDupPosterior}), 0.01); } final double outputTruthFraction = GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.TRUTH_COPY_FRACTION_KEY, -1); final double inputTruthFraction = GATKProtectedVariantContextUtils.getAttributeAsDouble(truthGenotype, ConvertGSVariantsToSegments.GS_COPY_NUMBER_FRACTION, -1); Assert.assertEquals(outputTruthFraction, inputTruthFraction, 0.01); } } }
Example 14
Source File: SelectVariants.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
private void addAnnotations(final VariantContextBuilder builder, final VariantContext originalVC, final Set<String> selectedSampleNames) { if (fullyDecode) { return; // TODO -- annotations are broken with fully decoded data } if (keepOriginalChrCounts) { final int[] indexOfOriginalAlleleForNewAllele; final List<Allele> newAlleles = builder.getAlleles(); final int numOriginalAlleles = originalVC.getNAlleles(); // if the alleles already match up, we can just copy the previous list of counts if (numOriginalAlleles == newAlleles.size()) { indexOfOriginalAlleleForNewAllele = null; } // otherwise we need to parse them and select out the correct ones else { indexOfOriginalAlleleForNewAllele = new int[newAlleles.size() - 1]; Arrays.fill(indexOfOriginalAlleleForNewAllele, -1); // note that we don't care about the reference allele at position 0 for (int newI = 1; newI < newAlleles.size(); newI++) { final Allele newAlt = newAlleles.get(newI); for (int oldI = 0; oldI < numOriginalAlleles - 1; oldI++) { if (newAlt.equals(originalVC.getAlternateAllele(oldI), false)) { indexOfOriginalAlleleForNewAllele[newI - 1] = oldI; break; } } } } if (originalVC.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) { builder.attribute(GATKVCFConstants.ORIGINAL_AC_KEY, getReorderedAttributes(originalVC.getAttribute(VCFConstants.ALLELE_COUNT_KEY), indexOfOriginalAlleleForNewAllele)); } if (originalVC.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)) { builder.attribute(GATKVCFConstants.ORIGINAL_AF_KEY, getReorderedAttributes(originalVC.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY), indexOfOriginalAlleleForNewAllele)); } if (originalVC.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY)) { builder.attribute(GATKVCFConstants.ORIGINAL_AN_KEY, originalVC.getAttribute(VCFConstants.ALLELE_NUMBER_KEY)); } } VariantContextUtils.calculateChromosomeCounts(builder, false); if (keepOriginalDepth && originalVC.hasAttribute(VCFConstants.DEPTH_KEY)) { builder.attribute(GATKVCFConstants.ORIGINAL_DP_KEY, originalVC.getAttribute(VCFConstants.DEPTH_KEY)); } boolean sawDP = false; int depth = 0; for (final String sample : selectedSampleNames ) { final Genotype g = originalVC.getGenotype(sample); if (!g.isFiltered()) { if (g.hasDP()) { depth += g.getDP(); sawDP = true; } } } if (sawDP) { builder.attribute(VCFConstants.DEPTH_KEY, depth); } }
Example 15
Source File: VcfToVariant.java From genomewarp with Apache License 2.0 | 4 votes |
@VisibleForTesting static List<VariantCall> getCalls(VariantContext vc, VCFHeader header) { List<VariantCall> toReturn = new ArrayList<>(); for (String currSample : header.getGenotypeSamples()) { if (!vc.hasGenotype(currSample)) { continue; } Genotype currGenotype = vc.getGenotype(currSample); VariantCall.Builder vcBuilder = VariantCall.newBuilder(); vcBuilder.setCallSetName(currSample); // Get GT info. final Map<Allele, Integer> alleleStrings = buildAlleleMap(vc); vcBuilder.addGenotype(alleleStrings.get(currGenotype.getAllele(0))); for (int i = 1; i < currGenotype.getPloidy(); i++) { vcBuilder.addGenotype(alleleStrings.get(currGenotype.getAllele(i))); } // Set phasing (not applicable to haploid). if (currGenotype.isPhased() && currGenotype.getPloidy() > 1) { vcBuilder.setPhaseset("*"); } // Get rest of the genotype info. Map<String, ListValue> genotypeInfo = new HashMap<>(); // Set filters if (currGenotype.isFiltered()) { genotypeInfo.put(VCFConstants.GENOTYPE_FILTER_KEY, ListValue.newBuilder() .addValues(Value.newBuilder().setStringValue(currGenotype.getFilters()).build()) .build()); } for (final String field : vc.calcVCFGenotypeKeys(header)) { // We've already handled genotype if (field.equals(VCFConstants.GENOTYPE_KEY)) { continue; } ListValue.Builder listValueBuilder = ListValue.newBuilder(); if (field.equals(VCFConstants.GENOTYPE_FILTER_KEY)) { // This field has already been dealt with continue; } else { final IntGenotypeFieldAccessors.Accessor accessor = GENOTYPE_FIELD_ACCESSORS.getAccessor(field); if (accessor != null) { // The field is a default inline field. if (!parseInlineGenotypeFields(field, vcBuilder, listValueBuilder, accessor, currGenotype)) { continue; } } else { // Other field, we'll get type/other info from header. if (!parseOtherGenotypeFields(field, vc, listValueBuilder, currGenotype, header)) { continue; } } } genotypeInfo.put(field, listValueBuilder.build()); } vcBuilder.putAllInfo(genotypeInfo); toReturn.add(vcBuilder.build()); } return toReturn; }
Example 16
Source File: StatisticsTask.java From imputationserver with GNU Affero General Public License v3.0 | 4 votes |
public void checkPloidy(List<String> samples, VariantContext snp, boolean isPhased, LineWriter chrXInfoWriter, HashSet<String> hapSamples) throws IOException { for (final String name : samples) { Genotype genotype = snp.getGenotype(name); if (hapSamples.contains(name) && genotype.getPloidy() != 1) { chrXInfoWriter.write(name + "\t" + snp.getContig() + ":" + snp.getStart()); this.chrXPloidyError = true; } if (genotype.getPloidy() == 1) { hapSamples.add(name); } } }
Example 17
Source File: VcfToAdpc.java From picard with MIT License | 4 votes |
@Override protected int doWork() { final List<File> inputs = IOUtil.unrollFiles(VCF, IOUtil.VCF_EXTENSIONS); IOUtil.assertFilesAreReadable(inputs); IOUtil.assertFileIsWritable(SAMPLES_FILE); IOUtil.assertFileIsWritable(NUM_MARKERS_FILE); IOUtil.assertFileIsWritable(OUTPUT); final List<String> sampleNames = new ArrayList<>(); Integer numberOfLoci = null; try (IlluminaAdpcFileWriter adpcFileWriter = new IlluminaAdpcFileWriter(OUTPUT)) { for (final File inputVcf : inputs) { VCFFileReader vcfFileReader = new VCFFileReader(inputVcf, false); final VCFHeader header = vcfFileReader.getFileHeader(); for (int sampleNumber = 0; sampleNumber < header.getNGenotypeSamples(); sampleNumber++) { final String sampleName = header.getGenotypeSamples().get(sampleNumber); sampleNames.add(sampleName); log.info("Processing sample: " + sampleName + " from VCF: " + inputVcf.getAbsolutePath()); CloseableIterator<VariantContext> variants = vcfFileReader.iterator(); int lociCount = 0; while (variants.hasNext()) { final VariantContext context = variants.next(); final float gcScore = getFloatAttribute(context, InfiniumVcfFields.GC_SCORE); final Genotype genotype = context.getGenotype(sampleNumber); final IlluminaGenotype illuminaGenotype = getIlluminaGenotype(genotype, context); final int rawXIntensity = getUnsignedShortAttributeAsInt(genotype, InfiniumVcfFields.X); final int rawYIntensity = getUnsignedShortAttributeAsInt(genotype, InfiniumVcfFields.Y); final Float normalizedXIntensity = getFloatAttribute(genotype, InfiniumVcfFields.NORMX); final Float normalizedYIntensity = getFloatAttribute(genotype, InfiniumVcfFields.NORMY); final IlluminaAdpcFileWriter.Record record = new IlluminaAdpcFileWriter.Record(rawXIntensity, rawYIntensity, normalizedXIntensity, normalizedYIntensity, gcScore, illuminaGenotype); adpcFileWriter.write(record); lociCount++; } if (lociCount == 0) { throw new PicardException("Found no records in VCF' " + inputVcf.getAbsolutePath() + "'"); } if (numberOfLoci == null) { numberOfLoci = lociCount; } else { if (lociCount != numberOfLoci) { throw new PicardException("VCFs have differing number of loci"); } } } } writeTextToFile(SAMPLES_FILE, StringUtils.join(sampleNames, "\n")); writeTextToFile(NUM_MARKERS_FILE, "" + numberOfLoci); } catch (Exception e) { log.error(e); return 1; } return 0; }
Example 18
Source File: GermlineCNVSegmentVariantComposerUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
@Test(dataProvider = "variantCompositionSettings") public void testVariantComposition(final int refAutosomalCopyNumber, final Set<String> allosomalContigs) { /* read a segments collection */ final IntegerCopyNumberSegmentCollection collection = new IntegerCopyNumberSegmentCollection( IntegerCopyNumberSegmentCollectionUnitTest.TEST_INTEGER_COPY_NUMBER_SEGMENTS_FILE); final File segmentsOutputVCF = createTempFile("test-write-segments", ".vcf"); final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(segmentsOutputVCF.toPath(), collection.getMetadata().getSequenceDictionary(), false); final GermlineCNVSegmentVariantComposer variantComposer = new GermlineCNVSegmentVariantComposer( writer, IntegerCopyNumberSegmentCollectionUnitTest.EXPECTED_SAMPLE_NAME, new IntegerCopyNumberState(refAutosomalCopyNumber), allosomalContigs); /* compose segments and assert correctness */ for (final IntegerCopyNumberSegment segment: collection.getRecords()) { final VariantContext var = variantComposer.composeVariantContext(segment); Assert.assertEquals(var.getContig(), segment.getContig()); Assert.assertEquals(var.getStart(), segment.getStart()); Assert.assertEquals(var.getEnd(), segment.getEnd()); final Genotype gt = var.getGenotype(IntegerCopyNumberSegmentCollectionUnitTest.EXPECTED_SAMPLE_NAME); /* assert allele correctness */ final Allele actualAllele = gt.getAlleles().get(0); final int refCopyNumber = allosomalContigs.contains(segment.getContig()) ? segment.getBaselineIntegerCopyNumberState().getCopyNumber() : refAutosomalCopyNumber; final Allele expectedAllele; if (segment.getCallIntegerCopyNumberState().getCopyNumber() > refCopyNumber) { expectedAllele = GermlineCNVSegmentVariantComposer.DUP_ALLELE; } else if (segment.getCallIntegerCopyNumberState().getCopyNumber() < refCopyNumber) { expectedAllele = GermlineCNVSegmentVariantComposer.DEL_ALLELE; } else { expectedAllele = GermlineCNVSegmentVariantComposer.REF_ALLELE; } Assert.assertEquals(actualAllele, expectedAllele); Assert.assertTrue(var.getAlleles().size() == (expectedAllele.equals(GermlineCNVSegmentVariantComposer.REF_ALLELE) ? 1 : 2)); Assert.assertTrue(var.getAlleles().contains(Allele.REF_N)); Assert.assertTrue(var.getAlleles().contains(expectedAllele)); /* assert correctness of quality metrics */ Assert.assertEquals( (long) gt.getExtendedAttribute(GermlineCNVSegmentVariantComposer.QS), FastMath.round(segment.getQualitySomeCalled())); Assert.assertEquals( (long) gt.getExtendedAttribute(GermlineCNVSegmentVariantComposer.QA), FastMath.round(segment.getQualityAllCalled())); Assert.assertEquals( (long) gt.getExtendedAttribute(GermlineCNVSegmentVariantComposer.QSS), FastMath.round(segment.getQualityStart())); Assert.assertEquals( (long) gt.getExtendedAttribute(GermlineCNVSegmentVariantComposer.QSE), FastMath.round(segment.getQualityEnd())); } }