htsjdk.variant.variantcontext.GenotypeBuilder Java Examples
The following examples show how to use
htsjdk.variant.variantcontext.GenotypeBuilder.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example #1
Source File: AmberVCF.java From hmftools with GNU General Public License v3.0 | 6 votes |
/**
 * Builds a bi-allelic VariantContext for a single BaseDepth site, with a genotype
 * for the configured primary-reference sample carrying DP and AD fields.
 */
@NotNull
private VariantContext create(@NotNull final BaseDepth snp) {
    final List<Allele> alleles = Lists.newArrayList();
    alleles.add(Allele.create(snp.ref().toString(), true));   // reference allele
    alleles.add(Allele.create(snp.alt().toString(), false));  // alternate allele

    // AD field ordering matches the allele list: [refSupport, altSupport].
    final List<Integer> adField = Lists.newArrayList();
    adField.add(snp.refSupport());
    adField.add(snp.altSupport());

    final Genotype normal = new GenotypeBuilder(config.primaryReference()).DP(snp.readDepth())
            .AD(adField.stream().mapToInt(i -> i).toArray())
            .alleles(alleles)
            .make();

    final VariantContextBuilder builder = new VariantContextBuilder().chr(snp.chromosome())
            .start(snp.position())
            // End position is derived from the alleles (handles indel lengths).
            .computeEndFromAlleles(alleles, (int) snp.position())
            .genotypes(normal)
            .alleles(alleles);
    return builder.make();
}
Example #2
Source File: SelectVariants.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/**
 * Returns a copy of {@code vc} with the configured INFO and genotype (FORMAT)
 * annotations removed; returns {@code vc} unchanged when nothing is to be dropped.
 */
private VariantContext buildVariantContextWithDroppedAnnotationsRemoved(final VariantContext vc) {
    if (infoAnnotationsToDrop.isEmpty() && genotypeAnnotationsToDrop.isEmpty()) {
        return vc;
    }
    final VariantContextBuilder rmAnnotationsBuilder = new VariantContextBuilder(vc);
    for (String infoField : infoAnnotationsToDrop) {
        rmAnnotationsBuilder.rmAttribute(infoField);
    }
    if (!genotypeAnnotationsToDrop.isEmpty()) {
        final ArrayList<Genotype> genotypesToWrite = new ArrayList<>();
        for (Genotype genotype : vc.getGenotypes()) {
            // Rebuild each genotype without attributes, then re-add all but the dropped ones.
            final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype).noAttributes();
            final Map<String, Object> attributes = new HashMap<>(genotype.getExtendedAttributes());
            for (String genotypeAnnotation : genotypeAnnotationsToDrop) {
                attributes.remove(genotypeAnnotation);
            }
            genotypeBuilder.attributes(attributes);
            genotypesToWrite.add(genotypeBuilder.make());
        }
        rmAnnotationsBuilder.genotypes(GenotypesContext.create(genotypesToWrite));
    }
    return rmAnnotationsBuilder.make();
}
Example #3
Source File: EvaluateCopyNumberTriStateCalls.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
/**
 * Builds the annotated genotype for a sample whose truth record overlaps one or
 * more calls, delegating to the overload once qualifying calls are selected.
 */
private Genotype buildAndAnnotateTruthOverlappingGenotype(final String sample, final VariantContext truth,
        final List<VariantContext> calls, final TargetCollection<Target> targets) {
    final Genotype truthGenotype = truth.getGenotype(sample);
    // if there is no truth genotype for that sample, we output the "empty" genotype.
    if (truthGenotype == null) {
        return GenotypeBuilder.create(sample, Collections.emptyList());
    }
    final int truthCopyNumber = GATKProtectedVariantContextUtils.getAttributeAsInt(truthGenotype,
            GS_COPY_NUMBER_FORMAT_KEY, truthNeutralCopyNumber);
    final CopyNumberTriStateAllele truthAllele = copyNumberToTrueAllele(truthCopyNumber);
    // Keep only calls that have a genotype for this sample and were flagged as discovered.
    final List<Pair<VariantContext, Genotype>> allCalls = calls.stream()
            .map(vc -> new ImmutablePair<>(vc, vc.getGenotype(sample)))
            .filter(pair -> pair.getRight() != null)
            .filter(pair -> GATKProtectedVariantContextUtils.getAttributeAsString(pair.getRight(),
                    XHMMSegmentGenotyper.DISCOVERY_KEY, XHMMSegmentGenotyper.DISCOVERY_FALSE)
                    .equals(XHMMSegmentGenotyper.DISCOVERY_TRUE))
            .collect(Collectors.toList());
    final List<Pair<VariantContext, Genotype>> qualifiedCalls = composeQualifyingCallsList(targets, allCalls);
    return buildAndAnnotateTruthOverlappingGenotype(sample, targets, truthGenotype, truthCopyNumber,
            truthAllele, qualifiedCalls);
}
Example #4
Source File: PerAlleleAnnotation.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
/**
 * Calculate annotations for each allele based on given VariantContext and likelihoods for a given genotype's sample
 * and add the annotations to the GenotypeBuilder. See parent class docs in {@link GenotypeAnnotation}.
 */
public void annotate(final ReferenceContext ref, final VariantContext vc, final Genotype g,
        final GenotypeBuilder gb, final ReadLikelihoods<Allele> likelihoods) {
    Utils.nonNull(gb);
    Utils.nonNull(vc);
    // Nothing to annotate without a genotype or likelihoods.
    if ( g == null || likelihoods == null ) {
        return;
    }
    // Collect per-allele values from informative, usable reads.
    final Map<Allele, List<Integer>> values = likelihoods.alleles().stream()
            .collect(Collectors.toMap(a -> a, a -> new ArrayList<>()));
    Utils.stream(likelihoods.bestAlleles(g.getSampleName()))
            .filter(ba -> ba.isInformative() && isUsableRead(ba.read))
            .forEach(ba -> getValueForRead(ba.read, vc).ifPresent(v -> values.get(ba.allele).add(v)));
    // One aggregated statistic per VC allele, in VC allele order.
    final int[] statistics = vc.getAlleles().stream().mapToInt(a -> aggregate(values.get(a))).toArray();
    gb.attribute(getVcfKey(), statistics);
}
Example #5
Source File: DepthPerAlleleBySample.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** Annotates the genotype's AD (per-allele depth) field from the read likelihoods. */
@Override
public void annotate(final ReferenceContext ref, final VariantContext vc, final Genotype g,
        final GenotypeBuilder gb, final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
    Utils.nonNull(gb, "gb is null");
    Utils.nonNull(vc, "vc is null");
    // Skip uncalled genotypes and missing likelihoods.
    if ( g == null || !g.isCalled() || likelihoods == null) {
        return;
    }
    final Set<Allele> alleles = new LinkedHashSet<>(vc.getAlleles());

    // make sure that there's a meaningful relationship between the alleles in the likelihoods and our VariantContext
    Utils.validateArg(likelihoods.alleles().containsAll(alleles),
            () -> "VC alleles " + alleles + " not a subset of AlleleLikelihoods alleles " + likelihoods.alleles());

    gb.AD(annotateWithLikelihoods(vc, g, alleles, likelihoods));
}
Example #6
Source File: StrandBiasBySample.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** Adds the per-sample strand bias contingency table as a FORMAT attribute on the genotype. */
@Override
public void annotate(final ReferenceContext ref, final VariantContext vc, final Genotype g,
        final GenotypeBuilder gb, final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
    Utils.nonNull(vc);
    Utils.nonNull(g);
    Utils.nonNull(gb);
    if ( likelihoods == null || !g.isCalled() ) {
        logger.warn("Annotation will not be calculated, genotype is not called or alleleLikelihoodMap is null");
        return;
    }
    final int[][] table = FisherStrand.getContingencyTable(likelihoods, vc, 0, Arrays.asList(g.getSampleName()));
    gb.attribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY, getContingencyArray(table));
}
Example #7
Source File: HomRefBlock.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/**
 * Builds the summary hom-ref genotype for this block: PL/GQ/MIN_DP when emitting
 * full detail, or only the band's GQ lower bound when {@code floorBlocks} is set.
 */
@Override
Genotype createHomRefGenotype(final String sampleName, final boolean floorBlocks) {
    final GenotypeBuilder gb = new GenotypeBuilder(sampleName, Collections.nCopies(getPloidy(), getRef()));
    gb.noAD().noPL().noAttributes(); // clear all attributes
    final int[] minPLs = getMinPLs();
    final int[] minPPs = getMinPPs();
    if (!floorBlocks) {
        gb.PL(minPLs);
        // GQ is recalculated from posteriors when available, otherwise from the min PLs.
        gb.GQ(GATKVariantContextUtils.calculateGQFromPLs(minPPs != null ? minPPs : minPLs));
        gb.attribute(GATKVCFConstants.MIN_DP_FORMAT_KEY, getMinDP());
    } else {
        gb.GQ(getGQLowerBound());
    }
    gb.DP(getMedianDP());
    if (minPPs != null) {
        gb.attribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY, Utils.listFromPrimitives(minPPs));
    }
    return gb.make();
}
Example #8
Source File: SageVariantContextFactory.java From hmftools with GNU General Public License v3.0 | 6 votes |
/**
 * Builds the genotype for the counter's sample, carrying DP, AD and the
 * Sage-specific read-context FORMAT attributes.
 */
@NotNull
private static Genotype createGenotype(boolean germline, @NotNull final VariantHotspot variant,
        @NotNull final ReadContextCounter counter) {
    return new GenotypeBuilder(counter.sample()).DP(counter.depth())
            .AD(new int[] { counter.refSupport(), counter.altSupport() })
            .attribute(READ_CONTEXT_QUALITY, counter.quality())
            .attribute(READ_CONTEXT_COUNT, counter.counts())
            .attribute(READ_CONTEXT_IMPROPER_PAIR, counter.improperPair())
            .attribute(READ_CONTEXT_JITTER, counter.jitter())
            .attribute(RAW_ALLELIC_DEPTH, new int[] { counter.rawRefSupport(), counter.rawAltSupport() })
            .attribute(RAW_ALLELIC_BASE_QUALITY, new int[] { counter.rawRefBaseQuality(), counter.rawAltBaseQuality() })
            .attribute(RAW_DEPTH, counter.rawDepth())
            .attribute(VCFConstants.ALLELE_FREQUENCY_KEY, counter.vaf())
            .alleles(createGenotypeAlleles(germline, variant, counter))
            .make();
}
Example #9
Source File: GenotypeConcordanceTest.java From picard with MIT License | 6 votes |
/** Verifies that genotypes below the GQ threshold are classified as LOW_GQ on truth and call sides. */
@Test
public void testGenotypeConcordanceDetermineStateGq() throws Exception {
    final List<Allele> allelesNormal = makeUniqueListOfAlleles(Aref, C);
    final Genotype gtNormal = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C));
    final VariantContext vcNormal = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, allelesNormal).genotypes(gtNormal).make();

    // Same het genotype but with a low GQ of 4.
    final List<Allele> allelesLowGq = makeUniqueListOfAlleles(Aref, C);
    final Genotype gtLowGq = new GenotypeBuilder(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C)).GQ(4).make();
    final VariantContext vcLowGq = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, allelesLowGq).genotypes(gtLowGq).make();

    // With a GQ threshold of 20 the low-GQ genotype is flagged; with 2 it is not.
    testGenotypeConcordanceDetermineState(vcLowGq, TruthState.LOW_GQ, vcNormal, CallState.HET_REF_VAR1, 20, 0);
    testGenotypeConcordanceDetermineState(vcLowGq, TruthState.HET_REF_VAR1, vcLowGq, CallState.HET_REF_VAR1, 2, 0);
    testGenotypeConcordanceDetermineState(vcNormal, TruthState.HET_REF_VAR1, vcLowGq, CallState.LOW_GQ, 20, 0);
    testGenotypeConcordanceDetermineState(vcNormal, TruthState.HET_REF_VAR1, vcLowGq, CallState.HET_REF_VAR1, 2, 0);
    testGenotypeConcordanceDetermineState(vcLowGq, TruthState.LOW_GQ, vcLowGq, CallState.LOW_GQ, 20, 0);
    testGenotypeConcordanceDetermineState(vcLowGq, TruthState.HET_REF_VAR1, vcLowGq, CallState.HET_REF_VAR1, 2, 0);
}
Example #10
Source File: GenotypeConcordanceTest.java From picard with MIT License | 6 votes |
/** Verifies that genotypes below the DP threshold are classified as LOW_DP on truth and call sides. */
@Test
public void testGenotypeConcordanceDetermineStateDp() throws Exception {
    final List<Allele> allelesNormal = makeUniqueListOfAlleles(Aref, C);
    final Genotype gtNormal = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C));
    final VariantContext vcNormal = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, allelesNormal).genotypes(gtNormal).make();

    // Same het genotype but with a low DP of 4.
    final List<Allele> allelesLowDp = makeUniqueListOfAlleles(Aref, C);
    final Genotype gtLowDp = new GenotypeBuilder(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C)).DP(4).make();
    final VariantContext vcLowDp = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, allelesLowDp).genotypes(gtLowDp).make();

    // With a DP threshold of 20 the low-DP genotype is flagged; with 2 it is not.
    testGenotypeConcordanceDetermineState(vcLowDp, TruthState.LOW_DP, vcNormal, CallState.HET_REF_VAR1, 0, 20);
    testGenotypeConcordanceDetermineState(vcLowDp, TruthState.HET_REF_VAR1, vcLowDp, CallState.HET_REF_VAR1, 0, 2);
    testGenotypeConcordanceDetermineState(vcNormal, TruthState.HET_REF_VAR1, vcLowDp, CallState.LOW_DP, 0, 20);
    testGenotypeConcordanceDetermineState(vcNormal, TruthState.HET_REF_VAR1, vcLowDp, CallState.HET_REF_VAR1, 0, 2);
    testGenotypeConcordanceDetermineState(vcLowDp, TruthState.LOW_DP, vcLowDp, CallState.LOW_DP, 0, 20);
    testGenotypeConcordanceDetermineState(vcLowDp, TruthState.HET_REF_VAR1, vcLowDp, CallState.HET_REF_VAR1, 0, 2);
}
Example #11
Source File: HomRefBlockUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** Verifies that merging two genotypes into a hom-ref block yields a single genotype with median DP and recalculated GQ. */
@Test
public void testToVariantContext(){
    final VariantContext vc = getVariantContext();
    final GenotypeBuilder gb = new GenotypeBuilder(SAMPLE_NAME, vc.getAlleles());
    final Genotype genotype1 = gb.GQ(15).DP(6).PL(new int[]{0, 10, 100}).make();
    final Genotype genotype2 = gb.GQ(17).DP(10).PL(new int[]{0, 5, 80}).make();

    final HomRefBlock block = getHomRefBlock(vc);
    block.add(vc.getEnd(), genotype1);
    block.add(vc.getEnd() + 1, genotype2);

    final VariantContext newVc = block.toVariantContext(SAMPLE_NAME, false);
    Assert.assertEquals(newVc.getGenotypes().size(), 1);
    final Genotype genotype = newVc.getGenotypes().get(0);
    Assert.assertEquals(genotype.getDP(),8); //dp should be median of the added DPs
    Assert.assertEquals(genotype.getGQ(), 5); //GQ should have been recalculated with the minPls
    Assert.assertTrue(genotype.getAlleles().stream().allMatch(a -> a.equals(REF)));
}
Example #12
Source File: StrelkaAllelicDepth.java From hmftools with GNU General Public License v3.0 | 6 votes |
/**
 * Returns a copy of the context in which genotypes lacking an AD field have it
 * derived from the Strelka-specific per-allele attributes; non-SNP/indel records
 * and genotypes that already carry AD are passed through unchanged.
 */
@NotNull
public static VariantContext enrich(@NotNull final VariantContext context) {
    // Only SNPs and indels carry the attributes needed to derive AD.
    if (!context.isIndel() && !context.isSNP() ) {
        return context;
    }
    final VariantContextBuilder contextBuilder = new VariantContextBuilder(context).noGenotypes();
    final List<Allele> alleles = context.getAlleles();
    final Function<Allele, String> alleleKey = alleleKey(context);

    final List<Genotype> updatedGenotypes = Lists.newArrayList();
    for (Genotype genotype : context.getGenotypes()) {
        if (!genotype.hasAD() && hasRequiredAttributes(genotype, alleles, alleleKey)) {
            final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype).AD(readAD(genotype, alleles, alleleKey));
            updatedGenotypes.add(genotypeBuilder.make());
        } else {
            updatedGenotypes.add(genotype);
        }
    }
    return contextBuilder.genotypes(updatedGenotypes).make();
}
Example #13
Source File: AlleleFractionGenotypeAnnotationUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** Data provider: a het A/C genotype with balanced AD and a hom-ref A/A genotype, with expected allele fractions. */
@DataProvider(name = "testUsingADDataProvider")
public Object[][] testUsingADDataProvider() {
    final Allele A = Allele.create("A", true);
    final Allele C = Allele.create("C");
    final List<Allele> AA = Arrays.asList(A, A);
    final List<Allele> AC = Arrays.asList(A, C);
    // AD of {5,5} -> fraction 0.5; AD of {10,0} -> fraction 0.0.
    final Genotype gAC = new GenotypeBuilder("1", AC).AD(new int[]{5,5}).make();
    final Genotype gAA = new GenotypeBuilder("2", AA).AD(new int[]{10,0}).make();
    final VariantContext vc = new VariantContextBuilder("test", "20", 10, 10, AC).genotypes(Arrays.asList(gAA, gAC)).make();
    return new Object[][] { {vc, gAC, new double[]{0.5}}, {vc, gAA, new double[]{0.0}} };
}
Example #14
Source File: VariantContextSingletonFilterTest.java From Drop-seq with MIT License | 6 votes |
@Test public void filterOutHetSinglton() { VariantContextBuilder b = new VariantContextBuilder(); Allele a1 = Allele.create("A", true); Allele a2 = Allele.create("T", false); final List<Allele> allelesHet = new ArrayList<>(Arrays.asList(a1,a2)); final List<Allele> allelesRef = new ArrayList<>(Arrays.asList(a1,a1)); final List<Allele> allelesAlt = new ArrayList<>(Arrays.asList(a2,a2)); // this has a het singleton. Collection<Genotype> genotypes = new ArrayList<>(); genotypes.add(GenotypeBuilder.create("donor1", allelesRef)); genotypes.add(GenotypeBuilder.create("donor2", allelesHet)); genotypes.add(GenotypeBuilder.create("donor3", allelesRef)); VariantContext vc = b.alleles(allelesHet).start(1).stop(1).chr("1").genotypes(genotypes).make(); Assert.assertNotNull(vc); Iterator<VariantContext> underlyingIterator = Collections.emptyIterator(); VariantContextSingletonFilter f = new VariantContextSingletonFilter(underlyingIterator, true); boolean t1 = f.filterOut(vc); Assert.assertFalse(t1); }
Example #15
Source File: VariantContextSingletonFilterTest.java From Drop-seq with MIT License | 6 votes |
/** A VC with a hom-alt donor is not a het singleton, so it must be filtered out. */
@Test
public void filterOutHetSinglto32() {
    VariantContextBuilder b = new VariantContextBuilder();
    Allele a1 = Allele.create("A", true);
    Allele a2 = Allele.create("T", false);
    final List<Allele> allelesHet = new ArrayList<>(Arrays.asList(a1,a2));
    final List<Allele> allelesRef = new ArrayList<>(Arrays.asList(a1,a1));
    final List<Allele> allelesAlt = new ArrayList<>(Arrays.asList(a2,a2));
    // this does not have a het singleton because it has an alt.
    Collection<Genotype> genotypes = new ArrayList<>();
    genotypes.add(GenotypeBuilder.create("donor1", allelesRef));
    genotypes.add(GenotypeBuilder.create("donor2", allelesRef));
    genotypes.add(GenotypeBuilder.create("donor3", allelesAlt));

    VariantContext vc = b.alleles(allelesHet).start(1).stop(1).chr("1").genotypes(genotypes).make();
    Assert.assertNotNull(vc);

    Iterator<VariantContext> underlyingIterator = Collections.emptyIterator();
    VariantContextSingletonFilter f = new VariantContextSingletonFilter(underlyingIterator, true);
    boolean t1 = f.filterOut(vc);
    Assert.assertTrue(t1);
}
Example #16
Source File: VariantContextSingletonFilterTest.java From Drop-seq with MIT License | 6 votes |
@Test public void filterOutHetSinglton2() { VariantContextBuilder b = new VariantContextBuilder(); Allele a1 = Allele.create("A", true); Allele a2 = Allele.create("T", false); final List<Allele> allelesHet = new ArrayList<>(Arrays.asList(a1,a2)); final List<Allele> allelesRef = new ArrayList<>(Arrays.asList(a1,a1)); final List<Allele> allelesAlt = new ArrayList<>(Arrays.asList(a2,a2)); // this does not have a het singleton. Collection<Genotype> genotypes = new ArrayList<>(); genotypes.add(GenotypeBuilder.create("donor1", allelesRef)); genotypes.add(GenotypeBuilder.create("donor2", allelesHet)); genotypes.add(GenotypeBuilder.create("donor3", allelesHet)); VariantContext vc = b.alleles(allelesHet).start(1).stop(1).chr("1").genotypes(genotypes).make(); Assert.assertNotNull(vc); Iterator<VariantContext> underlyingIterator = Collections.emptyIterator(); VariantContextSingletonFilter f = new VariantContextSingletonFilter(underlyingIterator, true); boolean t1 = f.filterOut(vc); Assert.assertTrue(t1); }
Example #17
Source File: GenotypeConcordanceTest.java From picard with MIT License | 5 votes |
/** Verifies that a null truth or call VariantContext is classified as MISSING. */
@Test
public void testGenotypeConcordanceDetermineStateNull() throws Exception {
    final List<Allele> alleles = makeUniqueListOfAlleles(Aref, C);
    final Genotype gt1 = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C));
    final VariantContext vc1 = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(gt1).make();

    testGenotypeConcordanceDetermineState(null, TruthState.MISSING, null, CallState.MISSING, 0, 0);
    testGenotypeConcordanceDetermineState(vc1, TruthState.HET_REF_VAR1, null, CallState.MISSING, 0, 0);
    testGenotypeConcordanceDetermineState(null, TruthState.MISSING, vc1, CallState.HET_REF_VAR1, 0, 0);
}
Example #18
Source File: LazyBCFGenotypesContext.java From Hadoop-BAM with MIT License | 5 votes |
/**
 * Initializes the lazy-decoding machinery from the VCF header: BCF2 field
 * decoders, the string dictionary, one reusable GenotypeBuilder per sample,
 * and the sample name/offset lookups.
 */
@Override
public void setHeader(VCFHeader header) {
    genoFieldDecoders = new BCF2GenotypeFieldDecoders(header);
    fieldDict = BCF2Utils.makeDictionary(header);

    builders = new GenotypeBuilder[header.getNGenotypeSamples()];
    final List<String> genotypeSamples = header.getGenotypeSamples();
    for (int i = 0; i < builders.length; ++i)
        builders[i] = new GenotypeBuilder(genotypeSamples.get(i));

    sampleNamesInOrder = header.getSampleNamesInOrder();
    sampleNameToOffset = header.getSampleNameToOffset();
}
Example #19
Source File: TLODBlock.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Builds the summary genotype for a somatic TLOD block, carrying the block's
 * minimum LOD, median DP and minimum DP.
 */
@Override
Genotype createHomRefGenotype(final String sampleName, final boolean floorBlock) {
    final GenotypeBuilder gb = new GenotypeBuilder(sampleName, Collections.nCopies(2, getRef()));
    //FIXME: for somatic stuff we output the genotype as diploid because that's familiar for human
    gb.noAD().noPL().noAttributes(); // clear all attributes
    gb.attribute(GATKVCFConstants.TUMOR_LOG_10_ODDS_KEY, minBlockLOD);
    gb.DP(getMedianDP());
    gb.attribute(GATKVCFConstants.MIN_DP_FORMAT_KEY, getMinDP());
    return gb.make();
}
Example #20
Source File: GenotypeConcordanceTest.java From picard with MIT License | 5 votes |
/** Verifies classification of site-level (VC_FILTERED) and genotype-level (GT_FILTERED) filtering. */
@Test
public void testGenotypeConcordanceDetermineStateFilter() throws Exception {
    final Set<String> filters = new HashSet<String>(Arrays.asList("BAD!"));

    // Filtering on the variant context
    final List<Allele> alleles1 = makeUniqueListOfAlleles(Aref, C);
    final Genotype gt1 = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C));
    final VariantContext vcFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles1).genotypes(gt1).filters(filters).make();

    final List<Allele> alleles2 = makeUniqueListOfAlleles(Aref, T);
    final Genotype gt2 = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, T));
    final VariantContext vcNotFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles2).genotypes(gt2).make();

    testGenotypeConcordanceDetermineState(vcFiltered, TruthState.VC_FILTERED, vcNotFiltered, CallState.HET_REF_VAR1, 0, 0);
    testGenotypeConcordanceDetermineState(vcNotFiltered, TruthState.HET_REF_VAR1, vcFiltered, CallState.VC_FILTERED, 0, 0);
    testGenotypeConcordanceDetermineState(vcFiltered, TruthState.VC_FILTERED, vcFiltered, CallState.VC_FILTERED, 0, 0);

    // Filtering on the genotype
    final List<String> gtFilters = new ArrayList<String>(Arrays.asList("WICKED"));
    final List<Allele> alleles3 = makeUniqueListOfAlleles(Aref, C);
    final Genotype gt3 = new GenotypeBuilder(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C)).filters(gtFilters).make();
    final VariantContext vcGtFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles3).genotypes(gt3).make();

    testGenotypeConcordanceDetermineState(vcGtFiltered, TruthState.GT_FILTERED, vcNotFiltered, CallState.HET_REF_VAR1, 0, 0);
    testGenotypeConcordanceDetermineState(vcNotFiltered, TruthState.HET_REF_VAR1, vcGtFiltered, CallState.GT_FILTERED, 0, 0);
    testGenotypeConcordanceDetermineState(vcGtFiltered, TruthState.GT_FILTERED, vcGtFiltered, CallState.GT_FILTERED, 0, 0);
}
Example #21
Source File: SelectVariants.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN, AC, AF).
 *
 * @param vc the VariantContext record to subset
 * @param preserveAlleles should we trim constant sequence from the beginning and/or end of all alleles, or preserve it?
 * @param removeUnusedAlternates removes alternate alleles with AC=0
 * @return the subsetted VariantContext
 */
private VariantContext subsetRecord(final VariantContext vc, final boolean preserveAlleles, final boolean removeUnusedAlternates) {
    //subContextFromSamples() always decodes the vc, which is a fairly expensive operation.  Avoid if possible
    if (noSamplesSpecified && !removeUnusedAlternates) {
        return vc;
    }

    // strip out the alternate alleles that aren't being used
    final VariantContext sub = vc.subContextFromSamples(samples, removeUnusedAlternates);

    // If no subsetting happened, exit now
    if (sub.getNSamples() == vc.getNSamples() && sub.getNAlleles() == vc.getNAlleles()) {
        return vc;
    }

    // fix the PL and AD values if sub has fewer alleles than original vc and remove a fraction of the genotypes if needed
    final GenotypesContext oldGs = sub.getGenotypes();
    GenotypesContext newGC = sub.getNAlleles() == vc.getNAlleles() ? oldGs :
            AlleleSubsettingUtils.subsetAlleles(oldGs, 0, vc.getAlleles(), sub.getAlleles(),
                    GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES,
                    vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0));

    if (fractionGenotypes > 0) {
        // Randomly replace a fraction of genotypes with no-call genotypes (GQ stripped).
        final List<Genotype> genotypes = newGC.stream().map(genotype -> randomGenotypes.nextDouble() > fractionGenotypes ?
                genotype : new GenotypeBuilder(genotype).alleles(getNoCallAlleles(genotype.getPloidy())).noGQ().make()).collect(Collectors.toList());
        newGC = GenotypesContext.create(new ArrayList<>(genotypes));
    }

    // since the VC has been subset (either by sample or allele), we need to strip out the MLE tags
    final VariantContextBuilder builder = new VariantContextBuilder(sub);
    builder.rmAttributes(Arrays.asList(GATKVCFConstants.MLE_ALLELE_COUNT_KEY,GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
    builder.genotypes(newGC);
    addAnnotations(builder, vc, sub.getSampleNames());
    final VariantContext subset = builder.make();

    return preserveAlleles? subset : GATKVariantContextUtils.trimAlleles(subset,true,true);
}
Example #22
Source File: DepthPerSampleHC.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** Annotates the genotype's DP as the count of informative reads over the VC alleles. */
@Override
public void annotate( final ReferenceContext ref, final VariantContext vc, final Genotype g,
        final GenotypeBuilder gb, final AlleleLikelihoods<GATKRead, Allele> likelihoods ) {
    Utils.nonNull(vc);
    Utils.nonNull(g);
    Utils.nonNull(gb);

    if ( likelihoods == null || !g.isCalled() ) {
        logger.warn("Annotation will not be calculated, genotype is not called or alleleLikelihoodMap is null");
        return;
    }

    // check that there are reads
    final String sample = g.getSampleName();
    if (likelihoods.sampleEvidenceCount(likelihoods.indexOfSample(sample)) == 0) {
        gb.DP(0);
        return;
    }

    final Set<Allele> alleles = new LinkedHashSet<>(vc.getAlleles());

    // make sure that there's a meaningful relationship between the alleles in the likelihoods and our VariantContext
    if ( !likelihoods.alleles().containsAll(alleles) ) {
        logger.warn("VC alleles " + alleles + " not a strict subset of AlleleLikelihoods alleles " + likelihoods.alleles());
        return;
    }

    // the depth for the HC is the sum of the informative alleles at this site. It's not perfect (as we cannot
    // differentiate between reads that align over the event but aren't informative vs. those that aren't even
    // close) but it's a pretty good proxy and it matches with the AD field (i.e., sum(AD) = DP).
    final Map<Allele, List<Allele>> alleleSubset = alleles.stream().collect(Collectors.toMap(a -> a, a -> Arrays.asList(a)));
    final AlleleLikelihoods<GATKRead, Allele> subsettedLikelihoods = likelihoods.marginalize(alleleSubset);
    final int depth = (int) subsettedLikelihoods.bestAllelesBreakingTies(sample).stream().filter(ba -> ba.isInformative()).count();
    gb.DP(depth);
}
Example #23
Source File: OrientationBiasReadCounts.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** Adds per-allele F1R2/F2R1 orientation counts to the genotype from the read likelihoods. */
@Override
public void annotate(final ReferenceContext refContext, final VariantContext vc, final Genotype g,
        final GenotypeBuilder gb, final AlleleLikelihoods<GATKRead, Allele> likelihoods){
    Utils.nonNull(gb, "gb is null");
    Utils.nonNull(vc, "vc is null");

    if (g == null || likelihoods == null) {
        return;
    }

    // One counter per likelihoods allele for each read-pair orientation.
    final Map<Allele, MutableInt> f1r2Counts = likelihoods.alleles().stream()
            .collect(Collectors.toMap(a -> a, a -> new MutableInt(0)));
    final Map<Allele, MutableInt> f2r1Counts = likelihoods.alleles().stream()
            .collect(Collectors.toMap(a -> a, a -> new MutableInt(0)));

    // Count only informative, usable reads passing the base-quality cutoff.
    Utils.stream(likelihoods.bestAllelesBreakingTies(g.getSampleName()))
            .filter(ba -> ba.isInformative() && isUsableRead(ba.evidence)
                    && BaseQualityRankSumTest.getReadBaseQuality(ba.evidence, vc).orElse(0) >= MINIMUM_BASE_QUALITY)
            .forEach(ba -> (ReadUtils.isF2R1(ba.evidence) ? f2r1Counts : f1r2Counts).get(ba.allele).increment());

    final int[] f1r2 = vc.getAlleles().stream().mapToInt(a -> f1r2Counts.get(a).intValue()).toArray();
    final int[] f2r1 = vc.getAlleles().stream().mapToInt(a -> f2r1Counts.get(a).intValue()).toArray();

    gb.attribute(GATKVCFConstants.F1R2_KEY, f1r2);
    gb.attribute(GATKVCFConstants.F2R1_KEY, f2r1);
}
Example #24
Source File: OrientationBiasReadCounts.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Annotate the given variant context with the OxoG read count attributes, directly from the read pileup.
 *
 * This method may be slow and should be considered EXPERIMENTAL, especially with regard to indels and complex/mixed
 * variants.
 *
 * @param vc variant context for the genotype.  Necessary so that we can see all alleles.
 * @param gb genotype builder to put the annotations into.
 * @param readPileup pileup of the reads at this vc.  Note that this pileup does not have to match the
 *                   genotype.  In other words, this tool does not check that the pileup was generated from the
 *                   genotype sample.
 */
public static void annotateSingleVariant(final VariantContext vc, final GenotypeBuilder gb,
        final ReadPileup readPileup, int meanBaseQualityCutoff) {
    Utils.nonNull(gb, "gb is null");
    Utils.nonNull(vc, "vc is null");

    // Create a list of unique alleles
    final List<Allele> variantAllelesWithDupes = vc.getAlleles();
    final Set<Allele> alleleSet = new LinkedHashSet<>(variantAllelesWithDupes);
    final List<Allele> variantAlleles = new ArrayList<>(alleleSet);

    // Initialize the mappings
    final Map<Allele, MutableInt> f1r2Counts = variantAlleles.stream()
            .collect(Collectors.toMap(Function.identity(), a -> new MutableInt(0)));
    final Map<Allele, MutableInt> f2r1Counts = variantAlleles.stream()
            .collect(Collectors.toMap(Function.identity(), a -> new MutableInt(0)));

    final List<Allele> referenceAlleles = variantAlleles.stream().filter(a -> a.isReference() && !a.isSymbolic()).collect(Collectors.toList());
    final List<Allele> altAlleles = variantAlleles.stream().filter(a -> a.isNonReference() && !a.isSymbolic()).collect(Collectors.toList());

    if (referenceAlleles.size() != 1) {
        logger.warn("Number of reference alleles does not equal  for VC: " + vc);
    }

    // We MUST have exactly 1 non-symbolic reference allele and a read pileup,
    if ((referenceAlleles.size() == 1) && (readPileup != null) && !referenceAlleles.get(0).isSymbolic()) {
        final Allele referenceAllele = referenceAlleles.get(0);
        // Tally each usable pileup element into the F1R2/F2R1 counters.
        Utils.stream(readPileup)
                .filter(pe -> isUsableRead(pe.getRead()))
                .forEach(pe -> incrementCounts(pe, f1r2Counts, f2r1Counts, referenceAllele, altAlleles, meanBaseQualityCutoff));
    }

    final int[] f1r2 = variantAlleles.stream().mapToInt(a -> f1r2Counts.get(a).intValue()).toArray();
    final int[] f2r1 = variantAlleles.stream().mapToInt(a -> f2r1Counts.get(a).intValue()).toArray();

    gb.attribute(GATKVCFConstants.F1R2_KEY, f1r2);
    gb.attribute(GATKVCFConstants.F2R1_KEY, f2r1);
}
Example #25
Source File: VariantsSparkSinkUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** Writes ten adjacent ref-block variants with and without GVCF mode and checks block merging behavior. */
@Test(dataProvider = "gvcfCases")
public void testEnableDisableGVCFWriting(boolean writeGvcf, String extension) throws IOException {
    List<VariantContext> vcs = new ArrayList<>();
    for(int i = 1; i <= 10; i++) {
        final Allele A = Allele.create("A", true);
        final VariantContext vc = new VariantContextBuilder("hand crafted", "1", i, i,
                Arrays.asList(A, Allele.NON_REF_ALLELE))
                .genotypes(new GenotypeBuilder(SAMPLE).alleles(Arrays.asList(A, A)).DP(10).GQ(10).PL(new int[]{0, 60, 10}).make())
                .make();
        vcs.add(vc);
    }

    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    final File output = createTempFile(outputFileName, extension);
    VariantsSparkSink.writeVariants(ctx, output.toString(), ctx.parallelize(vcs), getHeader(), writeGvcf,
            Arrays.asList(100), 2, 1, true, true);

    checkFileExtensionConsistentWithContents(output.toString(), true);

    new CommandLineProgramTest(){
        @Override
        public String getTestedToolName(){
            return IndexFeatureFile.class.getSimpleName();
        }
    }.runCommandLine(new String[]{"-I", output.getAbsolutePath()});

    final List<VariantContext> writtenVcs = readVariants(output.toString());
    //if we are actually writing a gvcf, all the variant blocks will be merged into a single homref block with
    Assert.assertEquals(writtenVcs.size(), writeGvcf ? 1 : 10);
    Assert.assertEquals(writtenVcs.stream().mapToInt(VariantContext::getStart).min().getAsInt(), 1);
    Assert.assertEquals(writtenVcs.stream().mapToInt(VariantContext::getEnd).max().getAsInt(), 10);
}
Example #26
Source File: HomRefBlockUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** Verifies min/median DP tracking, block end updates, and min-PL tracking as genotypes are added to a band. */
@Test
public void testMinMedian() {
    final VariantContext vc = getVariantContext();
    final HomRefBlock band = getHomRefBlock(vc);
    final GenotypeBuilder gb = new GenotypeBuilder(SAMPLE_NAME);
    gb.alleles(vc.getAlleles());

    int pos = band.getStart();
    band.add(pos++, gb.DP(10).GQ(11).PL(new int[]{0,11,100}).make());
    Assert.assertEquals(band.getEnd(), pos - 1);
    assertValues(band, 10, 10);

    band.add(pos++, gb.DP(11).GQ(10).PL(new int[]{0, 10, 100}).make());
    Assert.assertEquals(band.getEnd(), pos - 1);
    assertValues(band, 10, 11);

    band.add(pos++, gb.DP(12).GQ(12).PL(new int[]{0,12,100}).make());
    Assert.assertEquals(band.getEnd(), pos - 1);
    assertValues(band, 10, 11);

    band.add(pos++, gb.DP(13).GQ(15).PL(new int[]{0,15,100}).make());
    Assert.assertEquals(band.getEnd(), pos - 1);
    band.add(pos++, gb.DP(14).GQ(16).PL(new int[]{0,16,100}).make());
    Assert.assertEquals(band.getEnd(), pos - 1);
    band.add(pos++, gb.DP(15).GQ(17).PL(new int[]{0,17,100}).make());
    Assert.assertEquals(band.getEnd(), pos - 1);
    band.add(pos++, gb.DP(16).GQ(18).PL(new int[]{0,18,100}).make());
    Assert.assertEquals(band.getEnd(), pos - 1);
    assertValues(band, 10, 13);

    Assert.assertEquals(band.getSize(), pos - vc.getStart());
    // The element-wise minimum PLs across all added genotypes.
    Assert.assertEquals(band.getMinPLs(), new int[]{0, 10, 100});
}
Example #27
Source File: HaplotypeCallerIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private static String keyForVariant( final VariantContext variant ) { Genotype genotype = variant.getGenotype(0); if (genotype.isPhased()) { // unphase it for comparisons, since we rely on comparing the genotype string below genotype = new GenotypeBuilder(genotype).phased(false).make(); } return String.format("%s:%d-%d %s %s", variant.getContig(), variant.getStart(), variant.getEnd(), variant.getAlleles(), genotype.getGenotypeString(false)); }
Example #28
Source File: GenomicsDBImportIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Writes a minimal single-sample, single-variant VCF (with custom FORMAT fields)
 * to a temp file for use as GenomicsDBImport test input, and returns that file.
 */
private static File createInputVCF(final String sampleName) {
    final String contig = "chr20";
    final SAMSequenceDictionary dict = new SAMSequenceDictionary(
            Collections.singletonList(new SAMSequenceRecord(contig, 64444167)));

    // Header declares GT plus two custom FORMAT fields used by the test.
    final VCFFormatHeaderLine formatField = new VCFFormatHeaderLine(SAMPLE_NAME_KEY, 1, VCFHeaderLineType.String,
            "the name of the sample this genotype came from");
    final Set<VCFHeaderLine> headerLines = new HashSet<>();
    headerLines.add(formatField);
    headerLines.add(new VCFFormatHeaderLine(ANOTHER_ATTRIBUTE_KEY, 1, VCFHeaderLineType.Integer, "Another value"));
    headerLines.add(VCFStandardHeaderLines.getFormatLine("GT"));

    final File out = createTempFile(sampleName +"_", ".vcf");
    try (final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(out.toPath(), dict, false,
            Options.INDEX_ON_THE_FLY)) {
        final VCFHeader vcfHeader = new VCFHeader(headerLines, Collections.singleton(sampleName));
        vcfHeader.setSequenceDictionary(dict);
        writer.writeHeader(vcfHeader);
        final Allele Aref = Allele.create("A", true);
        final Allele C = Allele.create("C");
        final List<Allele> alleles = Arrays.asList(Aref, C);
        final VariantContext variant = new VariantContextBuilder("invented", contig,
                INTERVAL.get(0).getStart(), INTERVAL.get(0).getStart(), alleles)
                .genotypes(new GenotypeBuilder(sampleName, alleles).attribute(SAMPLE_NAME_KEY, sampleName)
                        .attribute(ANOTHER_ATTRIBUTE_KEY, 10).make())
                .make();
        writer.add(variant);
        return out;
    }
}
Example #29
Source File: AllelicDepthTest.java From hmftools with GNU General Public License v3.0 | 5 votes |
/** With AD {6,4} and no DP, total read count is sum(AD)=10 and allele frequency is 4/10. */
@Test
public void allelicDepthFromAd() {
    final Genotype genotype = new GenotypeBuilder("SAMPLE").AD(new int[] { 6, 4 }).make();
    final AllelicDepth victim = AllelicDepth.fromGenotype(genotype);
    assertEquals(10, victim.totalReadCount());
    assertEquals(0.4, victim.alleleFrequency(), 0.01);
}
Example #30
Source File: AllelicDepthTest.java From hmftools with GNU General Public License v3.0 | 5 votes |
/** When sum(AD)=10 exceeds DP=5, the AD total wins as the read count. */
@Test
public void useADIfLargerThanDP() {
    final Genotype genotype = new GenotypeBuilder("SAMPLE").AD(new int[] { 6, 4 }).DP(5).make();
    final AllelicDepth victim = AllelicDepth.fromGenotype(genotype);
    assertEquals(10, victim.totalReadCount());
    assertEquals(0.4, victim.alleleFrequency(), 0.01);
}