Java Code Examples for htsjdk.variant.variantcontext.VariantContextBuilder#genotypes()
The following examples show how to use htsjdk.variant.variantcontext.VariantContextBuilder#genotypes().
Each example is taken from an open-source project; the source file, project, and license are noted above the code.
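Before the project-specific examples, here is a minimal, self-contained sketch of the call pattern they all share: build the site's alleles, build one or more Genotype objects with GenotypeBuilder, and pass them to VariantContextBuilder#genotypes() before calling make(). The class name, sample name, contig, coordinates, and DP value below are invented purely for illustration.

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;

import java.util.Arrays;
import java.util.List;

public class GenotypesSketch {
    public static void main(String[] args) {
        // Reference and alternate allele for a single hypothetical SNP site.
        final Allele ref = Allele.create("A", true);
        final Allele alt = Allele.create("T", false);
        final List<Allele> siteAlleles = Arrays.asList(ref, alt);

        // One heterozygous genotype for a made-up sample, with a read depth of 30.
        final Genotype genotype = new GenotypeBuilder("sample1", Arrays.asList(ref, alt))
                .DP(30)
                .make();

        // Attach the genotype via genotypes() and build the variant record.
        final VariantContext vc = new VariantContextBuilder("sketch", "chr1", 100, 100, siteAlleles)
                .genotypes(genotype)
                .make();

        System.out.println(vc);
    }
}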
Example 1
Source File: SelectVariants.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
private VariantContext buildVariantContextWithDroppedAnnotationsRemoved(final VariantContext vc) {
    if (infoAnnotationsToDrop.isEmpty() && genotypeAnnotationsToDrop.isEmpty()) {
        return vc;
    }
    final VariantContextBuilder rmAnnotationsBuilder = new VariantContextBuilder(vc);
    for (String infoField : infoAnnotationsToDrop) {
        rmAnnotationsBuilder.rmAttribute(infoField);
    }
    if (!genotypeAnnotationsToDrop.isEmpty()) {
        final ArrayList<Genotype> genotypesToWrite = new ArrayList<>();
        for (Genotype genotype : vc.getGenotypes()) {
            final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype).noAttributes();
            final Map<String, Object> attributes = new HashMap<>(genotype.getExtendedAttributes());
            for (String genotypeAnnotation : genotypeAnnotationsToDrop) {
                attributes.remove(genotypeAnnotation);
            }
            genotypeBuilder.attributes(attributes);
            genotypesToWrite.add(genotypeBuilder.make());
        }
        rmAnnotationsBuilder.genotypes(GenotypesContext.create(genotypesToWrite));
    }
    return rmAnnotationsBuilder.make();
}
Example 2
Source File: VariantEnricher.java From hmftools with GNU General Public License v3.0 | 5 votes |
private void buildVariants(final String sampleId, List<BachelorGermlineVariant> bachRecords) {
    for (final BachelorGermlineVariant bachRecord : bachRecords) {
        VariantContextBuilder builder = new VariantContextBuilder();
        builder.id(bachRecord.VariantId);
        builder.loc(bachRecord.Chromosome, bachRecord.Position,
                bachRecord.Position + bachRecord.Ref.length() - 1);

        List<Allele> alleles = Lists.newArrayList();
        alleles.add(Allele.create(bachRecord.Ref, true));
        alleles.add(Allele.create(bachRecord.Alts, false));
        builder.alleles(alleles);

        List<Genotype> genoTypes = Lists.newArrayList();

        GenotypeBuilder gBuilder = new GenotypeBuilder(sampleId, builder.getAlleles());
        int[] adCounts = { bachRecord.getTumorRefCount(), bachRecord.getTumorAltCount() };
        gBuilder.AD(adCounts);
        gBuilder.DP(bachRecord.getGermlineReadDepth());
        genoTypes.add(gBuilder.make());

        builder.genotypes(genoTypes);
        VariantContext variantContext = builder.make();

        variantContext.getCommonInfo().addFilter("PASS");
        variantContext.getCommonInfo().putAttribute(SNPEFF_IDENTIFIER, bachRecord.Annotations);

        bachRecord.setVariantContext(variantContext);

        SomaticVariant somVariant = SomaticVariantFactory.unfilteredInstance().createSomaticVariant(sampleId, variantContext);
        bachRecord.setSomaticVariant(somVariant);
    }
}
Example 3
Source File: PositiveAllelicDepth.java From hmftools with GNU General Public License v3.0 | 5 votes |
private void run() {
    fileWriter.writeHeader(fileReader.getFileHeader());
    for (final VariantContext context : fileReader) {
        final VariantContextBuilder builder = new VariantContextBuilder(context);
        final List<Genotype> genotypes = Lists.newArrayList();

        for (int genotypeIndex = 0; genotypeIndex < context.getGenotypes().size(); genotypeIndex++) {
            Genotype genotype = context.getGenotype(genotypeIndex);

            if (genotype.hasAD() && genotype.hasDP()) {
                int[] ad = genotype.getAD();
                int total = 0;
                boolean updateRecord = false;
                for (int i = 0; i < ad.length; i++) {
                    int adValue = ad[i];
                    if (adValue < 0) {
                        updateRecord = true;
                        ad[i] = Math.abs(adValue);
                    }
                    total += Math.abs(adValue);
                }

                if (updateRecord) {
                    LOGGER.info("Updated entry at {}:{}", context.getContig(), context.getStart());
                    Genotype updated = new GenotypeBuilder(genotype).AD(ad).DP(total).make();
                    genotypes.add(updated);
                } else {
                    genotypes.add(genotype);
                }
            }
        }

        builder.genotypes(genotypes);
        fileWriter.add(builder.make());
    }
}
Example 4
Source File: CalculateGenotypePosteriors.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Override
public void apply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) {
    final Collection<VariantContext> vcs = featureContext.getValues(getDrivingVariantsFeatureInput());

    final Collection<VariantContext> otherVCs = featureContext.getValues(supportVariants);

    final int missing = supportVariants.size() - otherVCs.size();

    for ( final VariantContext vc : vcs ) {
        final VariantContext vc_familyPriors;
        final VariantContext vc_bothPriors;

        //do family priors first (if applicable)
        final VariantContextBuilder builder = new VariantContextBuilder(vc);
        //only compute family priors for biallelelic sites
        if (!skipFamilyPriors && vc.isBiallelic()){
            final GenotypesContext gc = famUtils.calculatePosteriorGLs(vc);
            builder.genotypes(gc);
        }
        VariantContextUtils.calculateChromosomeCounts(builder, false);
        vc_familyPriors = builder.make();

        if (!skipPopulationPriors) {
            vc_bothPriors = PosteriorProbabilitiesUtils.calculatePosteriorProbs(vc_familyPriors, otherVCs,
                    missing * numRefIfMissing, globalPrior, !ignoreInputSamples, defaultToAC, useACoff);
        } else {
            final VariantContextBuilder builder2 = new VariantContextBuilder(vc_familyPriors);
            VariantContextUtils.calculateChromosomeCounts(builder, false);
            vc_bothPriors = builder2.make();
        }

        vcfWriter.add(vc_bothPriors);
    }
}
Example 5
Source File: SelectVariants.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN, AC, AF).
 *
 * @param vc                      the VariantContext record to subset
 * @param preserveAlleles         should we trim constant sequence from the beginning and/or end of all alleles, or preserve it?
 * @param removeUnusedAlternates  removes alternate alleles with AC=0
 * @return the subsetted VariantContext
 */
private VariantContext subsetRecord(final VariantContext vc, final boolean preserveAlleles, final boolean removeUnusedAlternates) {
    //subContextFromSamples() always decodes the vc, which is a fairly expensive operation. Avoid if possible
    if (noSamplesSpecified && !removeUnusedAlternates) {
        return vc;
    }

    // strip out the alternate alleles that aren't being used
    final VariantContext sub = vc.subContextFromSamples(samples, removeUnusedAlternates);

    // If no subsetting happened, exit now
    if (sub.getNSamples() == vc.getNSamples() && sub.getNAlleles() == vc.getNAlleles()) {
        return vc;
    }

    // fix the PL and AD values if sub has fewer alleles than original vc and remove a fraction of the genotypes if needed
    final GenotypesContext oldGs = sub.getGenotypes();
    GenotypesContext newGC = sub.getNAlleles() == vc.getNAlleles() ? oldGs :
            AlleleSubsettingUtils.subsetAlleles(oldGs, 0, vc.getAlleles(), sub.getAlleles(),
                    GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0));

    if (fractionGenotypes > 0) {
        final List<Genotype> genotypes = newGC.stream().map(genotype -> randomGenotypes.nextDouble() > fractionGenotypes ? genotype :
                new GenotypeBuilder(genotype).alleles(getNoCallAlleles(genotype.getPloidy())).noGQ().make()).collect(Collectors.toList());
        newGC = GenotypesContext.create(new ArrayList<>(genotypes));
    }

    // since the VC has been subset (either by sample or allele), we need to strip out the MLE tags
    final VariantContextBuilder builder = new VariantContextBuilder(sub);
    builder.rmAttributes(Arrays.asList(GATKVCFConstants.MLE_ALLELE_COUNT_KEY, GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
    builder.genotypes(newGC);
    addAnnotations(builder, vc, sub.getSampleNames());
    final VariantContext subset = builder.make();

    return preserveAlleles ? subset : GATKVariantContextUtils.trimAlleles(subset, true, true);
}
Example 6
Source File: CalculateGenotypePosteriors.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Override
public void apply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) {
    final Collection<VariantContext> otherVCs = featureContext.getValues(supportVariants);

    //If no resource contains a matching variant, then add numRefIfMissing as a pseudocount to the priors
    List<VariantContext> resourcesWithMatchingStarts = otherVCs.stream()
            .filter(vc -> variant.getStart() == vc.getStart()).collect(Collectors.toList());

    //do family priors first (if applicable)
    final VariantContextBuilder builder = new VariantContextBuilder(variant);
    //only compute family priors for biallelelic sites
    if (!skipFamilyPriors && variant.isBiallelic()){
        final GenotypesContext gc = famUtils.calculatePosteriorGLs(variant);
        builder.genotypes(gc);
    }
    VariantContextUtils.calculateChromosomeCounts(builder, false);
    final VariantContext vc_familyPriors = builder.make();

    final VariantContext vc_bothPriors;
    if (!skipPopulationPriors) {
        vc_bothPriors = PosteriorProbabilitiesUtils.calculatePosteriorProbs(vc_familyPriors, resourcesWithMatchingStarts,
                resourcesWithMatchingStarts.isEmpty() ? numRefIfMissing : 0, options);
    } else {
        vc_bothPriors = vc_familyPriors;
    }

    vcfWriter.add(vc_bothPriors);
}
Example 7
Source File: LiftoverUtils.java From picard with MIT License | 5 votes |
protected static VariantContextBuilder reverseComplementVariantContext(final VariantContext source, final Interval target, final ReferenceSequence refSeq) {
    if (target.isPositiveStrand()) {
        throw new IllegalArgumentException("This should only be called for negative strand liftovers");
    }
    final int start = target.getStart();
    final int stop = target.getEnd();

    final List<Allele> origAlleles = new ArrayList<>(source.getAlleles());
    final VariantContextBuilder vcb = new VariantContextBuilder(source);

    vcb.rmAttribute(VCFConstants.END_KEY)
            .chr(target.getContig())
            .start(start)
            .stop(stop)
            .alleles(reverseComplementAlleles(origAlleles));

    if (isIndelForLiftover(source)) {
        // check that the reverse complemented bases match the new reference
        if (referenceAlleleDiffersFromReferenceForIndel(vcb.getAlleles(), refSeq, start, stop)) {
            return null;
        }

        leftAlignVariant(vcb, start, stop, vcb.getAlleles(), refSeq);
    }

    vcb.genotypes(fixGenotypes(source.getGenotypes(), origAlleles, vcb.getAlleles()));

    return vcb;
}
Example 8
Source File: LiftoverVcfTest.java From picard with MIT License | 5 votes |
@Test(dataProvider = "leftAlignAllelesData") public void testLeftAlignVariants(final VariantContext source, final ReferenceSequence reference, final VariantContext result) { VariantContextBuilder vcb = new VariantContextBuilder(source); LiftoverUtils.leftAlignVariant(vcb, source.getStart(), source.getEnd(), source.getAlleles(), reference); vcb.genotypes(LiftoverUtils.fixGenotypes(source.getGenotypes(), source.getAlleles(), vcb.getAlleles())); VcfTestUtils.assertEquals(vcb.make(), result); }
Example 9
Source File: VcfOutputRenderer.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
@Override
public void write(final VariantContext variant, final FuncotationMap txToFuncotationMap) {
    // Create a new variant context builder:
    final VariantContextBuilder variantContextOutputBuilder = new VariantContextBuilder(variant);

    final StringBuilder funcotatorAnnotationStringBuilder = new StringBuilder();

    // Get the old VCF Annotation field and append the new information to it:
    final Object existingAnnotation = variant.getAttribute(FUNCOTATOR_VCF_FIELD_NAME, null);
    final List<String> existingAlleleAnnotations;
    if ( existingAnnotation != null) {
        existingAlleleAnnotations = Utils.split(existingAnnotation.toString(), ',');
    } else {
        existingAlleleAnnotations = Collections.emptyList();
    }

    // Go through each allele and add it to the writer separately:
    final List<Allele> alternateAlleles = variant.getAlternateAlleles();
    for ( int alleleIndex = 0; alleleIndex < alternateAlleles.size() ; ++alleleIndex ) {
        final Allele altAllele = alternateAlleles.get(alleleIndex);

        if ( alleleIndex < existingAlleleAnnotations.size() ) {
            funcotatorAnnotationStringBuilder.append( existingAlleleAnnotations.get(alleleIndex) );
            funcotatorAnnotationStringBuilder.append(FIELD_DELIMITER);
        }

        for (final String txId : txToFuncotationMap.getTranscriptList()) {
            funcotatorAnnotationStringBuilder.append(START_TRANSCRIPT_DELIMITER);
            final List<Funcotation> funcotations = txToFuncotationMap.get(txId);
            final Funcotation manualAnnotationFuncotation = createManualAnnotationFuncotation(altAllele);

            funcotatorAnnotationStringBuilder.append(
                    Stream.concat(funcotations.stream(), Stream.of(manualAnnotationFuncotation))
                            .filter(f -> f.getAltAllele().equals(altAllele))
                            .filter(f -> f.getFieldNames().size() > 0)
                            .filter(f -> !f.getDataSourceName().equals(FuncotatorConstants.DATASOURCE_NAME_FOR_INPUT_VCFS))
                            .map(VcfOutputRenderer::adjustIndelAlleleInformation)
                            .map(f -> FuncotatorUtils.renderSanitizedFuncotationForVcf(f, finalFuncotationFieldNames))
                            .collect(Collectors.joining(FIELD_DELIMITER))
            );

            funcotatorAnnotationStringBuilder.append(END_TRANSCRIPT_DELIMITER + ALL_TRANSCRIPT_DELIMITER);
        }
        // We have a trailing "#" - we need to remove it:
        funcotatorAnnotationStringBuilder.deleteCharAt(funcotatorAnnotationStringBuilder.length() - 1);

        funcotatorAnnotationStringBuilder.append(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR);
    }

    // We have a trailing "," - we need to remove it:
    funcotatorAnnotationStringBuilder.deleteCharAt(funcotatorAnnotationStringBuilder.length() - 1);

    // Add our new annotation:
    variantContextOutputBuilder.attribute(FUNCOTATOR_VCF_FIELD_NAME, funcotatorAnnotationStringBuilder.toString());

    // Add the genotypes from the variant:
    variantContextOutputBuilder.genotypes( variant.getGenotypes() );

    // Render and add our VCF line:
    vcfWriter.add( variantContextOutputBuilder.make() );
}
Example 10
Source File: LiftoverVcfTest.java From picard with MIT License | 4 votes |
@DataProvider(name = "noCallAndSymbolicData") public Iterator<Object[]> noCallAndSymbolicData() { final VariantContextBuilder builder = new VariantContextBuilder().source("test1").chr("chr1").attribute("TEST_ATTRIBUTE", 1).attribute("TEST_ATTRIBUTE2", "Hi").attribute("TEST_ATTRIBUTE3", new double[]{1.2, 2.1}); final VariantContextBuilder result_builder = new VariantContextBuilder().source("test1").chr("chr1").attribute("TEST_ATTRIBUTE", 1).attribute("TEST_ATTRIBUTE2", "Hi").attribute("TEST_ATTRIBUTE3", new double[]{1.2, 2.1}); result_builder.attribute(LiftoverVcf.ORIGINAL_CONTIG, "chr1"); final GenotypeBuilder genotypeBuilder = new GenotypeBuilder("test1"); final GenotypeBuilder resultGenotypeBuilder = new GenotypeBuilder("test1"); final List<Object[]> tests = new ArrayList<>(); final Allele CRef = Allele.create("C", true); final Allele GRef = Allele.create("G", true); final Allele T = Allele.create("T", false); final Allele A = Allele.create("A", false); final Allele DEL = Allele.create("*", false); final Allele nonRef = Allele.create("<*>", false); final LiftOver liftOver = new LiftOver(TWO_INTERVAL_CHAIN_FILE); final LiftOver liftOverRC = new LiftOver(CHAIN_FILE); builder.source("test1"); int start = 10; builder.start(start).stop(start).alleles(CollectionUtil.makeList(CRef, T, nonRef)); result_builder.start(start).stop(start).alleles(CollectionUtil.makeList(CRef, T, nonRef)); genotypeBuilder.alleles(CollectionUtil.makeList(Allele.create("."), Allele.create("."))); resultGenotypeBuilder.alleles(CollectionUtil.makeList(Allele.create("."), Allele.create("."))); builder.genotypes(genotypeBuilder.make()); result_builder.genotypes(resultGenotypeBuilder.make()); result_builder.attribute(LiftoverVcf.ORIGINAL_START, start); tests.add(new Object[]{liftOver, builder.make(), result_builder.make(), false}); builder.source("test2"); builder.start(start).stop(start).alleles(CollectionUtil.makeList(CRef, T, DEL)); result_builder.start(start).stop(start).alleles(CollectionUtil.makeList(CRef, T, DEL)); result_builder.attribute(LiftoverVcf.ORIGINAL_START, start); genotypeBuilder.alleles(CollectionUtil.makeList(T, DEL)); resultGenotypeBuilder.alleles(CollectionUtil.makeList(T, DEL)); builder.genotypes(genotypeBuilder.make()); result_builder.genotypes(resultGenotypeBuilder.make()); tests.add(new Object[]{liftOver, builder.make(), result_builder.make(), false}); //reverse complement builder.source("test3"); int offset = 3; start = CHAIN_SIZE - offset; int liftedStart = 1 + offset; builder.start(start).stop(start).alleles(CollectionUtil.makeList(CRef, T, DEL, nonRef)); result_builder.start(liftedStart).stop(liftedStart).alleles(CollectionUtil.makeList(GRef, A, DEL, nonRef)); result_builder.attribute(LiftoverVcf.ORIGINAL_START, start); result_builder.attribute(LiftoverVcf.ORIGINAL_ALLELES, LiftoverUtils.allelesToStringList(builder.getAlleles())); result_builder.attribute(LiftoverUtils.REV_COMPED_ALLELES, true); genotypeBuilder.alleles(CollectionUtil.makeList(T, DEL)); resultGenotypeBuilder.alleles(CollectionUtil.makeList(A, DEL)); builder.genotypes(genotypeBuilder.make()); result_builder.genotypes(resultGenotypeBuilder.make()); tests.add(new Object[]{liftOverRC, builder.make(), result_builder.make(), true}); builder.source("test4"); offset = 4; start = CHAIN_SIZE - offset; liftedStart = 1 + offset; builder.start(start).stop(start).alleles(CollectionUtil.makeList(CRef, T, nonRef)); result_builder.start(liftedStart).stop(liftedStart).alleles(CollectionUtil.makeList(GRef, A, nonRef)); 
result_builder.attribute(LiftoverVcf.ORIGINAL_START, start); result_builder.attribute(LiftoverVcf.ORIGINAL_ALLELES, LiftoverUtils.allelesToStringList(builder.getAlleles())); genotypeBuilder.alleles(CollectionUtil.makeList(T, Allele.NO_CALL)); resultGenotypeBuilder.alleles(CollectionUtil.makeList(A, Allele.NO_CALL)); builder.genotypes(genotypeBuilder.make()); result_builder.genotypes(resultGenotypeBuilder.make()); tests.add(new Object[]{liftOverRC, builder.make(), result_builder.make(), true}); return tests.iterator(); }
Example 11
Source File: LiftoverVcfTest.java From picard with MIT License | 4 votes |
@DataProvider(name = "snpWithChangedRef") public Iterator<Object[]> snpWithChangedRef() { final Allele RefA = Allele.create("A", true); final Allele RefC = Allele.create("C", true); final Allele A = Allele.create("A", false); final Allele C = Allele.create("C", false); final List<Object[]> tests = new ArrayList<>(); final VariantContextBuilder builder = new VariantContextBuilder().source("test1").chr("chr1"); final GenotypeBuilder genotypeBuilder = new GenotypeBuilder("test1"); final GenotypeBuilder resultGenotypeBuilder = new GenotypeBuilder("test1"); final VariantContextBuilder result_builder = new VariantContextBuilder().source("test1").chr("chr1").attribute(LiftoverUtils.SWAPPED_ALLELES, true); // simple snp int start = 12; int stop = 12; builder.start(start).stop(stop).alleles(CollectionUtil.makeList(RefA, C)); result_builder.start(start).stop(stop).alleles(CollectionUtil.makeList(A, RefC)); genotypeBuilder.alleles(builder.getAlleles()).AD(new int[]{5, 4}).PL(new int[]{40, 0, 60}); resultGenotypeBuilder.alleles(result_builder.getAlleles()).AD(new int[]{4, 5}).PL(new int[]{60, 0, 40}); builder.genotypes(genotypeBuilder.make()); result_builder.genotypes(resultGenotypeBuilder.make()); tests.add(new Object[]{builder.make(), result_builder.make()}); builder.start(start).stop(stop).alleles(CollectionUtil.makeList(RefA, C)); result_builder.start(start).stop(stop).alleles(CollectionUtil.makeList(RefC, A)); genotypeBuilder.alleles(Arrays.asList(C, C)).AD(new int[]{0, 10}).PL(new int[]{400, 40, 0}); resultGenotypeBuilder.alleles(Arrays.asList(RefC, RefC)).AD(new int[]{10, 0}).PL(new int[]{0, 40, 400}); builder.genotypes(genotypeBuilder.make()); result_builder.genotypes(resultGenotypeBuilder.make()); tests.add(new Object[]{builder.make(), result_builder.make()}); return tests.iterator(); }
Example 12
Source File: CallingMetricAccumulatorTest.java From picard with MIT License | 4 votes |
@DataProvider(name = "getSingletonSampleData") public Object[][] getSingletonSampleData() { final List<Object[]> retval = new ArrayList<>(10); final Allele ARef = Allele.create("A", true); final Allele G = Allele.create("G", false); final Allele C = Allele.create("C", false); final GenotypeBuilder genotypeBuilder = new GenotypeBuilder().alleles(CollectionUtil.makeList(ARef, C)); final VariantContextBuilder builder = new VariantContextBuilder(); // one het final Genotype het = genotypeBuilder.name("het").make(); builder.chr("1").start(1).stop(1).alleles(CollectionUtil.makeList(ARef, C, G)).genotypes(Collections.singletonList(het)); retval.add(new Object[]{builder.make(), "het"}); //a het and a hom ref final Genotype homref = genotypeBuilder.name("homref").alleles(CollectionUtil.makeList(ARef)).make(); builder.genotypes(CollectionUtil.makeList(het, homref)); retval.add(new Object[]{builder.make(), "het"}); // a het, a homvar and a homref final Genotype homvar = genotypeBuilder.name("homvar").alleles(CollectionUtil.makeList(C)).make(); builder.genotypes(CollectionUtil.makeList(het, homref, homvar)); retval.add(new Object[]{builder.make(), null}); // two hets and a homref final Genotype het2 = genotypeBuilder.name("het2").alleles(CollectionUtil.makeList(ARef, G)).make(); builder.genotypes(CollectionUtil.makeList(het, homref, het2)); retval.add(new Object[]{builder.make(), null}); // a homvar builder.genotypes(CollectionUtil.makeList(homvar)); retval.add(new Object[]{builder.make(), null}); // a homvar, and a homref builder.genotypes(CollectionUtil.makeList(homvar, homref)); retval.add(new Object[]{builder.make(), null}); // two homrefs final Genotype homref2 = genotypeBuilder.name("homref2").alleles(CollectionUtil.makeList(ARef)).make(); builder.genotypes(CollectionUtil.makeList(homref, homref2)); retval.add(new Object[]{builder.make(), null}); return retval.toArray(new Object[retval.size()][]); }
Example 13
Source File: LiftoverUtils.java From picard with MIT License | 4 votes |
/**
 * method to swap the reference and alt alleles of a bi-allelic, SNP
 *
 * @param vc                    the {@link VariantContext} (bi-allelic SNP) that needs to have it's REF and ALT alleles swapped.
 * @param annotationsToReverse  INFO field annotations (of double value) that will be reversed (x->1-x)
 * @param annotationsToDrop     INFO field annotations that will be dropped from the result since they are invalid when REF and ALT are swapped
 * @return a new {@link VariantContext} with alleles swapped, INFO fields modified and in the genotypes, GT, AD and PL corrected appropriately
 */
public static VariantContext swapRefAlt(final VariantContext vc, final Collection<String> annotationsToReverse, final Collection<String> annotationsToDrop) {

    if (!vc.isBiallelic() || !vc.isSNP()) {
        throw new IllegalArgumentException("swapRefAlt can only process biallelic, SNPS, found " + vc.toString());
    }

    final VariantContextBuilder swappedBuilder = new VariantContextBuilder(vc);

    swappedBuilder.attribute(SWAPPED_ALLELES, true);

    // Use getBaseString() (rather than the Allele itself) in order to create new Alleles with swapped
    // reference and non-variant attributes
    swappedBuilder.alleles(Arrays.asList(vc.getAlleles().get(1).getBaseString(), vc.getAlleles().get(0).getBaseString()));

    final Map<Allele, Allele> alleleMap = new HashMap<>();

    // A mapping from the old allele to the new allele, to be used when fixing the genotypes
    alleleMap.put(vc.getAlleles().get(0), swappedBuilder.getAlleles().get(1));
    alleleMap.put(vc.getAlleles().get(1), swappedBuilder.getAlleles().get(0));

    final GenotypesContext swappedGenotypes = GenotypesContext.create(vc.getGenotypes().size());
    for (final Genotype genotype : vc.getGenotypes()) {
        final List<Allele> swappedAlleles = new ArrayList<>();
        for (final Allele allele : genotype.getAlleles()) {
            if (allele.isNoCall()) {
                swappedAlleles.add(allele);
            } else {
                swappedAlleles.add(alleleMap.get(allele));
            }
        }
        // Flip AD
        final GenotypeBuilder builder = new GenotypeBuilder(genotype).alleles(swappedAlleles);
        if (genotype.hasAD() && genotype.getAD().length == 2) {
            final int[] ad = ArrayUtils.clone(genotype.getAD());
            ArrayUtils.reverse(ad);
            builder.AD(ad);
        } else {
            builder.noAD();
        }

        //Flip PL
        if (genotype.hasPL() && genotype.getPL().length == 3) {
            final int[] pl = ArrayUtils.clone(genotype.getPL());
            ArrayUtils.reverse(pl);
            builder.PL(pl);
        } else {
            builder.noPL();
        }
        swappedGenotypes.add(builder.make());
    }
    swappedBuilder.genotypes(swappedGenotypes);

    for (final String key : vc.getAttributes().keySet()) {
        if (annotationsToDrop.contains(key)) {
            swappedBuilder.rmAttribute(key);
        } else if (annotationsToReverse.contains(key) && !vc.getAttributeAsString(key, "").equals(VCFConstants.MISSING_VALUE_v4)) {
            final double attributeToReverse = vc.getAttributeAsDouble(key, -1);

            if (attributeToReverse < 0 || attributeToReverse > 1) {
                log.warn("Trying to reverse attribute " + key +
                        " but found value that isn't between 0 and 1: (" + attributeToReverse + ") in variant " + vc + ". Results might be wrong.");
            }
            swappedBuilder.attribute(key, 1 - attributeToReverse);
        }
    }
    return swappedBuilder.make();
}
Example 14
Source File: CombineGenotypingArrayVcfs.java From picard with MIT License | 4 votes |
/**
 * Merges multiple VariantContexts all for the same locus into a single hybrid.
 *
 * @param variantContexts  list of VCs
 * @return new VariantContext representing the merge of variantContexts
 */
public static VariantContext merge(final List<VariantContext> variantContexts) {
    if ( variantContexts == null || variantContexts.isEmpty() ) return null;

    // establish the baseline info from the first VC
    final VariantContext first = variantContexts.get(0);
    final String name = first.getSource();

    final Set<String> filters = new HashSet<>();

    int depth = 0;
    double log10PError = CommonInfo.NO_LOG10_PERROR;
    boolean anyVCHadFiltersApplied = false;
    GenotypesContext genotypes = GenotypesContext.create();

    // Go through all the VCs, verify that the loci and ID and other attributes agree.
    final Map<String, Object> firstAttributes = first.getAttributes();
    for (final VariantContext vc : variantContexts ) {
        if ((vc.getStart() != first.getStart()) || (!vc.getContig().equals(first.getContig()))) {
            throw new PicardException("Mismatch in loci among input VCFs");
        }
        if (!vc.getID().equals(first.getID())) {
            throw new PicardException("Mismatch in ID field among input VCFs");
        }
        if (!vc.getReference().equals(first.getReference())) {
            throw new PicardException("Mismatch in REF allele among input VCFs");
        }
        checkThatAllelesMatch(vc, first);

        genotypes.addAll(vc.getGenotypes());

        // We always take the QUAL of the first VC with a non-MISSING qual for the combined value
        if ( log10PError == CommonInfo.NO_LOG10_PERROR ) log10PError = vc.getLog10PError();

        filters.addAll(vc.getFilters());
        anyVCHadFiltersApplied |= vc.filtersWereApplied();

        // add attributes
        // special case DP (add it up)
        if (vc.hasAttribute(VCFConstants.DEPTH_KEY)) depth += vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0);

        // Go through all attributes - if any differ (other than AC, AF, or AN) that's an error. (recalc AC, AF, and AN later)
        for (final Map.Entry<String, Object> p : vc.getAttributes().entrySet()) {
            final String key = p.getKey();
            if ((!key.equals("AC")) && (!key.equals("AF")) && (!key.equals("AN")) && (!key.equals("devX_AB")) && (!key.equals("devY_AB"))) {
                final Object value = p.getValue();
                final Object extantValue = firstAttributes.get(key);
                if (extantValue == null) {
                    // attribute in one VCF but not another. Die!
                    throw new PicardException("Attribute '" + key + "' not found in all VCFs");
                } else if (!extantValue.equals(value)) {
                    // Attribute disagrees in value between one VCF Die! (if not AC, AF, nor AN, skipped above)
                    throw new PicardException("Values for attribute '" + key + "' disagrees among input VCFs");
                }
            }
        }
    }

    if (depth > 0) firstAttributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth));

    final VariantContextBuilder builder = new VariantContextBuilder().source(name).id(first.getID());
    builder.loc(first.getContig(), first.getStart(), first.getEnd());
    builder.alleles(first.getAlleles());
    builder.genotypes(genotypes);

    builder.attributes(new TreeMap<>(firstAttributes));
    // AC, AF, and AN are recalculated here
    VariantContextUtils.calculateChromosomeCounts(builder, false);

    builder.log10PError(log10PError);
    if ( anyVCHadFiltersApplied ) {
        builder.filters(filters.isEmpty() ? filters : new TreeSet<>(filters));
    }

    return builder.make();
}
Example 15
Source File: MergePedIntoVcf.java From picard with MIT License | 4 votes |
/**
 * Writes out the VariantContext objects in the order presented to the supplied output file
 * in VCF format.
 */
private void writeVcf(final CloseableIterator<VariantContext> variants,
                      final File output,
                      final SAMSequenceDictionary dict,
                      final VCFHeader vcfHeader, ZCallPedFile zCallPedFile) {

    try (VariantContextWriter writer = new VariantContextWriterBuilder()
            .setOutputFile(output)
            .setReferenceDictionary(dict)
            .setOptions(VariantContextWriterBuilder.DEFAULT_OPTIONS)
            .build()) {

        writer.writeHeader(vcfHeader);
        while (variants.hasNext()) {
            final VariantContext context = variants.next();

            final VariantContextBuilder builder = new VariantContextBuilder(context);
            if (zCallThresholds.containsKey(context.getID())) {
                final String[] zThresh = zCallThresholds.get(context.getID());
                builder.attribute(InfiniumVcfFields.ZTHRESH_X, zThresh[0]);
                builder.attribute(InfiniumVcfFields.ZTHRESH_Y, zThresh[1]);
            }
            final Genotype originalGenotype = context.getGenotype(0);
            final Map<String, Object> newAttributes = originalGenotype.getExtendedAttributes();
            final VCFEncoder vcfEncoder = new VCFEncoder(vcfHeader, false, false);
            final Map<Allele, String> alleleMap = vcfEncoder.buildAlleleStrings(context);

            final String zCallAlleles = zCallPedFile.getAlleles(context.getID());
            if (zCallAlleles == null) {
                throw new PicardException("No zCall alleles found for snp " + context.getID());
            }
            final List<Allele> zCallPedFileAlleles = buildNewAllelesFromZCall(zCallAlleles, context.getAttributes());
            newAttributes.put(InfiniumVcfFields.GTA, alleleMap.get(originalGenotype.getAllele(0)) + UNPHASED + alleleMap.get(originalGenotype.getAllele(1)));
            newAttributes.put(InfiniumVcfFields.GTZ, alleleMap.get(zCallPedFileAlleles.get(0)) + UNPHASED + alleleMap.get(zCallPedFileAlleles.get(1)));

            final Genotype newGenotype = GenotypeBuilder.create(originalGenotype.getSampleName(), zCallPedFileAlleles, newAttributes);
            builder.genotypes(newGenotype);
            logger.record("0", 0);
            // AC, AF, and AN are recalculated here
            VariantContextUtils.calculateChromosomeCounts(builder, false);
            final VariantContext newContext = builder.make();
            writer.add(newContext);
        }
    }
}
Example 16
Source File: GtcToVcf.java From picard with MIT License | 4 votes |
private VariantContext makeVariantContext(Build37ExtendedIlluminaManifestRecord record, final InfiniumGTCRecord gtcRecord,
                                          final InfiniumEGTFile egtFile, final ProgressLogger progressLogger) {
    // If the record is not flagged as errant in the manifest we include it in the VCF
    Allele A = record.getAlleleA();
    Allele B = record.getAlleleB();
    Allele ref = record.getRefAllele();

    final String chr = record.getB37Chr();
    final Integer position = record.getB37Pos();
    final Integer endPosition = position + ref.length() - 1;

    Integer egtIndex = egtFile.rsNameToIndex.get(record.getName());
    if (egtIndex == null) {
        throw new PicardException("Found no record in cluster file for manifest entry '" + record.getName() + "'");
    }

    progressLogger.record(chr, position);

    // Create list of unique alleles
    final List<Allele> assayAlleles = new ArrayList<>();
    assayAlleles.add(ref);

    if (A.equals(B)) {
        throw new PicardException("Found same allele (" + A.getDisplayString() + ") for A and B ");
    }

    if (!ref.equals(A, true)) {
        assayAlleles.add(A);
    }

    if (!ref.equals(B, true)) {
        assayAlleles.add(B);
    }

    final String sampleName = FilenameUtils.removeExtension(INPUT.getName());
    final Genotype genotype = getGenotype(sampleName, gtcRecord, record, A, B);

    final VariantContextBuilder builder = new VariantContextBuilder();

    builder.source(record.getName());
    builder.chr(chr);
    builder.start(position);
    builder.stop(endPosition);
    builder.alleles(assayAlleles);
    builder.log10PError(VariantContext.NO_LOG10_PERROR);
    builder.id(record.getName());
    builder.genotypes(genotype);

    VariantContextUtils.calculateChromosomeCounts(builder, false);

    //custom info fields
    builder.attribute(InfiniumVcfFields.ALLELE_A, record.getAlleleA());
    builder.attribute(InfiniumVcfFields.ALLELE_B, record.getAlleleB());
    builder.attribute(InfiniumVcfFields.ILLUMINA_STRAND, record.getIlmnStrand());
    builder.attribute(InfiniumVcfFields.PROBE_A, record.getAlleleAProbeSeq());
    builder.attribute(InfiniumVcfFields.PROBE_B, record.getAlleleBProbeSeq());
    builder.attribute(InfiniumVcfFields.BEADSET_ID, record.getBeadSetId());
    builder.attribute(InfiniumVcfFields.ILLUMINA_CHR, record.getChr());
    builder.attribute(InfiniumVcfFields.ILLUMINA_POS, record.getPosition());
    builder.attribute(InfiniumVcfFields.ILLUMINA_BUILD, record.getGenomeBuild());
    builder.attribute(InfiniumVcfFields.SOURCE, record.getSource().replace(' ', '_'));
    builder.attribute(InfiniumVcfFields.GC_SCORE, formatFloatForVcf(egtFile.totalScore[egtIndex]));

    for (InfiniumVcfFields.GENOTYPE_VALUES gtValue : InfiniumVcfFields.GENOTYPE_VALUES.values()) {
        final int ordinalValue = gtValue.ordinal();
        builder.attribute(InfiniumVcfFields.N[ordinalValue], egtFile.n[egtIndex][ordinalValue]);
        builder.attribute(InfiniumVcfFields.DEV_R[ordinalValue], formatFloatForVcf(egtFile.devR[egtIndex][ordinalValue]));
        builder.attribute(InfiniumVcfFields.MEAN_R[ordinalValue], formatFloatForVcf(egtFile.meanR[egtIndex][ordinalValue]));
        builder.attribute(InfiniumVcfFields.DEV_THETA[ordinalValue], formatFloatForVcf(egtFile.devTheta[egtIndex][ordinalValue]));
        builder.attribute(InfiniumVcfFields.MEAN_THETA[ordinalValue], formatFloatForVcf(egtFile.meanTheta[egtIndex][ordinalValue]));

        final EuclideanValues genotypeEuclideanValues = polarToEuclidean(egtFile.meanR[egtIndex][ordinalValue], egtFile.devR[egtIndex][ordinalValue],
                egtFile.meanTheta[egtIndex][ordinalValue], egtFile.devTheta[egtIndex][ordinalValue]);
        builder.attribute(InfiniumVcfFields.DEV_X[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.devX));
        builder.attribute(InfiniumVcfFields.MEAN_X[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.meanX));
        builder.attribute(InfiniumVcfFields.DEV_Y[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.devY));
        builder.attribute(InfiniumVcfFields.MEAN_Y[ordinalValue], formatFloatForVcf(genotypeEuclideanValues.meanY));
    }

    final String rsid = record.getRsId();
    if (StringUtils.isNotEmpty(rsid)) {
        builder.attribute(InfiniumVcfFields.RS_ID, rsid);
    }
    if (egtFile.totalScore[egtIndex] == 0.0) {
        builder.filter(InfiniumVcfFields.ZEROED_OUT_ASSAY);
        if (genotype.isCalled()) {
            if (DO_NOT_ALLOW_CALLS_ON_ZEROED_OUT_ASSAYS) {
                throw new PicardException("Found a call (genotype: " + genotype + ") on a zeroed out Assay!!");
            } else {
                log.warn("Found a call (genotype: " + genotype + ") on a zeroed out Assay. " +
                        "This could occur if you called genotypes on a different cluster file than used here.");
            }
        }
    }
    if (record.isDupe()) {
        builder.filter(InfiniumVcfFields.DUPE);
    }
    return builder.make();
}
Example 17
Source File: VariantToVcf.java From genomewarp with Apache License 2.0 | 4 votes |
private static VariantContext getContextFromVariant(VCFHeader header, Variant in) {
    // Create allele list
    String refBases = in.getReferenceBases();
    List<String> altBases = in.getAlternateBasesList();
    List<Allele> alleleList = new ArrayList<>();
    alleleList.add(Allele.create(refBases.getBytes(StandardCharsets.UTF_8), true));
    for (String base : altBases) {
        alleleList.add(Allele.create(base.getBytes(StandardCharsets.UTF_8), false));
    }

    // Note htsjdk start/end are both 1-based closed
    VariantContextBuilder builder = new VariantContextBuilder(SOURCE, in.getReferenceName(),
            in.getStart() + 1, in.getEnd(), alleleList);

    // Set Quality
    if (in.getQuality() != 0.0) {
        builder.log10PError(-in.getQuality() / 10);
    }

    // Set names
    int numNames = in.getNamesCount();
    int i = 0;
    String namesString = "";
    for (String name : in.getNamesList()) {
        if (i == numNames - 1) {
            break;
        }
        namesString += name + ",";
    }
    if (numNames == 0) {
        builder.noID();
    } else {
        // Also handle fence post
        builder.id(namesString + in.getNames(numNames - 1));
    }

    // Set filters
    boolean hadFilter = false;
    for (String filter : in.getFilterList()) {
        hadFilter = true;
        if (filter.equals(VCFConstants.PASSES_FILTERS_v4)) {
            builder.passFilters();
            break;
        }
        if (filter.equals(VCFConstants.MISSING_VALUE_v4)) {
            builder.unfiltered();
            break;
        }
        builder.filter(filter);
    }
    if (!hadFilter) {
        builder.unfiltered();
    }

    // Set info
    Map<String, Object> toSet = new HashMap<>();
    for (Map.Entry<String, ListValue> entry : in.getInfo().entrySet()) {
        String currKey = entry.getKey();
        List<Value> currValue = entry.getValue().getValuesList();
        int currSize = currValue.size();

        if (currSize == 0) {
            // If the key is present in the info map, there must
            // be non-zero attributes associated with it
            throw new IllegalStateException(String.format("info field %s has length 0", currKey));
        }

        VCFHeaderLineType type = header.getInfoHeaderLine(currKey).getType();
        if (currSize == 1) {
            toSet.put(currKey, parseObject(currKey, currValue.get(0), type));
            continue;
        }

        List<Object> objectList = new ArrayList<>();
        for (Value val : currValue) {
            objectList.add(parseObject(currKey, val, type));
        }
        toSet.put(currKey, objectList);
    }
    builder.attributes(toSet);

    // Set calls
    List<Genotype> genotypes = new ArrayList<>();
    for (VariantCall vc : in.getCallsList()) {
        genotypes.add(getGenotypeFromVariantCall(header, vc, alleleList));
    }
    if (genotypes.size() == 0) {
        builder.noGenotypes();
    } else {
        builder.genotypes(genotypes);
    }

    return builder.make();
}