htsjdk.variant.variantcontext.writer.VariantContextWriter Java Examples
The following examples show how to use
htsjdk.variant.variantcontext.writer.VariantContextWriter.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HMMPostProcessor.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * For each segment in genotypingSegments, compose the variant context and write to outputWriter * * @param genotypingSegments a list of genotyping segments * @param outputWriter a VCF writer * @param variantPrefix a prefix for composing variant IDs * @param commandLine (optional) command line used to generated the data */ private void composeVariantContextAndWrite(@Nonnull final List<GenotypingSegment> genotypingSegments, @Nonnull final VariantContextWriter outputWriter, @Nonnull final String variantPrefix, @Nullable final String commandLine) { outputWriter.writeHeader(composeHeader(commandLine)); int counter = 0; int prevReportedDonePercentage = -1; for (final GenotypingSegment segment : genotypingSegments) { final int donePercentage = (int)(100 * counter / (double)genotypingSegments.size()); if (donePercentage % 10 == 0 && prevReportedDonePercentage != donePercentage) { logger.info(String.format("%d%% done...", donePercentage)); prevReportedDonePercentage = donePercentage; } final VariantContext variant = composeVariantContext(segment, variantPrefix); counter++; outputWriter.add(variant); } logger.info("100% done."); }
Example #2
Source File: GtcToVcf.java From picard with MIT License | 6 votes |
/** * Writes out the VariantContext objects in the order presented to the supplied output file * in VCF format. */ private void writeVcf(final SortingCollection<VariantContext> variants, final File output, final SAMSequenceDictionary dict, final VCFHeader vcfHeader) { try (final VariantContextWriter writer = new VariantContextWriterBuilder() .setOutputFile(output) .setReferenceDictionary(dict) .setOptions(VariantContextWriterBuilder.DEFAULT_OPTIONS) .build()) { writer.writeHeader(vcfHeader); for (final VariantContext variant : variants) { if (variant.getAlternateAlleles().size() > 1) { variant.getCommonInfo().addFilter(InfiniumVcfFields.TRIALLELIC); } writer.add(variant); } } }
Example #3
Source File: GenotypeConcordance.java From picard with MIT License | 6 votes |
/** Gets the variant context writer if the output VCF is to be written, otherwise empty. */ private Optional<VariantContextWriter> getVariantContextWriter(final VCFFileReader truthReader, final VCFFileReader callReader) { if (OUTPUT_VCF) { final File outputVcfFile = new File(OUTPUT + OUTPUT_VCF_FILE_EXTENSION); final VariantContextWriterBuilder builder = new VariantContextWriterBuilder() .setOutputFile(outputVcfFile) .setReferenceDictionary(callReader.getFileHeader().getSequenceDictionary()) .setOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER) .setOption(Options.INDEX_ON_THE_FLY); final VariantContextWriter writer = builder.build(); // create the output header final List<String> sampleNames = Arrays.asList(OUTPUT_VCF_CALL_SAMPLE_NAME, OUTPUT_VCF_TRUTH_SAMPLE_NAME); final Set<VCFHeaderLine> headerLines = new HashSet<>(); headerLines.addAll(callReader.getFileHeader().getMetaDataInInputOrder()); headerLines.addAll(truthReader.getFileHeader().getMetaDataInInputOrder()); headerLines.add(CONTINGENCY_STATE_HEADER_LINE); writer.writeHeader(new VCFHeader(headerLines, sampleNames)); return Optional.of(writer); } else { return Optional.empty(); } }
Example #4
Source File: GATKToolUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test(dataProvider = "createVCFWriterLenientData") public void testCreateVCFWriterLenientTrue( final File inputFile, final String outputExtension, final String indexExtension, final boolean createIndex, final boolean createMD5) throws IOException { final TestGATKToolWithVariants tool = new TestGATKToolWithVariants(); // verify lenient==true is honored by writing a bad attribute final File outputFile = setupVCFWriter(inputFile, outputExtension, tool, createIndex, createMD5, true); try (VariantContextWriter writer = tool.createVCFWriter(outputFile)) { writeHeaderAndBadVariant(writer); // write bad attribute succeed with lenient set } final File outFileIndex = new File(outputFile.getAbsolutePath() + indexExtension); final File outFileMD5 = new File(outputFile.getAbsolutePath() + ".md5"); Assert.assertTrue(outputFile.exists(), "No output file was not created"); Assert.assertEquals(outFileIndex.exists(), createIndex, "The createIndex argument was not honored"); Assert.assertEquals(outFileMD5.exists(), createMD5, "The createMD5 argument was not honored"); }
Example #5
Source File: GATKToolUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test(dataProvider = "createVCFWriterData") public void testCreateVCFWriterWithOptions( final File inputFile, final String outputExtension, final String indexExtension, final boolean createIndex, final boolean createMD5) throws IOException { // create a writer and make sure the requested index/md5 params are honored final TestGATKToolWithVariants tool = new TestGATKToolWithVariants(); final File outputFile = setupVCFWriter(inputFile, outputExtension, tool, createIndex, createMD5, false); final VariantContextWriter writer = tool.createVCFWriter(outputFile); writer.close(); final File outFileIndex = new File(outputFile.getAbsolutePath() + indexExtension); final File outFileMD5 = new File(outputFile.getAbsolutePath() + ".md5"); Assert.assertTrue(outputFile.exists(), "No output file was not created"); Assert.assertEquals(outFileIndex.exists(), createIndex, "The createIndex argument was not honored"); Assert.assertEquals(outFileMD5.exists(), createMD5, "The createMD5 argument was not honored"); }
Example #6
Source File: Mutect2Engine.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
public void writeHeader(final VariantContextWriter vcfWriter, final SAMSequenceDictionary sequenceDictionary, final Set<VCFHeaderLine> defaultToolHeaderLines) { final Set<VCFHeaderLine> headerInfo = new HashSet<>(); // all annotation fields from VariantAnnotatorEngine headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions()); headerInfo.addAll(defaultToolHeaderLines); // all callers need to add these standard FORMAT field header lines VCFStandardHeaderLines.addStandardFormatLines(headerInfo, true, VCFConstants.GENOTYPE_KEY, VCFConstants.GENOTYPE_ALLELE_DEPTHS, VCFConstants.GENOTYPE_QUALITY_KEY, VCFConstants.DEPTH_KEY, VCFConstants.GENOTYPE_PL_KEY); headerInfo.addAll(getM2HeaderLines()); headerInfo.addAll(getSampleHeaderLines()); final VCFHeader vcfHeader = new VCFHeader(headerInfo, samplesList.asListOfSamples()); vcfHeader.setSequenceDictionary(sequenceDictionary); vcfWriter.writeHeader(vcfHeader); }
Example #7
Source File: PostprocessGermlineCNVCalls.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
private void generateIntervalsVCFFileFromAllShards() { logger.info("Generating intervals VCF file..."); final VariantContextWriter intervalsVCFWriter = createVCFWriter(outputIntervalsVCFFile); final GermlineCNVIntervalVariantComposer germlineCNVIntervalVariantComposer = new GermlineCNVIntervalVariantComposer(intervalsVCFWriter, sampleName, refAutosomalIntegerCopyNumberState, allosomalContigSet); germlineCNVIntervalVariantComposer.composeVariantContextHeader(sequenceDictionary, getDefaultToolVCFHeaderLines()); logger.info(String.format("Writing intervals VCF file to %s...", outputIntervalsVCFFile.getAbsolutePath())); for (int shardIndex = 0; shardIndex < numShards; shardIndex++) { logger.info(String.format("Analyzing shard %d / %d...", shardIndex + 1, numShards)); germlineCNVIntervalVariantComposer.writeAll(getShardIntervalCopyNumberPosteriorData(shardIndex)); } intervalsVCFWriter.close(); }
Example #8
Source File: HaplotypeCallerEngine.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Create a VCF or GVCF writer as appropriate, given our arguments * * @param outputVCF location to which the vcf should be written * @param readsDictionary sequence dictionary for the reads * @return a VCF or GVCF writer as appropriate, ready to use */ public VariantContextWriter makeVCFWriter( final String outputVCF, final SAMSequenceDictionary readsDictionary ) { Utils.nonNull(outputVCF); Utils.nonNull(readsDictionary); VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(new File(outputVCF), readsDictionary, false); if ( hcArgs.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) { try { writer = new GVCFWriter(writer, hcArgs.GVCFGQBands, hcArgs.genotypeArgs.samplePloidy); } catch ( IllegalArgumentException e ) { throw new CommandLineException.BadArgumentValue("GQBands", "are malformed: " + e.getMessage()); } } return writer; }
Example #9
Source File: GATKToolUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test(dataProvider = "createVCFWriterLenientData", expectedExceptions = IllegalStateException.class) public void testCreateVCFWriterLenientFalse( final File inputFile, final String outputExtension, final String indexExtension, // unused final boolean createIndex, final boolean createMD5) throws IOException { // verify lenient==false is honored by writing a bad attribute final TestGATKToolWithVariants tool = new TestGATKToolWithVariants(); final File outputFile = setupVCFWriter(inputFile, outputExtension, tool, createIndex, createMD5, false); try (VariantContextWriter writer = tool.createVCFWriter(outputFile)) { writeHeaderAndBadVariant(writer); // throws due to bad attribute } }
Example #10
Source File: GATKToolUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test(dataProvider = "createVCFWriterData") public void testCreateVCFWriterDefaults( final File inputFile, // unused final String outputExtension, final String indexExtension, final boolean createIndex, // unused final boolean createMD5 // unused ) throws IOException { // create a writer and make sure the default index/md5 params are honored final TestGATKToolWithVariants tool = createTestVariantTool(null); final File tmpDir = createTempDir("createVCFTest"); final File outputFile = new File(tmpDir.getAbsolutePath(), "createVCFTest" + outputExtension); final VariantContextWriter writer = tool.createVCFWriter(outputFile); writer.close(); final File outFileIndex = new File(outputFile.getAbsolutePath() + indexExtension); final File outFileMD5 = new File(outputFile.getAbsolutePath() + ".md5"); Assert.assertTrue(outputFile.exists(), "No output file was not created"); Assert.assertTrue(outFileIndex.exists(), "The index file was not created"); Assert.assertFalse(outFileMD5.exists(), "An md5 file was created and should not have been"); }
Example #11
Source File: GATKToolUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test(dataProvider = "createVCFWriterData") public void testCreateVCFWriterWithNoSequenceDictionary( final File inputFile, final String outputExtension, final String indexExtension, final boolean createIndex, final boolean createMD5) throws IOException { // verify that a null sequence dictionary still results in a file, but with no index final TestGATKVariantToolWithNoSequenceDictionary tool = new TestGATKVariantToolWithNoSequenceDictionary(); final File outputFile = setupVCFWriter(inputFile, outputExtension, tool, createIndex, createMD5, false); final VariantContextWriter writer = tool.createVCFWriter(outputFile); writer.close(); final File outFileIndex = new File(outputFile.getAbsolutePath() + indexExtension); final File outFileMD5 = new File(outputFile.getAbsolutePath() + ".md5"); Assert.assertTrue(outputFile.exists(), "No output file was not created"); Assert.assertEquals(outFileIndex.exists(), false, "An index file should not have been created"); // always false with no seq dictionary Assert.assertEquals(outFileMD5.exists(), createMD5, "The createMD5 argument was not honored"); }
Example #12
Source File: SVVCFWriter.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
private static void writeVariants(final String fileName, final List<VariantContext> variantsArrayList, final SAMSequenceDictionary referenceSequenceDictionary, final Set<VCFHeaderLine> defaultToolVCFHeaderLines) { try (final OutputStream outputStream = new BufferedOutputStream(BucketUtils.createFile(fileName))) { final VariantContextWriter vcfWriter = getVariantContextWriter(outputStream, referenceSequenceDictionary); final VCFHeader vcfHeader = getVcfHeader(referenceSequenceDictionary); defaultToolVCFHeaderLines.forEach(vcfHeader::addMetaDataLine); vcfWriter.writeHeader(vcfHeader); variantsArrayList.forEach(vcfWriter::add); vcfWriter.close(); } catch (final IOException e) { throw new GATKException("Could not create output file", e); } }
Example #13
Source File: VariantDataManager.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
public void writeOutRecalibrationTable(final VariantContextWriter recalWriter, final SAMSequenceDictionary seqDictionary) { // we need to sort in coordinate order in order to produce a valid VCF Collections.sort( data, VariantDatum.getComparator(seqDictionary) ); // create dummy alleles to be used List<Allele> alleles = Arrays.asList(Allele.create("N", true), Allele.create("<VQSR>", false)); for( final VariantDatum datum : data ) { if (VRAC.useASannotations) alleles = Arrays.asList(datum.referenceAllele, datum.alternateAllele); //use the alleles to distinguish between multiallelics in AS mode VariantContextBuilder builder = new VariantContextBuilder("VQSR", datum.loc.getContig(), datum.loc.getStart(), datum.loc.getEnd(), alleles); builder.attribute(VCFConstants.END_KEY, datum.loc.getEnd()); builder.attribute(GATKVCFConstants.VQS_LOD_KEY, String.format("%.4f", datum.lod)); builder.attribute(GATKVCFConstants.CULPRIT_KEY, (datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL")); if ( datum.atTrainingSite ) builder.attribute(GATKVCFConstants.POSITIVE_LABEL_KEY, true); if ( datum.atAntiTrainingSite ) builder.attribute(GATKVCFConstants.NEGATIVE_LABEL_KEY, true); recalWriter.add(builder.make()); } }
Example #14
Source File: MNVValidatorApplication.java From hmftools with GNU General Public License v3.0 | 6 votes |
private static void processVariants(boolean strelka, @NotNull final String filePath, @NotNull final String outputVcf, @NotNull final String tumorBam) { final VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false); final VCFHeader outputHeader = generateOutputHeader(vcfReader.getFileHeader(), "TUMOR"); final VariantContextWriter vcfWriter = new VariantContextWriterBuilder().setOutputFile(outputVcf) .setReferenceDictionary(vcfReader.getFileHeader().getSequenceDictionary()) .build(); vcfWriter.writeHeader(outputHeader); final MNVValidator validator = ImmutableMNVValidator.of(tumorBam); final MNVMerger merger = ImmutableMNVMerger.of(outputHeader); Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair = ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty()); for (final VariantContext rawVariant : vcfReader) { final VariantContext simplifiedVariant = strelka ? StrelkaPostProcess.simplifyVariant(rawVariant, StrelkaPostProcess.TUMOR_GENOTYPE) : rawVariant; final PotentialMNVRegion potentialMNV = outputPair.getLeft(); outputPair = MNVDetector.addMnvToRegion(potentialMNV, simplifiedVariant); outputPair.getRight().ifPresent(mnvRegion -> validator.mergeVariants(mnvRegion, merger).forEach(vcfWriter::add)); } validator.mergeVariants(outputPair.getLeft(), merger).forEach(vcfWriter::add); vcfWriter.close(); vcfReader.close(); LOGGER.info("Written output variants to " + outputVcf); }
Example #15
Source File: PurpleStructuralVariantSupplier.java From hmftools with GNU General Public License v3.0 | 6 votes |
void write(@NotNull final PurityAdjuster purityAdjuster, @NotNull final List<PurpleCopyNumber> copyNumbers) throws IOException { if (header.isPresent()) { try (final IndexedFastaSequenceFile indexedFastaSequenceFile = new IndexedFastaSequenceFile(new File(refGenomePath)); final VariantContextWriter writer = new VariantContextWriterBuilder().setOutputFile(outputVCF) .setReferenceDictionary(header.get().getSequenceDictionary()) .setIndexCreator(new TabixIndexCreator(header.get().getSequenceDictionary(), new TabixFormat())) .setOutputFileType(VariantContextWriterBuilder.OutputType.BLOCK_COMPRESSED_VCF) .setOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER) .build()) { final StructuralRefContextEnrichment refEnricher = new StructuralRefContextEnrichment(indexedFastaSequenceFile, writer::add); writer.writeHeader(refEnricher.enrichHeader(header.get())); enriched(purityAdjuster, copyNumbers).forEach(refEnricher); refEnricher.flush(); } } }
Example #16
Source File: GATKVariantContextUtilsUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test(dataProvider = "createVCFWriterLenientData", expectedExceptions = IllegalStateException.class) public void testCreateVCFWriterLenientFalse( final String outputExtension, final String indexExtension, // unused final boolean createIndex, final boolean createMD5) throws IOException { final File tmpDir = createTempDir("createVCFTest"); final File outputFile = new File(tmpDir.getAbsolutePath(), "createVCFTest" + outputExtension); Options options[] = createIndex ? new Options[] {Options.INDEX_ON_THE_FLY} : new Options[] {}; try (final VariantContextWriter vcw = GATKVariantContextUtils.createVCFWriter( outputFile.toPath(), makeSimpleSequenceDictionary(), createMD5, options)) { writeHeader(vcw); writeBadVariant(vcw); // write a bad attribute and throw... } }
Example #17
Source File: GermlineCNVIntervalVariantComposerUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test(dataProvider = "alleleDeterminationTestData") public void testAlleleDetermination(final int refAutosomalCopyNumber, final Set<String> allosomalContigs, final int baselineCopyNumber, final Allele expectedAllele) { final File outputFile = createTempFile("test", ".vcf"); final VariantContextWriter outputWriter = GATKVariantContextUtils.createVCFWriter(outputFile.toPath(), null, false); final GermlineCNVIntervalVariantComposer variantComposer = new GermlineCNVIntervalVariantComposer( outputWriter, "TEST_SAMPLE_NAME", new IntegerCopyNumberState(refAutosomalCopyNumber), allosomalContigs); final VariantContext var = variantComposer.composeVariantContext( new IntervalCopyNumberGenotypingData(TEST_INTERVAL, TEST_DISTRIBUTION, new IntegerCopyNumberState(baselineCopyNumber))); final List<Allele> allAlleles = var.getAlleles(); Assert.assertEquals(allAlleles, GermlineCNVIntervalVariantComposer.ALL_ALLELES); final Genotype gen = var.getGenotype(TEST_SAMPLE_NAME); final Allele genAllele = gen.getAlleles().get(0); Assert.assertEquals(genAllele, expectedAllele); }
Example #18
Source File: AmberVCF.java From hmftools with GNU General Public License v3.0 | 6 votes |
public void writeBAF(@NotNull final String filename, @NotNull final Collection<TumorBAF> tumorEvidence, @NotNull final AmberHetNormalEvidence hetNormalEvidence) { final List<TumorBAF> list = Lists.newArrayList(tumorEvidence); Collections.sort(list); final VariantContextWriter writer = new VariantContextWriterBuilder().setOutputFile(filename).modifyOption(Options.INDEX_ON_THE_FLY, true).build(); final VCFHeader header = header(config.tumorOnly() ? Collections.singletonList(config.tumor()) : config.allSamples()); writer.setHeader(header); writer.writeHeader(header); final ListMultimap<AmberSite, Genotype> genotypeMap = ArrayListMultimap.create(); for (final String sample : hetNormalEvidence.samples()) { for (BaseDepth baseDepth : hetNormalEvidence.evidence(sample)) { genotypeMap.put(AmberSiteFactory.asSite(baseDepth), createGenotype(sample, baseDepth)); } } for (final TumorBAF tumorBAF : list) { AmberSite tumorSite = AmberSiteFactory.tumorSite(tumorBAF); genotypeMap.put(tumorSite, createGenotype(tumorBAF)); writer.add(create(tumorBAF, genotypeMap.get(tumorSite))); } writer.close(); }
Example #19
Source File: FingerprintUtils.java From picard with MIT License | 6 votes |
private static VariantContextWriter getVariantContextWriter(final File outputFile, final File referenceSequenceFileName, final String sample, final String source, final ReferenceSequenceFile ref) { final VariantContextWriter variantContextWriter = new VariantContextWriterBuilder() .setReferenceDictionary(ref.getSequenceDictionary()) .setOutputFile(outputFile).build(); final Set<VCFHeaderLine> lines = new LinkedHashSet<>(); lines.add(new VCFHeaderLine("reference", referenceSequenceFileName.getAbsolutePath())); lines.add(new VCFHeaderLine("source", source)); lines.add(new VCFHeaderLine("fileDate", new Date().toString())); lines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_PL_KEY)); lines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS)); lines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY)); final VCFHeader header = new VCFHeader(lines, Collections.singletonList(sample)); header.setSequenceDictionary(ref.getSequenceDictionary()); variantContextWriter.writeHeader(header); return variantContextWriter; }
Example #20
Source File: HaplotypeCallerEngine.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Create a VCF or GVCF writer as appropriate, given our arguments * * @param outputVCF location to which the vcf should be written * @param readsDictionary sequence dictionary for the reads * @return a VCF or GVCF writer as appropriate, ready to use */ public VariantContextWriter makeVCFWriter( final String outputVCF, final SAMSequenceDictionary readsDictionary, final boolean createOutputVariantIndex, final boolean createOutputVariantMD5, final boolean sitesOnlyMode ) { Utils.nonNull(outputVCF); Utils.nonNull(readsDictionary); final List<Options> options = new ArrayList<>(2); if (createOutputVariantIndex) {options.add(Options.INDEX_ON_THE_FLY);} if (sitesOnlyMode) {options.add(Options.DO_NOT_WRITE_GENOTYPES);} VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter( IOUtils.getPath(outputVCF), readsDictionary, createOutputVariantMD5, options.toArray(new Options[options.size()]) ); if ( hcArgs.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) { try { writer = new GVCFWriter(writer, new ArrayList<Number>(hcArgs.GVCFGQBands), hcArgs.standardArgs.genotypeArgs.samplePloidy, hcArgs.floorBlocks); } catch ( IllegalArgumentException e ) { throw new CommandLineException.BadArgumentValue("GQBands", "are malformed: " + e.getMessage()); } } return writer; }
Example #21
Source File: CombineGVCFs.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private VariantContextWriter getVCFWriter() { final SortedSet<String> samples = getSamplesForVariants(); final VCFHeader inputVCFHeader = new VCFHeader(getHeaderForVariants().getMetaDataInInputOrder(), samples); final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>(inputVCFHeader.getMetaDataInInputOrder()); headerLines.addAll(getDefaultToolVCFHeaderLines()); headerLines.addAll(annotationEngine.getVCFAnnotationDescriptions()); // add headers for annotations added by this tool headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY)); // needed for gVCFs without DP tags if ( dbsnp.dbsnp != null ) { VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.DBSNP_KEY); } if (somaticInput) { //single-sample M2 variant filter status will get moved to genotype filter headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY)); if (!dropSomaticFilteringAnnotations) { //standard M2 INFO annotations for filtering will get moved to FORMAT field for (final String key : Mutect2FilteringEngine.STANDARD_MUTECT_INFO_FIELDS_FOR_FILTERING) { headerLines.add(GATKVCFHeaderLines.getEquivalentFormatHeaderLine(key)); } } } VariantContextWriter writer = createVCFWriter(outputFile); final Set<String> sampleNameSet = new IndexedSampleList(samples).asSetOfSamples(); final VCFHeader vcfHeader = new VCFHeader(headerLines, new TreeSet<>(sampleNameSet)); writer.writeHeader(vcfHeader); return writer; }
Example #22
Source File: GATKVariantContextUtils.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Creates a VariantContextWriter whose outputFile type is based on the extension of the output file name. * The default options set by VariantContextWriter are cleared before applying ALLOW_MISSING_FIELDS_IN_HEADER (if * <code>lenientProcessing</code> is set), followed by the set of options specified by any <code>options</code> args. * * @param outPath output Path for this writer. May not be null. * @param referenceDictionary required if on the fly indexing is set, otherwise can be null * @param createMD5 true if an md5 file should be created * @param options variable length list of additional Options to be set for this writer * @returns VariantContextWriter must be closed by the caller */ public static VariantContextWriter createVCFWriter( final Path outPath, final SAMSequenceDictionary referenceDictionary, final boolean createMD5, final Options... options) { Utils.nonNull(outPath); VariantContextWriterBuilder vcWriterBuilder = new VariantContextWriterBuilder().clearOptions().setOutputPath(outPath); if (VariantContextWriterBuilder.OutputType.UNSPECIFIED == VariantContextWriterBuilder.determineOutputTypeFromFile(outPath)) { // the only way the user has to specify an output type is by file extension, and htsjdk // throws if it can't map the file extension to a known vcf type, so fallback to a default // of VCF logger.warn(String.format( "Can't determine output variant file format from output file extension \"%s\". Defaulting to VCF.", FilenameUtils.getExtension(outPath.getFileName().toString()))); vcWriterBuilder = vcWriterBuilder.setOutputFileType(VariantContextWriterBuilder.OutputType.VCF); } if (createMD5) { vcWriterBuilder.setCreateMD5(); } if (null != referenceDictionary) { vcWriterBuilder = vcWriterBuilder.setReferenceDictionary(referenceDictionary); } for (Options opt : options) { vcWriterBuilder = vcWriterBuilder.setOption(opt); } return vcWriterBuilder.build(); }
Example #23
Source File: Mutect2Engine.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
public void writeHeader(final VariantContextWriter vcfWriter, final Set<VCFHeaderLine> defaultToolHeaderLines) { final Set<VCFHeaderLine> headerInfo = new HashSet<>(); headerInfo.add(new VCFHeaderLine("MutectVersion", MUTECT_VERSION)); headerInfo.add(new VCFHeaderLine(FilterMutectCalls.FILTERING_STATUS_VCF_KEY, "Warning: unfiltered Mutect 2 calls. Please run " + FilterMutectCalls.class.getSimpleName() + " to remove false positives.")); headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions(false)); headerInfo.addAll(defaultToolHeaderLines); STANDARD_MUTECT_INFO_FIELDS.stream().map(GATKVCFHeaderLines::getInfoLine).forEach(headerInfo::add); VCFStandardHeaderLines.addStandardFormatLines(headerInfo, true, VCFConstants.GENOTYPE_KEY, VCFConstants.GENOTYPE_ALLELE_DEPTHS, VCFConstants.GENOTYPE_QUALITY_KEY, VCFConstants.DEPTH_KEY, VCFConstants.GENOTYPE_PL_KEY); headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.ALLELE_FRACTION_KEY)); headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY)); headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY)); headerInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.PHASE_SET_KEY)); ReadUtils.getSamplesFromHeader(header).stream().filter(this::isTumorSample) .forEach(s -> headerInfo.add(new VCFHeaderLine(TUMOR_SAMPLE_KEY_IN_VCF_HEADER, s))); normalSamples.forEach(sample -> headerInfo.add(new VCFHeaderLine(NORMAL_SAMPLE_KEY_IN_VCF_HEADER, sample))); if (emitReferenceConfidence()) { headerInfo.addAll(referenceConfidenceModel.getVCFHeaderLines()); headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.TUMOR_LOG_10_ODDS_KEY)); } final VCFHeader vcfHeader = new VCFHeader(headerInfo, samplesList.asListOfSamples()); vcfHeader.setSequenceDictionary(header.getSequenceDictionary()); vcfWriter.writeHeader(vcfHeader); }
Example #24
Source File: SVVCFWriter.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private static VariantContextWriter getVariantContextWriter(final OutputStream outputStream, final SAMSequenceDictionary referenceSequenceDictionary) { VariantContextWriterBuilder vcWriterBuilder = new VariantContextWriterBuilder() .clearOptions() .setOutputStream(outputStream); if (null != referenceSequenceDictionary) { vcWriterBuilder = vcWriterBuilder.setReferenceDictionary(referenceSequenceDictionary); } for (final Options opt : new Options[]{}) { vcWriterBuilder = vcWriterBuilder.setOption(opt); } return vcWriterBuilder.build(); }
Example #25
Source File: FingerprintUtils.java From picard with MIT License | 5 votes |
/** * A function that takes a Fingerprint and writes it as a VCF to a file * * @param fingerprint the fingerprint to write * @param outputFile the file to write to * @param referenceSequenceFileName the reference sequence (file) * @param sample the sample name to use in the vcf * @param source a "source" comment to use in the VCF * @throws IOException */ public static void writeFingerPrint(final Fingerprint fingerprint, final File outputFile, final File referenceSequenceFileName, final String sample, final String source) throws IOException { try (final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(referenceSequenceFileName); final VariantContextWriter variantContextWriter = getVariantContextWriter(outputFile, referenceSequenceFileName, sample, source, ref)) { createVCSetFromFingerprint(fingerprint, ref, sample).forEach(variantContextWriter::add); } }
Example #26
Source File: GenotypeGVCFsEngine.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Create a VCF header in the writer * * @param vcfWriter * @return a VCF writer */ public VariantContextWriter setupVCFWriter(Set<VCFHeaderLine> defaultToolVCFHeaderLines, boolean keepCombined, DbsnpArgumentCollection dbsnp, VariantContextWriter vcfWriter) { final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>(inputVCFHeader.getMetaDataInInputOrder()); headerLines.addAll(defaultToolVCFHeaderLines); // Remove GCVFBlocks headerLines.removeIf(vcfHeaderLine -> vcfHeaderLine.getKey().startsWith(GVCF_BLOCK)); headerLines.addAll(annotationEngine.getVCFAnnotationDescriptions(false)); headerLines.addAll(genotypingEngine.getAppropriateVCFInfoHeaders()); // add headers for annotations added by this tool headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY)); headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY)); headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.REFERENCE_GENOTYPE_QUALITY)); headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY)); // needed for gVCFs without DP tags if (keepCombined) { headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_QUAL_KEY)); headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_RAW_QUAL_APPROX_KEY)); } if ( dbsnp.dbsnp != null ) { VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.DBSNP_KEY); } final Set<String> sampleNameSet = samples.asSetOfSamples(); outputHeader = new VCFHeader(headerLines, new TreeSet<>(sampleNameSet)); vcfWriter.writeHeader(outputHeader); return vcfWriter; }
Example #27
Source File: CNNScoreVariants.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private void writeVCFHeader(VariantContextWriter vcfWriter) { // setup the header fields final VCFHeader inputHeader = getHeaderForVariants(); final Set<VCFHeaderLine> inputHeaders = inputHeader.getMetaDataInSortedOrder(); final Set<VCFHeaderLine> hInfo = new HashSet<>(inputHeaders); hInfo.add(GATKVCFHeaderLines.getInfoLine(scoreKey)); final TreeSet<String> samples = new TreeSet<>(); samples.addAll(inputHeader.getGenotypeSamples()); hInfo.addAll(getDefaultToolVCFHeaderLines()); final VCFHeader vcfHeader = new VCFHeader(hInfo, samples); vcfWriter.writeHeader(vcfHeader); }
Example #28
Source File: GATKToolUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private void writeHeaderAndBadVariant(final VariantContextWriter writer) { final VariantContextBuilder vcBuilder = new VariantContextBuilder( "chr1","1", 1, 1, Arrays.asList(Allele.create("A", true))); vcBuilder.attribute("fake", new Object()); final VariantContext vc = vcBuilder.make(); final VCFHeader vcfHeader = new VCFHeader(); writer.writeHeader(vcfHeader); writer.add(vc); }
Example #29
Source File: VcfTestUtils.java From picard with MIT License | 5 votes |
/** * This method makes a copy of the input VCF and creates an index file for it in the same location. * This is done so that we don't need to store the index file in the same repo * The copy of the input is done so that it and its index are in the same directory which is typically required. * * @param vcfFile the vcf file to index * @return File a vcf file (index file is created in same path). */ public static File createTemporaryIndexedVcfFromInput(final File vcfFile, final String tempFilePrefix, final String suffix) throws IOException { final String extension; if (suffix != null) { extension = suffix; } else if (vcfFile.getAbsolutePath().endsWith(".vcf")) { extension = ".vcf"; } else if (vcfFile.getAbsolutePath().endsWith(".vcf.gz")) { extension = ".vcf.gz"; } else { extension = ""; } if (!extension.equals(".vcf") && !extension.equals(".vcf.gz")) { throw new IllegalArgumentException("couldn't find a .vcf or .vcf.gz ending for input file " + vcfFile.getAbsolutePath()); } File output = createTemporaryIndexedFile(tempFilePrefix, extension); try (final VCFFileReader in = new VCFFileReader(vcfFile, false)) { final VCFHeader header = in.getFileHeader(); try (final VariantContextWriter out = new VariantContextWriterBuilder(). setReferenceDictionary(header.getSequenceDictionary()). setOptions(EnumSet.of(Options.INDEX_ON_THE_FLY)). setOutputFile(output).build()) { out.writeHeader(header); for (final VariantContext ctx : in) { out.add(ctx); } } } return output; }
Example #30
Source File: SortVcf.java From picard with MIT License | 5 votes |
private void writeSortedOutput(final VCFHeader outputHeader, final SortingCollection<VariantContext> sortedOutput) { final ProgressLogger writeProgress = new ProgressLogger(log, 25000, "wrote", "records"); final EnumSet<Options> options = CREATE_INDEX ? EnumSet.of(Options.INDEX_ON_THE_FLY) : EnumSet.noneOf(Options.class); final VariantContextWriter out = new VariantContextWriterBuilder(). setReferenceDictionary(outputHeader.getSequenceDictionary()). setOptions(options). setOutputFile(OUTPUT).build(); out.writeHeader(outputHeader); for (final VariantContext variantContext : sortedOutput) { out.add(variantContext); writeProgress.record(variantContext.getContig(), variantContext.getStart()); } out.close(); }