htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder Java Examples
The following examples show how to use
htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder.
You can vote up the examples you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example #1
Source File: PurpleStructuralVariantSupplier.java From hmftools with GNU General Public License v3.0 | 6 votes |
// Writes purity-adjusted structural variants to a block-compressed, tabix-indexed VCF,
// enriching each record with reference context from the indexed FASTA. Does nothing when
// no header was captured from the input. Both the FASTA reader and the VCF writer are in
// try-with-resources, so they are closed even if enrichment or writing fails.
void write(@NotNull final PurityAdjuster purityAdjuster, @NotNull final List<PurpleCopyNumber> copyNumbers) throws IOException { if (header.isPresent()) { try (final IndexedFastaSequenceFile indexedFastaSequenceFile = new IndexedFastaSequenceFile(new File(refGenomePath)); final VariantContextWriter writer = new VariantContextWriterBuilder().setOutputFile(outputVCF) .setReferenceDictionary(header.get().getSequenceDictionary()) .setIndexCreator(new TabixIndexCreator(header.get().getSequenceDictionary(), new TabixFormat())) .setOutputFileType(VariantContextWriterBuilder.OutputType.BLOCK_COMPRESSED_VCF) .setOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER) .build()) { final StructuralRefContextEnrichment refEnricher = new StructuralRefContextEnrichment(indexedFastaSequenceFile, writer::add); writer.writeHeader(refEnricher.enrichHeader(header.get())); enriched(purityAdjuster, copyNumbers).forEach(refEnricher); refEnricher.flush(); } } }
Example #2
Source File: GtcToVcf.java From picard with MIT License | 6 votes |
// Streams the pre-sorted variants to the output VCF; records with more than one ALT allele
// are tagged with the TRIALLELIC filter before writing. The writer is in try-with-resources.
/** * Writes out the VariantContext objects in the order presented to the supplied output file * in VCF format. */ private void writeVcf(final SortingCollection<VariantContext> variants, final File output, final SAMSequenceDictionary dict, final VCFHeader vcfHeader) { try (final VariantContextWriter writer = new VariantContextWriterBuilder() .setOutputFile(output) .setReferenceDictionary(dict) .setOptions(VariantContextWriterBuilder.DEFAULT_OPTIONS) .build()) { writer.writeHeader(vcfHeader); for (final VariantContext variant : variants) { if (variant.getAlternateAlleles().size() > 1) { variant.getCommonInfo().addFilter(InfiniumVcfFields.TRIALLELIC); } writer.add(variant); } } }
Example #3
Source File: VariantToVcf.java From genomewarp with Apache License 2.0 | 6 votes |
// NOTE(review): the static vcfWriter is lazily created on the first call and cached — every
// later call silently ignores the OutputStream it was handed and keeps writing to the first
// stream; the writer is also never closed or flushed here, so output may be incomplete until
// the process exits. Confirm callers rely on this single-stream, caller-flushes contract.
/** * Converts a list of variants into a VCF file, given a {@link VCFHeader} * and an {@link OutputStream}. * <p> This function uses HTSJDK to create {@link VariantCall} objects then * writes them to the given output stream. Note that this implementation depends * heavily on HTSJDK and makes the same assumptions as HTSJDK (e.g. integer values GQ). * * @param header The header to use to generate the output VCF * @param variants A list of variants to encode in the output VCF * @param os The output stream to which to write the generated VCF */ public static void convertVariantToVcf(VCFHeader header, List<Variant> variants, OutputStream os, boolean writeHeader) { if (vcfWriter == null) { vcfWriter = new VariantContextWriterBuilder().clearOptions() .setOutputVCFStream(os).build(); } if (writeHeader) { vcfWriter.writeHeader(header); } for (Variant currVariant : variants) { vcfWriter.add(getContextFromVariant(header, currVariant)); } }
Example #4
Source File: MNVValidatorApplication.java From hmftools with GNU General Public License v3.0 | 6 votes |
/**
 * Reads variants from {@code filePath}, folds neighbouring SNVs into potential MNV regions,
 * validates merge candidates against the tumor BAM, and writes the merged output VCF.
 *
 * @param strelka   when true, each raw variant is simplified via StrelkaPostProcess first
 * @param filePath  input VCF path
 * @param outputVcf output VCF path
 * @param tumorBam  BAM used by the validator to confirm MNV merges
 */
private static void processVariants(boolean strelka, @NotNull final String filePath, @NotNull final String outputVcf,
        @NotNull final String tumorBam) {
    // try-with-resources closes writer then reader even when an exception is thrown mid-stream;
    // the original closed both manually and leaked them on any failure.
    try (final VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false)) {
        final VCFHeader outputHeader = generateOutputHeader(vcfReader.getFileHeader(), "TUMOR");
        try (final VariantContextWriter vcfWriter = new VariantContextWriterBuilder().setOutputFile(outputVcf)
                .setReferenceDictionary(vcfReader.getFileHeader().getSequenceDictionary())
                .build()) {
            vcfWriter.writeHeader(outputHeader);
            final MNVValidator validator = ImmutableMNVValidator.of(tumorBam);
            final MNVMerger merger = ImmutableMNVMerger.of(outputHeader);
            // Left: region currently being grown; right: region just completed (if any).
            Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair =
                    ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty());
            for (final VariantContext rawVariant : vcfReader) {
                final VariantContext simplifiedVariant =
                        strelka ? StrelkaPostProcess.simplifyVariant(rawVariant, StrelkaPostProcess.TUMOR_GENOTYPE) : rawVariant;
                final PotentialMNVRegion potentialMNV = outputPair.getLeft();
                outputPair = MNVDetector.addMnvToRegion(potentialMNV, simplifiedVariant);
                outputPair.getRight().ifPresent(mnvRegion -> validator.mergeVariants(mnvRegion, merger).forEach(vcfWriter::add));
            }
            // Flush the final, still-open region.
            validator.mergeVariants(outputPair.getLeft(), merger).forEach(vcfWriter::add);
        }
    }
    LOGGER.info("Written output variants to " + outputVcf);
}
Example #5
Source File: GenotypeConcordance.java From picard with MIT License | 6 votes |
// Builds (and writes the header for) the optional output VCF writer. The header merges the
// metadata of BOTH the call and truth readers plus the contingency-state line, with exactly
// two samples: the call sample then the truth sample. Returns empty when OUTPUT_VCF is off.
// The returned writer must be closed by the caller.
/** Gets the variant context writer if the output VCF is to be written, otherwise empty. */ private Optional<VariantContextWriter> getVariantContextWriter(final VCFFileReader truthReader, final VCFFileReader callReader) { if (OUTPUT_VCF) { final File outputVcfFile = new File(OUTPUT + OUTPUT_VCF_FILE_EXTENSION); final VariantContextWriterBuilder builder = new VariantContextWriterBuilder() .setOutputFile(outputVcfFile) .setReferenceDictionary(callReader.getFileHeader().getSequenceDictionary()) .setOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER) .setOption(Options.INDEX_ON_THE_FLY); final VariantContextWriter writer = builder.build(); // create the output header final List<String> sampleNames = Arrays.asList(OUTPUT_VCF_CALL_SAMPLE_NAME, OUTPUT_VCF_TRUTH_SAMPLE_NAME); final Set<VCFHeaderLine> headerLines = new HashSet<>(); headerLines.addAll(callReader.getFileHeader().getMetaDataInInputOrder()); headerLines.addAll(truthReader.getFileHeader().getMetaDataInInputOrder()); headerLines.add(CONTINGENCY_STATE_HEADER_LINE); writer.writeHeader(new VCFHeader(headerLines, sampleNames)); return Optional.of(writer); } else { return Optional.empty(); } }
Example #6
Source File: AmberVCF.java From hmftools with GNU General Public License v3.0 | 6 votes |
// Writes tumor BAF evidence to an indexed VCF. Genotypes for each site combine the het-normal
// evidence from every normal sample with the tumor genotype for that site.
// NOTE(review): the writer is not in try-with-resources, so it leaks if create()/add() throws.
public void writeBAF(@NotNull final String filename, @NotNull final Collection<TumorBAF> tumorEvidence, @NotNull final AmberHetNormalEvidence hetNormalEvidence) { final List<TumorBAF> list = Lists.newArrayList(tumorEvidence); Collections.sort(list); final VariantContextWriter writer = new VariantContextWriterBuilder().setOutputFile(filename).modifyOption(Options.INDEX_ON_THE_FLY, true).build(); final VCFHeader header = header(config.tumorOnly() ? Collections.singletonList(config.tumor()) : config.allSamples()); writer.setHeader(header); writer.writeHeader(header); final ListMultimap<AmberSite, Genotype> genotypeMap = ArrayListMultimap.create(); for (final String sample : hetNormalEvidence.samples()) { for (BaseDepth baseDepth : hetNormalEvidence.evidence(sample)) { genotypeMap.put(AmberSiteFactory.asSite(baseDepth), createGenotype(sample, baseDepth)); } } for (final TumorBAF tumorBAF : list) { AmberSite tumorSite = AmberSiteFactory.tumorSite(tumorBAF); genotypeMap.put(tumorSite, createGenotype(tumorBAF)); writer.add(create(tumorBAF, genotypeMap.get(tumorSite))); } writer.close(); }
Example #7
Source File: FingerprintUtils.java From picard with MIT License | 6 votes |
// Creates a single-sample VCF writer for fingerprint output and writes its header (reference
// path, source, file date, plus standard PL/AD/DP FORMAT lines). The caller owns the returned
// writer and must close it.
private static VariantContextWriter getVariantContextWriter(final File outputFile, final File referenceSequenceFileName, final String sample, final String source, final ReferenceSequenceFile ref) { final VariantContextWriter variantContextWriter = new VariantContextWriterBuilder() .setReferenceDictionary(ref.getSequenceDictionary()) .setOutputFile(outputFile).build(); final Set<VCFHeaderLine> lines = new LinkedHashSet<>(); lines.add(new VCFHeaderLine("reference", referenceSequenceFileName.getAbsolutePath())); lines.add(new VCFHeaderLine("source", source)); lines.add(new VCFHeaderLine("fileDate", new Date().toString())); lines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_PL_KEY)); lines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS)); lines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY)); final VCFHeader header = new VCFHeader(lines, Collections.singletonList(sample)); header.setSequenceDictionary(ref.getSequenceDictionary()); variantContextWriter.writeHeader(header); return variantContextWriter; }
Example #8
Source File: AnnotateStrelkaWithAllelicDepth.java From hmftools with GNU General Public License v3.0 | 6 votes |
// Parses and validates the mandatory -vcf_in/-vcf_out arguments, then wires up the input
// reader and a tabix-indexed output writer that tolerates INFO/FORMAT fields missing from
// the header (ALLOW_MISSING_FIELDS_IN_HEADER).
private AnnotateStrelkaWithAllelicDepth(final Options options, final String... args) throws ParseException { final CommandLine cmd = createCommandLine(args, options); if (!cmd.hasOption(VCF_IN)) { throw new ParseException(VCF_IN + " is a mandatory argument"); } if (!cmd.hasOption(VCF_OUT)) { throw new ParseException(VCF_OUT + " is a mandatory argument"); } inputVCF = cmd.getOptionValue(VCF_IN); outputVCF = cmd.getOptionValue(VCF_OUT); vcfReader = new VCFFileReader(new File(inputVCF), false); header = generateOutputHeader(vcfReader.getFileHeader()); vcfWriter = new VariantContextWriterBuilder().setOutputFile(outputVCF) .setReferenceDictionary(header.getSequenceDictionary()) .setIndexCreator(new TabixIndexCreator(header.getSequenceDictionary(), new TabixFormat())) .setOption(htsjdk.variant.variantcontext.writer.Options.ALLOW_MISSING_FIELDS_IN_HEADER) .build(); }
Example #9
Source File: HaplotypeMap.java From picard with MIT License | 5 votes |
public void writeAsVcf(final File output, final File refFile) throws FileNotFoundException { ReferenceSequenceFile ref = new IndexedFastaSequenceFile(refFile); try (VariantContextWriter writer = new VariantContextWriterBuilder() .setOutputFile(output) .setReferenceDictionary(ref.getSequenceDictionary()) .build()) { final VCFHeader vcfHeader = new VCFHeader( VCFUtils.withUpdatedContigsAsLines(Collections.emptySet(), refFile, header.getSequenceDictionary(), false), Collections.singleton(HET_GENOTYPE_FOR_PHASING)); VCFUtils.withUpdatedContigsAsLines(Collections.emptySet(), refFile, header.getSequenceDictionary(), false); vcfHeader.addMetaDataLine(new VCFHeaderLine(VCFHeaderVersion.VCF4_2.getFormatString(), VCFHeaderVersion.VCF4_2.getVersionString())); vcfHeader.addMetaDataLine(new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed")); vcfHeader.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype")); vcfHeader.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.PHASE_SET_KEY, 1, VCFHeaderLineType.String, "Phase-set identifier for phased genotypes.")); vcfHeader.addMetaDataLine(new VCFHeaderLine(VCFHeader.SOURCE_KEY,"HaplotypeMap::writeAsVcf")); vcfHeader.addMetaDataLine(new VCFHeaderLine("reference","HaplotypeMap::writeAsVcf")); // vcfHeader.addMetaDataLine(new VCFHeaderLine()); writer.writeHeader(vcfHeader); final LinkedList<VariantContext> variants = new LinkedList<>(this.asVcf(ref)); variants.sort(vcfHeader.getVCFRecordComparator()); variants.forEach(writer::add); } }
Example #10
Source File: UpdateVcfSequenceDictionary.java From picard with MIT License | 5 votes |
// Rewrites the input VCF with the sequence dictionary extracted from SEQUENCE_DICTIONARY,
// copying every record through unchanged. clearOptions() strips the builder defaults before
// optionally re-enabling on-the-fly indexing. Resources are released via CloserUtil/close().
@Override protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY); IOUtil.assertFileIsWritable(OUTPUT); final SAMSequenceDictionary samSequenceDictionary = SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY.toPath()); final VCFFileReader fileReader = new VCFFileReader(INPUT, false); final VCFHeader fileHeader = fileReader.getFileHeader(); final VariantContextWriterBuilder builder = new VariantContextWriterBuilder() .setReferenceDictionary(samSequenceDictionary) .clearOptions(); if (CREATE_INDEX) builder.setOption(Options.INDEX_ON_THE_FLY); final VariantContextWriter vcfWriter = builder.setOutputFile(OUTPUT).build(); fileHeader.setSequenceDictionary(samSequenceDictionary); vcfWriter.writeHeader(fileHeader); final ProgressLogger progress = new ProgressLogger(log, 10000); final CloseableIterator<VariantContext> iterator = fileReader.iterator(); while (iterator.hasNext()) { final VariantContext context = iterator.next(); vcfWriter.add(context); progress.record(context.getContig(), context.getStart()); } CloserUtil.close(iterator); CloserUtil.close(fileReader); vcfWriter.close(); return 0; }
Example #11
Source File: RenameSampleInVcf.java From picard with MIT License | 5 votes |
// Renames the single sample of a single-sample VCF to NEW_SAMPLE_NAME. Rejects multi-sample
// input, and (when OLD_SAMPLE_NAME is given) verifies the existing sample matches it before
// rewriting. Index-on-the-fly is toggled on a copy of the builder's default option set.
@Override protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsWritable(OUTPUT); final VCFFileReader in = new VCFFileReader(INPUT, false); final VCFHeader header = in.getFileHeader(); if (header.getGenotypeSamples().size() > 1) { throw new IllegalArgumentException("Input VCF must be single-sample."); } if (OLD_SAMPLE_NAME != null && !OLD_SAMPLE_NAME.equals(header.getGenotypeSamples().get(0))) { throw new IllegalArgumentException("Input VCF did not contain expected sample. Contained: " + header.getGenotypeSamples().get(0)); } final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterBuilder.DEFAULT_OPTIONS); if (CREATE_INDEX) options.add(Options.INDEX_ON_THE_FLY); else options.remove(Options.INDEX_ON_THE_FLY); final VCFHeader outHeader = new VCFHeader(header.getMetaDataInInputOrder(), CollectionUtil.makeList(NEW_SAMPLE_NAME)); final VariantContextWriter out = new VariantContextWriterBuilder() .setOptions(options) .setOutputFile(OUTPUT).setReferenceDictionary(outHeader.getSequenceDictionary()).build(); out.writeHeader(outHeader); for (final VariantContext ctx : in) { out.add(ctx); } out.close(); in.close(); return 0; }
Example #12
Source File: SortVcf.java From picard with MIT License | 5 votes |
/**
 * Writes the sorted variants to OUTPUT, logging progress every 25000 records.
 * On-the-fly index creation is enabled only when CREATE_INDEX is set.
 *
 * @param outputHeader header to write (its dictionary is used as the reference dictionary)
 * @param sortedOutput variants already in output order
 */
private void writeSortedOutput(final VCFHeader outputHeader, final SortingCollection<VariantContext> sortedOutput) {
    final ProgressLogger writeProgress = new ProgressLogger(log, 25000, "wrote", "records");
    final EnumSet<Options> options = CREATE_INDEX ? EnumSet.of(Options.INDEX_ON_THE_FLY) : EnumSet.noneOf(Options.class);
    // try-with-resources: the original closed the writer manually and leaked it if add()
    // threw mid-stream.
    try (final VariantContextWriter out = new VariantContextWriterBuilder()
            .setReferenceDictionary(outputHeader.getSequenceDictionary())
            .setOptions(options)
            .setOutputFile(OUTPUT).build()) {
        out.writeHeader(outputHeader);
        for (final VariantContext variantContext : sortedOutput) {
            out.add(variantContext);
            writeProgress.record(variantContext.getContig(), variantContext.getStart());
        }
    }
}
Example #13
Source File: FindMendelianViolations.java From picard with MIT License | 5 votes |
// When VCF_DIR is set, writes one contig-ordered VCF per trio containing that family's
// mendelian violations. The shared header lines gain the MV INFO annotations; each trio VCF
// lists mother, father, child as its samples. Violations are deduplicated/ordered by TreeSet.
// NOTE(review): the per-trio writer is not in try-with-resources — it leaks if add() throws.
private void writeAllViolations(final MendelianViolationDetector.Result result) { if (VCF_DIR != null) { LOG.info(String.format("Writing family violation VCFs to %s/", VCF_DIR.getAbsolutePath())); final VariantContextComparator vcComparator = new VariantContextComparator(inputHeader.get().getContigLines()); final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>(inputHeader.get().getMetaDataInInputOrder()); headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.MENDELIAN_VIOLATION_KEY, 1, VCFHeaderLineType.String, "Type of mendelian violation.")); headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.ORIGINAL_AC, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Original AC")); headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.ORIGINAL_AF, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Original AF")); headerLines.add(new VCFInfoHeaderLine(MendelianViolationDetector.ORIGINAL_AN, 1, VCFHeaderLineType.Integer, "Original AN")); for (final PedFile.PedTrio trio : pedFile.get().values()) { final File outputFile = new File(VCF_DIR, IOUtil.makeFileNameSafe(trio.getFamilyId() + IOUtil.VCF_FILE_EXTENSION)); LOG.info(String.format("Writing %s violation VCF to %s", trio.getFamilyId(), outputFile.getAbsolutePath())); final VariantContextWriter out = new VariantContextWriterBuilder() .setOutputFile(outputFile) .unsetOption(INDEX_ON_THE_FLY) .build(); final VCFHeader newHeader = new VCFHeader(headerLines, CollectionUtil.makeList(trio.getMaternalId(), trio.getPaternalId(), trio.getIndividualId())); final TreeSet<VariantContext> orderedViolations = new TreeSet<>(vcComparator); orderedViolations.addAll(result.violations().get(trio.getFamilyId())); out.writeHeader(newHeader); orderedViolations.forEach(out::add); out.close(); } } }
Example #14
Source File: VcfFormatConverter.java From picard with MIT License | 5 votes |
// Copies a VCF from one container format to another (the builder picks the output type from
// the OUTPUT extension). Fails fast when indexing is requested but the input header carries
// no sequence dictionary, since on-the-fly indexing requires one.
@Override protected int doWork() { final ProgressLogger progress = new ProgressLogger(LOG, 10000); IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsWritable(OUTPUT); final VCFFileReader reader = new VCFFileReader(INPUT, REQUIRE_INDEX); final VCFHeader header = new VCFHeader(reader.getFileHeader()); final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary(); if (CREATE_INDEX && sequenceDictionary == null) { throw new PicardException("A sequence dictionary must be available in the input file when creating indexed output."); } final VariantContextWriterBuilder builder = new VariantContextWriterBuilder() .setOutputFile(OUTPUT) .setReferenceDictionary(sequenceDictionary); if (CREATE_INDEX) builder.setOption(Options.INDEX_ON_THE_FLY); else builder.unsetOption(Options.INDEX_ON_THE_FLY); final VariantContextWriter writer = builder.build(); writer.writeHeader(header); final CloseableIterator<VariantContext> iterator = reader.iterator(); while (iterator.hasNext()) { final VariantContext context = iterator.next(); writer.add(context); progress.record(context.getContig(), context.getStart()); } CloserUtil.close(iterator); CloserUtil.close(reader); writer.close(); return 0; }
Example #15
Source File: VcfTestUtils.java From picard with MIT License | 5 votes |
/**
 * Copies the given VCF to a temporary location and builds an index next to it, so tests do
 * not need to keep index files in the repository. The copy guarantees the VCF and its index
 * live in the same directory, which most consumers require.
 *
 * @param vcfFile        the vcf file to copy and index
 * @param tempFilePrefix prefix for the temporary file name
 * @param suffix         forced output extension, or null to derive it from the input name
 * @return the indexed temporary vcf file
 * @throws IllegalArgumentException when no .vcf / .vcf.gz extension can be determined
 */
public static File createTemporaryIndexedVcfFromInput(final File vcfFile, final String tempFilePrefix, final String suffix) throws IOException {
    final String path = vcfFile.getAbsolutePath();
    // Prefer the explicit suffix; otherwise derive the extension from the input file name.
    final String extension;
    if (suffix != null) {
        extension = suffix;
    } else if (path.endsWith(".vcf.gz")) {
        extension = ".vcf.gz";
    } else if (path.endsWith(".vcf")) {
        extension = ".vcf";
    } else {
        extension = "";
    }
    if (!extension.equals(".vcf") && !extension.equals(".vcf.gz")) {
        throw new IllegalArgumentException("couldn't find a .vcf or .vcf.gz ending for input file " + vcfFile.getAbsolutePath());
    }
    final File output = createTemporaryIndexedFile(tempFilePrefix, extension);
    try (final VCFFileReader in = new VCFFileReader(vcfFile, false)) {
        final VCFHeader header = in.getFileHeader();
        try (final VariantContextWriter out = new VariantContextWriterBuilder()
                .setReferenceDictionary(header.getSequenceDictionary())
                .setOptions(EnumSet.of(Options.INDEX_ON_THE_FLY))
                .setOutputFile(output).build()) {
            out.writeHeader(header);
            for (final VariantContext record : in) {
                out.add(record);
            }
        }
    }
    return output;
}
Example #16
Source File: GATKVariantContextUtils.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
// Factory for VariantContextWriters: clears htsjdk's default options, falls back to plain
// VCF when the extension is unrecognized, then applies MD5/reference-dictionary/extra options.
/** * Creates a VariantContextWriter whose outputFile type is based on the extension of the output file name. * The default options set by VariantContextWriter are cleared before applying ALLOW_MISSING_FIELDS_IN_HEADER (if * <code>lenientProcessing</code> is set), followed by the set of options specified by any <code>options</code> args. * * @param outPath output Path for this writer. May not be null. * @param referenceDictionary required if on the fly indexing is set, otherwise can be null * @param createMD5 true if an md5 file should be created * @param options variable length list of additional Options to be set for this writer * @return a VariantContextWriter that must be closed by the caller */ public static VariantContextWriter createVCFWriter( final Path outPath, final SAMSequenceDictionary referenceDictionary, final boolean createMD5, final Options... options) { Utils.nonNull(outPath); VariantContextWriterBuilder vcWriterBuilder = new VariantContextWriterBuilder().clearOptions().setOutputPath(outPath); if (VariantContextWriterBuilder.OutputType.UNSPECIFIED == VariantContextWriterBuilder.determineOutputTypeFromFile(outPath)) { // the only way the user has to specify an output type is by file extension, and htsjdk // throws if it can't map the file extension to a known vcf type, so fallback to a default // of VCF logger.warn(String.format( "Can't determine output variant file format from output file extension \"%s\". Defaulting to VCF.", FilenameUtils.getExtension(outPath.getFileName().toString()))); vcWriterBuilder = vcWriterBuilder.setOutputFileType(VariantContextWriterBuilder.OutputType.VCF); } if (createMD5) { vcWriterBuilder.setCreateMD5(); } if (null != referenceDictionary) { vcWriterBuilder = vcWriterBuilder.setReferenceDictionary(referenceDictionary); } for (Options opt : options) { vcWriterBuilder = vcWriterBuilder.setOption(opt); } return vcWriterBuilder.build(); }
Example #17
Source File: SVVCFWriter.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Builds a stream-backed VariantContextWriter with all default options cleared.
 *
 * @param outputStream destination stream; owned by the caller
 * @param referenceSequenceDictionary optional dictionary; applied only when non-null
 * @return a writer the caller must close
 */
private static VariantContextWriter getVariantContextWriter(final OutputStream outputStream,
        final SAMSequenceDictionary referenceSequenceDictionary) {
    VariantContextWriterBuilder vcWriterBuilder = new VariantContextWriterBuilder()
            .clearOptions()
            .setOutputStream(outputStream);
    if (null != referenceSequenceDictionary) {
        vcWriterBuilder = vcWriterBuilder.setReferenceDictionary(referenceSequenceDictionary);
    }
    // Removed a dead loop over an empty Options[] literal — it could never execute and set
    // no options.
    return vcWriterBuilder.build();
}
Example #18
Source File: EvaluateCopyNumberTriStateCalls.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 5 votes |
// Opens an option-less VCF writer for the evaluation output and registers every FORMAT/INFO/
// FILTER header line used by the copy-number tri-state evaluation before writing the header.
// NOTE(review): CALL_QUALITY_KEY appears to be registered twice with different descriptions —
// confirm which definition is intended. Several header descriptions contain typos ("It the
// values", "th event", "ther are"); they are runtime strings and are left untouched here.
// NOTE(review): this listing was garbled by extraction — the long string below is split
// across lines and contains a stray control character; reconcile against the original source.
private VariantContextWriter openVCFWriter(final File outputFile, final Set<String> samples) { final VariantContextWriterBuilder builder = new VariantContextWriterBuilder(); builder.setOutputFile(outputFile); builder.clearOptions(); final VariantContextWriter result = builder.build(); final VCFHeader header = new VCFHeader(Collections.emptySet(), samples); CopyNumberTriStateAllele.addHeaderLinesTo(header); EvaluationClass.addHeaderLinesTo(header); // Format annotations. header.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.Character, "Called genotype")); header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.CALL_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Quality of the call")); header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Number of called segments that overlap with the truth")); header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Called allele count for mixed calls")); header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.TRUTH_COPY_FRACTION_KEY, 1, VCFHeaderLineType.Float, "Truth copy fraction estimated")); header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.TRUTH_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Truth call quality")); header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.EVALUATION_CLASS_KEY, 1, VCFHeaderLineType.Character, "The evaluation class for the call or lack of call. 
It the values of the header key '" + EvaluationClass.VCF_HEADER_KEY + "'")); header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.TRUTH_GENOTYPE_KEY, 1, VCFHeaderLineType.Character, "The truth genotype")); header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.CALLED_TARGET_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Number of targets covered by called segments")); header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.CALL_QUALITY_KEY, 1, VCFHeaderLineType.Float, "1 - The probability of th event in Phred scale (the maximum if ther are more than one segment")); header.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Integer, "The quality of the call (the maximum if there are more than one segment")); header.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Character, "Genotype filters")); // Info annotations. header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.TRUTH_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "The frequency of the alternative alleles in the truth callset")); header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.TRUTH_ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of called alleles in the truth callset")); header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.CALLS_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "The frequency of the alternative alleles in the actual callset")); header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.CALLS_ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of called alleles in the actual callset")); header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.TRUTH_TARGET_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Number of targets overlapped by this variant")); header.addMetaDataLine(new 
VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position for the variant")); // Filter annotations. for (final EvaluationFilter filter : EvaluationFilter.values()) { header.addMetaDataLine(new VCFFilterHeaderLine(filter.name(), filter.description)); header.addMetaDataLine(new VCFFilterHeaderLine(filter.acronym, filter.description)); } header.addMetaDataLine(new VCFFilterHeaderLine(EvaluationFilter.PASS, "Indicates that it passes all filters")); result.writeHeader(header); return result; }
Example #19
Source File: HotspotEvidenceVCF.java From hmftools with GNU General Public License v3.0 | 5 votes |
// Writes hotspot evidence to an un-indexed VCF, grouping evidence by genome position so that
// each output record combines all evidence at that site.
// NOTE(review): writer is closed manually, so it leaks if create()/add() throws.
public void write(@NotNull final String filename, @NotNull final List<HotspotEvidence> evidenceList) { final VariantContextWriter writer = new VariantContextWriterBuilder().setOutputFile(filename).modifyOption(Options.INDEX_ON_THE_FLY, false).build(); writer.setHeader(header); writer.writeHeader(header); final ListMultimap<GenomePosition, HotspotEvidence> evidenceMap = Multimaps.index(evidenceList, GenomePositions::create); for (GenomePosition site : evidenceMap.keySet()) { final List<HotspotEvidence> evidence = evidenceMap.get(site); final VariantContext context = create(evidence); writer.add(context); } writer.close(); }
Example #20
Source File: AmberVCF.java From hmftools with GNU General Public License v3.0 | 5 votes |
// Writes sorted tumor-contamination evidence to an indexed VCF whose header lists the
// primary reference sample and the tumor sample.
void writeContamination(@NotNull final String filename, @NotNull final Collection<TumorContamination> evidence) { final List<TumorContamination> list = Lists.newArrayList(evidence); Collections.sort(list); final VariantContextWriter writer = new VariantContextWriterBuilder().setOutputFile(filename).modifyOption(Options.INDEX_ON_THE_FLY, true).build(); final VCFHeader header = header(Lists.newArrayList(config.primaryReference(), config.tumor())); writer.setHeader(header); writer.writeHeader(header); list.forEach(x -> writer.add(create(x))); writer.close(); }
Example #21
Source File: AmberVCF.java From hmftools with GNU General Public License v3.0 | 5 votes |
/**
 * Writes the SNP-check base depths, sorted, to an indexed VCF whose header lists only the
 * primary reference sample.
 *
 * @param filename   output VCF path
 * @param baseDepths depths to write; the input list is not modified
 */
void writeSNPCheck(@NotNull final String filename, @NotNull final List<BaseDepth> baseDepths) {
    // Sort a private copy so the caller's list stays untouched.
    final List<BaseDepth> sorted = Lists.newArrayList(baseDepths);
    Collections.sort(sorted);
    final VariantContextWriter vcfWriter =
            new VariantContextWriterBuilder().setOutputFile(filename).modifyOption(Options.INDEX_ON_THE_FLY, true).build();
    final VCFHeader vcfHeader = header(Lists.newArrayList(config.primaryReference()));
    vcfWriter.setHeader(vcfHeader);
    vcfWriter.writeHeader(vcfHeader);
    for (final BaseDepth depth : sorted) {
        vcfWriter.add(create(depth));
    }
    vcfWriter.close();
}
Example #22
Source File: SageVCF.java From hmftools with GNU General Public License v3.0 | 5 votes |
// Opens the Sage output VCF with on-the-fly indexing, synchronous IO, and the reference's
// sequence dictionary, then writes a header enriched with somatic ref-context metadata.
// Records flow in through writeToFile via the enrichment pipeline.
public SageVCF(@NotNull final IndexedFastaSequenceFile reference, @NotNull final SageConfig config) { writer = new VariantContextWriterBuilder().setOutputFile(config.outputFile()) .modifyOption(Options.INDEX_ON_THE_FLY, true) .modifyOption(Options.USE_ASYNC_IO, false) .setReferenceDictionary(reference.getSequenceDictionary()) .build(); refContextEnrichment = new SomaticRefContextEnrichment(reference, this::writeToFile); final VCFHeader header = refContextEnrichment.enrichHeader(header(config)); header.setSequenceDictionary(reference.getSequenceDictionary()); writer.writeHeader(header); }
Example #23
Source File: PonVCF.java From hmftools with GNU General Public License v3.0 | 5 votes |
// Opens the panel-of-normals output VCF: no index, synchronous IO, genotypes omitted
// (DO_NOT_WRITE_GENOTYPES). The header declares the PON_COUNT/PON_TOTAL/PON_MAX INFO fields
// plus a PonInputSampleCount line recording how many samples were pooled.
PonVCF(final String output, int sampleSize) { writer = new VariantContextWriterBuilder().setOutputFile(output) .modifyOption(Options.INDEX_ON_THE_FLY, false) .modifyOption(Options.USE_ASYNC_IO, false) .modifyOption(Options.DO_NOT_WRITE_GENOTYPES, true) .build(); final VCFHeader header = new VCFHeader(); header.addMetaDataLine(new VCFInfoHeaderLine(PON_COUNT, 1, VCFHeaderLineType.Integer, "how many samples had the variant")); header.addMetaDataLine(new VCFInfoHeaderLine(PON_TOTAL, 1, VCFHeaderLineType.Integer, "total depth")); header.addMetaDataLine(new VCFInfoHeaderLine(PON_MAX, 1, VCFHeaderLineType.Integer, "max depth")); header.addMetaDataLine(new VCFHeaderLine("PonInputSampleCount", String.valueOf(sampleSize))); writer.writeHeader(header); }
Example #24
Source File: ViccExtractorTestApplication.java From hmftools with GNU General Public License v3.0 | 5 votes |
// Writes VICC-derived hotspots as genotype-less VCF records with "sources" and "feature"
// attributes. The header is empty (no samples, no metadata), which is why
// ALLOW_MISSING_FIELDS_IN_HEADER is required for the per-record attributes.
// NOTE(review): writer closed manually — leaks if record construction/add throws.
private static void writeHotspots(@NotNull String hotspotVcf, @NotNull Map<ViccEntry, ViccExtractionResult> resultsPerEntry) { VariantContextWriter writer = new VariantContextWriterBuilder().setOutputFile(hotspotVcf) .setOutputFileType(VariantContextWriterBuilder.OutputType.VCF) .setOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER) .modifyOption(Options.INDEX_ON_THE_FLY, false) .build(); VCFHeader header = new VCFHeader(Sets.newHashSet(), Lists.newArrayList()); writer.writeHeader(header); for (Map.Entry<VariantHotspot, HotspotAnnotation> entry : convertAndSort(resultsPerEntry).entrySet()) { VariantHotspot hotspot = entry.getKey(); HotspotAnnotation annotation = entry.getValue(); List<Allele> hotspotAlleles = buildAlleles(hotspot); VariantContext variantContext = new VariantContextBuilder().noGenotypes() .source("VICC") .chr(hotspot.chromosome()) .start(hotspot.position()) .alleles(hotspotAlleles) .computeEndFromAlleles(hotspotAlleles, (int) hotspot.position()) .attribute("sources", annotation.sources()) .attribute("feature", ProteinKeyFormatter.toProteinKey(annotation.gene(), annotation.transcript(), annotation.proteinAnnotation())) .make(); LOGGER.debug("Writing {}", variantContext); writer.add(variantContext); } writer.close(); }
Example #25
Source File: MNVDetectorApplication.java From hmftools with GNU General Public License v3.0 | 5 votes |
/**
 * Scans the input VCF for potential MNV (multi-nucleotide variant) regions and writes the
 * surviving regions to both an output VCF and a BED file.
 *
 * Fix: the reader and both writers are now managed by try-with-resources, so all three
 * are closed even if an exception is thrown mid-stream; the original closed them only on
 * the success path.
 *
 * @param filePath  input VCF to scan
 * @param outputVcf output VCF receiving the MNV region variants
 * @param outputBed output BED receiving the MNV region intervals
 * @param strelka   when true, variants are simplified via Strelka post-processing and the
 *                  header is regenerated for the tumor genotype
 * @throws IOException on failure writing the BED file
 */
private static void processVariants(@NotNull final String filePath, @NotNull final String outputVcf, @NotNull final String outputBed,
        boolean strelka) throws IOException {
    try (final VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false)) {
        final VCFHeader outputHeader =
                strelka ? generateOutputHeader(vcfReader.getFileHeader(), StrelkaPostProcess.TUMOR_GENOTYPE) : vcfReader.getFileHeader();
        try (final BufferedWriter bedWriter = new BufferedWriter(new FileWriter(outputBed, false));
             final VariantContextWriter vcfWriter = new VariantContextWriterBuilder().setOutputFile(outputVcf)
                     .setReferenceDictionary(outputHeader.getSequenceDictionary())
                     .build()) {
            vcfWriter.writeHeader(outputHeader);

            // left = MNV region currently being accumulated; right = a completed region
            // (if any) that is ready to be filtered and written out.
            Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair =
                    ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty());
            for (final VariantContext rawVariant : vcfReader) {
                final VariantContext variant =
                        strelka ? StrelkaPostProcess.simplifyVariant(rawVariant, StrelkaPostProcess.TUMOR_GENOTYPE) : rawVariant;

                final PotentialMNVRegion potentialMNVregion = outputPair.getLeft();
                outputPair = MNVDetector.addMnvToRegion(potentialMNVregion, variant);
                outputPair.getRight()
                        .ifPresent(mnvRegion -> filterMnvRegion(mnvRegion).ifPresent(filteredRegion -> writeMnvRegionToFiles(filteredRegion,
                                vcfWriter,
                                bedWriter,
                                "\n")));
            }
            // Flush the final in-progress region; no trailing newline for the last BED entry.
            filterMnvRegion(outputPair.getLeft()).ifPresent(mnvRegion -> writeMnvRegionToFiles(mnvRegion, vcfWriter, bedWriter, ""));
        }
    }
    LOGGER.info("Written output variants to {}. Written bed regions to {}.", outputVcf, outputBed);
}
Example #26
Source File: StrelkaPostProcessApplication.java From hmftools with GNU General Public License v3.0 | 5 votes |
/**
 * Post-processes Strelka variants: filters them against the high-confidence regions,
 * simplifies them to the given sample, merges adjacent SNVs into MNVs (validated
 * against the tumor BAM) and writes the result to {@code outputVcf}.
 *
 * Fix: reader and writer are now opened in try-with-resources so both are closed even
 * when filtering/merging throws; the original closed them only on the success path.
 *
 * @param filePath             input Strelka VCF
 * @param highConfidenceSlicer slicer defining high-confidence regions for filtering
 * @param outputVcf            path of the processed VCF to write
 * @param sampleName           sample used to simplify variants and regenerate the header
 * @param tumorBam             tumor BAM used by the MNV validator
 */
private static void processVariants(@NotNull final String filePath, @NotNull final Slicer highConfidenceSlicer,
        @NotNull final String outputVcf, @NotNull final String sampleName, @NotNull final String tumorBam) {
    try (final VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false)) {
        final VCFHeader outputHeader = generateOutputHeader(vcfReader.getFileHeader(), sampleName);
        try (final VariantContextWriter writer = new VariantContextWriterBuilder().setOutputFile(outputVcf)
                .setReferenceDictionary(outputHeader.getSequenceDictionary())
                .build()) {
            writer.writeHeader(outputHeader);

            final MNVValidator validator = ImmutableMNVValidator.of(tumorBam);
            final MNVMerger merger = ImmutableMNVMerger.of(outputHeader);
            // left = MNV region under construction; right = completed region ready for merging.
            Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair =
                    ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty());

            final VariantContextFilter filter = new StrelkaPostProcess(highConfidenceSlicer);
            for (final VariantContext variantContext : vcfReader) {
                if (filter.test(variantContext)) {
                    final VariantContext simplifiedVariant = StrelkaPostProcess.simplifyVariant(variantContext, sampleName);
                    final PotentialMNVRegion potentialMNV = outputPair.getLeft();
                    outputPair = MNVDetector.addMnvToRegion(potentialMNV, simplifiedVariant);
                    outputPair.getRight().ifPresent(mnvRegion -> validator.mergeVariants(mnvRegion, merger).forEach(writer::add));
                }
            }
            // Merge and emit whatever region is still being accumulated at end of input.
            validator.mergeVariants(outputPair.getLeft(), merger).forEach(writer::add);
        }
    }
    LOGGER.info("Written output variants to " + outputVcf);
}
Example #27
Source File: MergeVcfs.java From picard with MIT License | 4 votes |
/**
 * Merges all INPUT VCFs into a single sorted OUTPUT VCF.
 *
 * Validates on the way in that every input shares the same contig ordering and the same
 * sample list, merges the headers, then performs a k-way merge of the per-file iterators.
 * Statement order here is load-bearing: the comparator, dictionary and sample list are
 * all seeded from the first input and checked against each subsequent one.
 *
 * @return 0 on success (command-line tool convention)
 */
@Override protected int doWork() {
    final ProgressLogger progress = new ProgressLogger(log, 10000);
    final List<String> sampleList = new ArrayList<String>();
    // Expand directories / lists into the concrete set of VCF files.
    INPUT = IOUtil.unrollFiles(INPUT, IOUtil.VCF_EXTENSIONS);
    final Collection<CloseableIterator<VariantContext>> iteratorCollection = new ArrayList<CloseableIterator<VariantContext>>(INPUT.size());
    final Collection<VCFHeader> headers = new HashSet<VCFHeader>(INPUT.size());
    VariantContextComparator variantContextComparator = null;
    SAMSequenceDictionary sequenceDictionary = null;

    // Optional externally supplied dictionary (required when an input has no contig lines).
    if (SEQUENCE_DICTIONARY != null) {
        sequenceDictionary = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(SEQUENCE_DICTIONARY).getFileHeader().getSequenceDictionary();
    }

    for (final File file : INPUT) {
        IOUtil.assertFileIsReadable(file);
        final VCFFileReader fileReader = new VCFFileReader(file, false);
        final VCFHeader fileHeader = fileReader.getFileHeader();
        // Inputs without contig lines must borrow the external dictionary.
        if (fileHeader.getContigLines().isEmpty()) {
            if (sequenceDictionary == null) {
                throw new IllegalArgumentException(SEQ_DICT_REQUIRED);
            } else {
                fileHeader.setSequenceDictionary(sequenceDictionary);
            }
        }

        // First file defines the record ordering; every later file must be compatible with it.
        if (variantContextComparator == null) {
            variantContextComparator = fileHeader.getVCFRecordComparator();
        } else {
            if (!variantContextComparator.isCompatible(fileHeader.getContigLines())) {
                throw new IllegalArgumentException(
                        "The contig entries in input file " + file.getAbsolutePath() + " are not compatible with the others.");
            }
        }

        if (sequenceDictionary == null) sequenceDictionary = fileHeader.getSequenceDictionary();

        // All inputs must carry the same samples in the same order.
        if (sampleList.isEmpty()) {
            sampleList.addAll(fileHeader.getSampleNamesInOrder());
        } else {
            if (!sampleList.equals(fileHeader.getSampleNamesInOrder())) {
                throw new IllegalArgumentException("Input file " + file.getAbsolutePath() + " has sample entries that don't match the other files.");
            }
        }

        // add comments in the first header
        if (headers.isEmpty()) {
            COMMENT.stream().forEach(C -> fileHeader.addMetaDataLine(new VCFHeaderLine("MergeVcfs.comment", C)));
        }

        headers.add(fileHeader);
        iteratorCollection.add(fileReader.iterator());
    }

    // Indexing needs a dictionary from somewhere — fail early if none was found.
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException(String.format("Index creation failed. %s", SEQ_DICT_REQUIRED));
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setOutputFile(OUTPUT)
            .setReferenceDictionary(sequenceDictionary);

    if (CREATE_INDEX) {
        builder.setOption(Options.INDEX_ON_THE_FLY);
    } else {
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    }

    final VariantContextWriter writer = builder.build();

    // Single merged header built from all input headers.
    writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false), sampleList));

    // k-way merge of the per-file iterators, emitted in comparator order.
    final MergingIterator<VariantContext> mergingIterator = new MergingIterator<VariantContext>(variantContextComparator, iteratorCollection);
    while (mergingIterator.hasNext()) {
        final VariantContext context = mergingIterator.next();
        writer.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(mergingIterator);
    writer.close();
    return 0;
}
Example #28
Source File: SplitVcfs.java From picard with MIT License | 4 votes |
/**
 * Splits the INPUT VCF into a SNP-only output and an INDEL-only output.
 *
 * Records that are neither SNP nor INDEL either abort the run (STRICT) or are counted
 * and silently dropped. Note the same builder instance is deliberately reused to create
 * both writers — only the output file changes between the two build() calls.
 *
 * @return 0 on success (command-line tool convention)
 */
@Override protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    final ProgressLogger progress = new ProgressLogger(log, 10000);

    final VCFFileReader fileReader = new VCFFileReader(INPUT, false);
    final VCFHeader fileHeader = fileReader.getFileHeader();

    // Prefer an explicitly supplied dictionary; fall back to the input header's.
    final SAMSequenceDictionary sequenceDictionary =
            SEQUENCE_DICTIONARY != null
                    ? SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(SEQUENCE_DICTIONARY).getSequenceDictionary()
                    : fileHeader.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setReferenceDictionary(sequenceDictionary)
            .clearOptions();
    if (CREATE_INDEX) builder.setOption(Options.INDEX_ON_THE_FLY);

    // Both outputs share the unmodified input header.
    final VariantContextWriter snpWriter = builder.setOutputFile(SNP_OUTPUT).build();
    final VariantContextWriter indelWriter = builder.setOutputFile(INDEL_OUTPUT).build();
    snpWriter.writeHeader(fileHeader);
    indelWriter.writeHeader(fileHeader);

    int incorrectVariantCount = 0;

    final CloseableIterator<VariantContext> iterator = fileReader.iterator();
    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        if (context.isIndel()) indelWriter.add(context);
        else if (context.isSNP()) snpWriter.add(context);
        else {
            // Mixed/symbolic/other record types: fail fast in STRICT mode, else tally and drop.
            if (STRICT) throw new IllegalStateException("Found a record with type " + context.getType().name());
            else incorrectVariantCount++;
        }

        progress.record(context.getContig(), context.getStart());
    }

    if (incorrectVariantCount > 0) {
        log.debug("Found " + incorrectVariantCount + " records that didn't match SNP or INDEL");
    }

    CloserUtil.close(iterator);
    CloserUtil.close(fileReader);
    snpWriter.close();
    indelWriter.close();
    return 0;
}
Example #29
Source File: FilterVcf.java From picard with MIT License | 4 votes |
/**
 * Applies site-level and genotype-level hard filters to the INPUT VCF and writes the
 * filtered records to OUTPUT.
 *
 * Fix: the header written to the output is now the local {@code header} variable that the
 * filter/format metadata lines were added to, instead of re-fetching
 * {@code in.getFileHeader()}. The original only worked because the two referred to the
 * same object; writing the augmented header explicitly removes that hidden aliasing
 * dependency.
 *
 * @return 0 on success (command-line tool convention)
 */
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    VCFFileReader in = null;
    VariantContextWriter out = null;
    try {// try/finally used to close 'in' and 'out'
        in = new VCFFileReader(INPUT, false);

        // Site-level filters; the javascript filter is optional.
        final List<VariantFilter> variantFilters = new ArrayList<>(4);
        variantFilters.add(new AlleleBalanceFilter(MIN_AB));
        variantFilters.add(new FisherStrandFilter(MAX_FS));
        variantFilters.add(new QdFilter(MIN_QD));
        if (JAVASCRIPT_FILE != null) {
            try {
                variantFilters.add(new VariantContextJavascriptFilter(JAVASCRIPT_FILE, in.getFileHeader()));
            } catch (final IOException error) {
                throw new PicardException("javascript-related error", error);
            }
        }

        // Per-genotype filters applied within each record.
        final List<GenotypeFilter> genotypeFilters =
                CollectionUtil.makeList(new GenotypeQualityFilter(MIN_GQ), new DepthFilter(MIN_DP));
        final FilterApplyingVariantIterator iterator =
                new FilterApplyingVariantIterator(in.iterator(), variantFilters, genotypeFilters);

        final VCFHeader header = in.getFileHeader();
        // If the user is writing to a .bcf or .vcf, VariantContextBuilderWriter requires a Sequence Dictionary. Make sure that the
        // Input VCF has one.
        final VariantContextWriterBuilder variantContextWriterBuilder = new VariantContextWriterBuilder();
        if (isVcfOrBcf(OUTPUT)) {
            final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
            if (sequenceDictionary == null) {
                throw new PicardException("The input vcf must have a sequence dictionary in order to create indexed vcf or bcfs.");
            }
            variantContextWriterBuilder.setReferenceDictionary(sequenceDictionary);
        }
        out = variantContextWriterBuilder.setOutputFile(OUTPUT).build();

        // Declare the filter/format lines this tool can emit before writing the header.
        header.addMetaDataLine(new VCFFilterHeaderLine("AllGtsFiltered", "Site filtered out because all genotypes are filtered out."));
        header.addMetaDataLine(new VCFFormatHeaderLine("FT", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Genotype filters."));
        for (final VariantFilter filter : variantFilters) {
            filter.headerLines().forEach(header::addMetaDataLine);
        }
        // Write the augmented header, not a re-fetched in.getFileHeader().
        out.writeHeader(header);

        while (iterator.hasNext()) {
            final VariantContext vc = iterator.next();
            progress.record(vc.getContig(), vc.getStart());
            out.add(vc);
        }
        return 0;
    } finally {
        CloserUtil.close(out);
        CloserUtil.close(in);
    }
}
Example #30
Source File: MakeSitesOnlyVcf.java From picard with MIT License | 4 votes |
@Override protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsWritable(OUTPUT); final VCFFileReader reader = new VCFFileReader(INPUT, false); final VCFHeader inputVcfHeader = new VCFHeader(reader.getFileHeader().getMetaDataInInputOrder()); final SAMSequenceDictionary sequenceDictionary = inputVcfHeader.getSequenceDictionary(); if (CREATE_INDEX && sequenceDictionary == null) { throw new PicardException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output."); } final ProgressLogger progress = new ProgressLogger(Log.getInstance(MakeSitesOnlyVcf.class), 10000); // Setup the site-only file writer final VariantContextWriterBuilder builder = new VariantContextWriterBuilder() .setOutputFile(OUTPUT) .setReferenceDictionary(sequenceDictionary); if (CREATE_INDEX) builder.setOption(Options.INDEX_ON_THE_FLY); else builder.unsetOption(Options.INDEX_ON_THE_FLY); final VariantContextWriter writer = builder.build(); final VCFHeader header = new VCFHeader(inputVcfHeader.getMetaDataInInputOrder(), SAMPLE); writer.writeHeader(header); // Go through the input, strip the records and write them to the output final CloseableIterator<VariantContext> iterator = reader.iterator(); while (iterator.hasNext()) { final VariantContext full = iterator.next(); final VariantContext site = subsetToSamplesWithOriginalAnnotations(full, SAMPLE); writer.add(site); progress.record(site.getContig(), site.getStart()); } CloserUtil.close(iterator); CloserUtil.close(reader); writer.close(); return 0; }