htsjdk.variant.vcf.VCFHeaderLine Java Examples
The following examples show how to use
htsjdk.variant.vcf.VCFHeaderLine.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FilterVariantTranches.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
private void writeVCFHeader(VariantContextWriter vcfWriter) { // setup the header fields final VCFHeader inputHeader = getHeaderForVariants(); Set<VCFHeaderLine> hInfo = new LinkedHashSet<VCFHeaderLine>(); hInfo.addAll(inputHeader.getMetaDataInSortedOrder()); boolean hasInfoKey = hInfo.stream().anyMatch( x -> x instanceof VCFInfoHeaderLine && ((VCFInfoHeaderLine) x).getID().equals(infoKey)); if (!hasInfoKey){ throw new UserException(String.format("Input VCF does not contain a header line for specified info key:%s", infoKey)); } if (removeOldFilters){ hInfo.removeIf(x -> x instanceof VCFFilterHeaderLine); } addTrancheHeaderFields(SNPString, snpTranches, hInfo); addTrancheHeaderFields(INDELString, indelTranches, hInfo); final TreeSet<String> samples = new TreeSet<>(); samples.addAll(inputHeader.getGenotypeSamples()); hInfo.addAll(getDefaultToolVCFHeaderLines()); final VCFHeader vcfHeader = new VCFHeader(hInfo, samples); vcfWriter.writeHeader(vcfHeader); }
Example #2
Source File: GATKToolUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testGetDefaultToolVCFHeaderLines() throws IOException { final TestGATKToolWithFeatures tool = new TestGATKToolWithFeatures(); final File vcfFile = new File(publicTestDir + "org/broadinstitute/hellbender/engine/feature_data_source_test_with_bigHeader.vcf"); final String[] args = {"--mask", vcfFile.getCanonicalPath(), "--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "true"}; tool.instanceMain(args); Set<VCFHeaderLine> stdHeaderLines = tool.getDefaultToolVCFHeaderLines(); VCFHeader hdr = new VCFHeader(stdHeaderLines); VCFHeaderLine sourceLine = hdr.getOtherHeaderLine("source"); Assert.assertEquals(sourceLine.getValue(), tool.getClass().getSimpleName()); VCFIDHeaderLine commandLine = (VCFIDHeaderLine) hdr.getOtherHeaderLine("GATKCommandLine"); Assert.assertEquals(commandLine.getID(), tool.getClass().getSimpleName()); String commandLineString = commandLine.toString(); assertContains(commandLineString,"CommandLine="); assertContains(commandLineString,"Version="); assertContains(commandLineString,"Date="); }
Example #3
Source File: AnnotateVcfWithExpectedAlleleFraction.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Override public void onTraversalStart() { final VCFHeader inputHeader = getHeaderForVariants(); final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder()); headerLines.add(new VCFInfoHeaderLine(EXPECTED_ALLELE_FRACTION_NAME, 1, VCFHeaderLineType.Float, "expected allele fraction in pooled bam")); final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples()); headerLines.addAll(getDefaultToolVCFHeaderLines()); vcfWriter = createVCFWriter(outputVcf); vcfWriter.writeHeader(vcfHeader); final List<MixingFraction> mixingFractionsList = MixingFraction.readMixingFractions(inputMixingFractions); final Map<String, Double> mixingfractionsMap = mixingFractionsList.stream() .collect(Collectors.toMap(MixingFraction::getSample, MixingFraction::getMixingFraction)); mixingFractionsInSampleOrder = inputHeader.getSampleNamesInOrder().stream() .mapToDouble(mixingfractionsMap::get).toArray(); }
Example #4
Source File: StrelkaPostProcessApplication.java From hmftools with GNU General Public License v3.0 | 6 votes |
@NotNull public static VCFHeader generateOutputHeader(@NotNull final VCFHeader header, @NotNull final String sampleName) { final VCFHeader outputVCFHeader = new VCFHeader(header.getMetaDataInInputOrder(), Sets.newHashSet(sampleName)); outputVCFHeader.addMetaDataLine(VCFStandardHeaderLines.getFormatLine("GT")); outputVCFHeader.addMetaDataLine(VCFStandardHeaderLines.getFormatLine("AD")); outputVCFHeader.addMetaDataLine(new VCFHeaderLine("StrelkaGATKCompatibility", "Added GT fields to strelka calls for gatk compatibility.")); outputVCFHeader.addMetaDataLine(new VCFInfoHeaderLine("MAPPABILITY", 1, VCFHeaderLineType.Float, "Mappability (percentage)")); outputVCFHeader.addMetaDataLine(new VCFInfoHeaderLine("SOMATIC_PON_COUNT", 1, VCFHeaderLineType.Integer, "Number of times the variant appears in the somatic PON")); outputVCFHeader.addMetaDataLine(new VCFInfoHeaderLine("GERMLINE_PON_COUNT", 1, VCFHeaderLineType.Integer, "Number of times the variant appears in the germline PON")); return outputVCFHeader; }
Example #5
Source File: GVCFWriterUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
private static VCFHeader getMinimalVCFHeader() { final Set<VCFHeaderLine> headerlines = new LinkedHashSet<>(); VCFStandardHeaderLines.addStandardFormatLines(headerlines, true, VCFConstants.GENOTYPE_KEY, VCFConstants.DEPTH_KEY, VCFConstants.GENOTYPE_QUALITY_KEY, VCFConstants.GENOTYPE_PL_KEY, VCFConstants.GENOTYPE_ALLELE_DEPTHS); VCFStandardHeaderLines.addStandardInfoLines(headerlines, true, VCFConstants.DEPTH_KEY, VCFConstants.RMS_MAPPING_QUALITY_KEY, VCFConstants.MAPPING_QUALITY_ZERO_KEY ); Arrays.asList(GATKVCFConstants.BASE_QUAL_RANK_SUM_KEY, GATKVCFConstants.CLIPPING_RANK_SUM_KEY, GATKVCFConstants.MLE_ALLELE_COUNT_KEY, GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY, GATKVCFConstants.MAP_QUAL_RANK_SUM_KEY, GATKVCFConstants.READ_POS_RANK_SUM_KEY) .forEach( c -> headerlines.add(GATKVCFHeaderLines.getInfoLine(c))); headerlines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY)); return new VCFHeader(headerlines, Collections.singleton(SAMPLE_NAME)); }
Example #6
Source File: FilterMutectCalls.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Override public void onTraversalStart() { final VCFHeader inputHeader = getHeaderForVariants(); final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder()); Mutect2FilteringEngine.M_2_FILTER_NAMES.stream().map(GATKVCFHeaderLines::getFilterLine).forEach(headerLines::add); headerLines.add(new VCFFilterHeaderLine(Mutect2FilteringEngine.ARTIFACT_IN_NORMAL_FILTER_NAME, "artifact_in_normal")); headerLines.add(new VCFFilterHeaderLine(Mutect2FilteringEngine.MEDIAN_BASE_QUALITY_DIFFERENCE_FILTER_NAME, "ref - alt median base quality")); headerLines.add(new VCFFilterHeaderLine(Mutect2FilteringEngine.MEDIAN_MAPPING_QUALITY_DIFFERENCE_FILTER_NAME, "ref - alt median mapping quality")); headerLines.add(new VCFFilterHeaderLine(Mutect2FilteringEngine.MEDIAN_CLIPPING_DIFFERENCE_FILTER_NAME, "ref - alt median clipping")); headerLines.add(new VCFFilterHeaderLine(Mutect2FilteringEngine.MEDIAN_FRAGMENT_LENGTH_DIFFERENCE_FILTER_NAME, "abs(ref - alt) median fragment length")); headerLines.add(new VCFFilterHeaderLine(Mutect2FilteringEngine.READ_POSITION_FILTER_NAME, "median distance of alt variants from end of reads")); headerLines.add(new VCFFilterHeaderLine(Mutect2FilteringEngine.CONTAMINATION_FILTER_NAME, "contamination")); headerLines.addAll(getDefaultToolVCFHeaderLines()); final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples()); vcfWriter = createVCFWriter(new File(outputVcf)); vcfWriter.writeHeader(vcfHeader); }
Example #7
Source File: GATKSparkToolUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testGetDefaultToolVCFHeaderLines() { final TestGATKSparkToolWithVariants tool = new TestGATKSparkToolWithVariants(); final String[] args = {"--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "true"}; tool.instanceMain(args); Set<VCFHeaderLine> stdHeaderLines = tool.getDefaultToolVCFHeaderLines(); VCFHeader hdr = new VCFHeader(stdHeaderLines); VCFHeaderLine sourceLine = hdr.getOtherHeaderLine("source"); Assert.assertEquals(sourceLine.getValue(), tool.getClass().getSimpleName()); VCFIDHeaderLine commandLine = (VCFIDHeaderLine) hdr.getOtherHeaderLine("GATKCommandLine"); Assert.assertEquals(commandLine.getID(), tool.getClass().getSimpleName()); String commandLineString = commandLine.toString(); assertContains(commandLineString,"CommandLine="); assertContains(commandLineString,"Version="); assertContains(commandLineString,"Date="); }
Example #8
Source File: SVVCFWriter.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
private static void writeVariants(final String fileName, final List<VariantContext> variantsArrayList, final SAMSequenceDictionary referenceSequenceDictionary, final Set<VCFHeaderLine> defaultToolVCFHeaderLines) { try (final OutputStream outputStream = new BufferedOutputStream(BucketUtils.createFile(fileName))) { final VariantContextWriter vcfWriter = getVariantContextWriter(outputStream, referenceSequenceDictionary); final VCFHeader vcfHeader = getVcfHeader(referenceSequenceDictionary); defaultToolVCFHeaderLines.forEach(vcfHeader::addMetaDataLine); vcfWriter.writeHeader(vcfHeader); variantsArrayList.forEach(vcfWriter::add); vcfWriter.close(); } catch (final IOException e) { throw new GATKException("Could not create output file", e); } }
Example #9
Source File: FingerprintUtils.java From picard with MIT License | 6 votes |
private static VariantContextWriter getVariantContextWriter(final File outputFile, final File referenceSequenceFileName, final String sample, final String source, final ReferenceSequenceFile ref) { final VariantContextWriter variantContextWriter = new VariantContextWriterBuilder() .setReferenceDictionary(ref.getSequenceDictionary()) .setOutputFile(outputFile).build(); final Set<VCFHeaderLine> lines = new LinkedHashSet<>(); lines.add(new VCFHeaderLine("reference", referenceSequenceFileName.getAbsolutePath())); lines.add(new VCFHeaderLine("source", source)); lines.add(new VCFHeaderLine("fileDate", new Date().toString())); lines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_PL_KEY)); lines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS)); lines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY)); final VCFHeader header = new VCFHeader(lines, Collections.singletonList(sample)); header.setSequenceDictionary(ref.getSequenceDictionary()); variantContextWriter.writeHeader(header); return variantContextWriter; }
Example #10
Source File: GenomicsDBImportIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testCommandIncludedInOutputHeader() throws IOException { final List<String> vcfInputs = LOCAL_GVCFS; final String workspace = createTempDir("genomicsdb-tests").getAbsolutePath() + "/workspace"; writeToGenomicsDB(vcfInputs, INTERVAL, workspace, 0, false, 0, 1); try(final FeatureReader<VariantContext> genomicsDBFeatureReader = getGenomicsDBFeatureReader(workspace, b38_reference_20_21)) { final VCFHeader header = (VCFHeader) genomicsDBFeatureReader.getHeader(); final Optional<VCFHeaderLine> commandLineHeaderLine = header.getMetaDataInSortedOrder().stream() .filter(line -> line.getValue().contains(GenomicsDBImport.class.getSimpleName())) .findAny(); Assert.assertTrue(commandLineHeaderLine.isPresent(), "no headerline was present containing information about the GenomicsDBImport command"); } }
Example #11
Source File: AnnotateVcfWithExpectedAlleleFraction.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Override public void onTraversalStart() { final VCFHeader inputHeader = getHeaderForVariants(); final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder()); headerLines.add(new VCFInfoHeaderLine(EXPECTED_ALLELE_FRACTION_NAME, 1, VCFHeaderLineType.Float, "expected allele fraction in pooled bam")); final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples()); headerLines.addAll(getDefaultToolVCFHeaderLines()); vcfWriter = createVCFWriter(outputVcf); vcfWriter.writeHeader(vcfHeader); final List<MixingFraction> mixingFractionsList = MixingFraction.readMixingFractions(inputMixingFractions); final Map<String, Double> mixingfractionsMap = mixingFractionsList.stream() .collect(Collectors.toMap(MixingFraction::getSample, MixingFraction::getMixingFraction)); mixingFractionsInSampleOrder = inputHeader.getSampleNamesInOrder().stream() .mapToDouble(mixingfractionsMap::get).toArray(); }
Example #12
Source File: FilterAlignmentArtifacts.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Override public void onTraversalStart() { realignmentEngine = new RealignmentEngine(realignmentArgumentCollection); vcfWriter = createVCFWriter(new File(outputVcf)); final VCFHeader inputHeader = getHeaderForVariants(); final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder()); headerLines.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.ALIGNMENT_ARTIFACT_FILTER_NAME)); headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.UNITIG_SIZES_KEY)); headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.ALIGNMENT_SCORE_DIFFERENCE_KEY)); headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.JOINT_ALIGNMENT_COUNT_KEY)); headerLines.addAll(getDefaultToolVCFHeaderLines()); final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples()); vcfWriter.writeHeader(vcfHeader); bamHeader = getHeaderForReads(); samplesList = new IndexedSampleList(new ArrayList<>(ReadUtils.getSamplesFromHeader(bamHeader))); referenceReader = AssemblyBasedCallerUtils.createReferenceReader(Utils.nonNull(referenceArguments.getReferenceSpecifier())); assemblyEngine = MTAC.createReadThreadingAssembler(); likelihoodCalculationEngine = AssemblyBasedCallerUtils.createLikelihoodCalculationEngine(MTAC.likelihoodArgs); haplotypeBAMWriter = bamOutputPath == null ? Optional.empty() : Optional.of(new HaplotypeBAMWriter(HaplotypeBAMWriter.WriterType.ALL_POSSIBLE_HAPLOTYPES, IOUtils.getPath(bamOutputPath), true, false, getHeaderForSAMWriter())); }
Example #13
Source File: SvDiscoverFromLocalAssemblyContigAlignmentsSpark.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
private static CpxAndReInterpretedSimpleVariants extractCpxVariants(final JavaSparkContext ctx, final JavaRDD<AssemblyContigWithFineTunedAlignments> contigsWithCpxAln, final SvDiscoveryInputMetaData svDiscoveryInputMetaData, final JavaRDD<GATKRead> assemblyRawAlignments, final String outputPrefixWithSampleName) { final Logger toolLogger = svDiscoveryInputMetaData.getDiscoverStageArgs().runInDebugMode ? svDiscoveryInputMetaData.getToolLogger() : null; final Set<VCFHeaderLine> defaultToolVCFHeaderLines = svDiscoveryInputMetaData.getDefaultToolVCFHeaderLines(); final List<VariantContext> complexVariants = CpxVariantInterpreter.makeInterpretation(contigsWithCpxAln, svDiscoveryInputMetaData); SVVCFWriter.writeVCF(complexVariants, outputPrefixWithSampleName + COMPLEX_CHIMERA_VCF_FILE_NAME, svDiscoveryInputMetaData.getReferenceData().getReferenceSequenceDictionaryBroadcast().getValue(), defaultToolVCFHeaderLines, toolLogger); final JavaRDD<VariantContext> complexVariantsRDD = ctx.parallelize(complexVariants); final SegmentedCpxVariantSimpleVariantExtractor.ExtractedSimpleVariants reInterpretedSimple = SegmentedCpxVariantSimpleVariantExtractor.extract(complexVariantsRDD, svDiscoveryInputMetaData, assemblyRawAlignments); final SAMSequenceDictionary refSeqDict = svDiscoveryInputMetaData.getReferenceData().getReferenceSequenceDictionaryBroadcast().getValue(); final String derivedOneSegmentSimpleVCF = outputPrefixWithSampleName + REINTERPRETED_1_SEG_CALL_VCF_FILE_NAME; final String derivedMultiSegmentSimpleVCF = outputPrefixWithSampleName + REINTERPRETED_MULTI_SEG_CALL_VCF_FILE_NAME; SVVCFWriter.writeVCF(reInterpretedSimple.getReInterpretZeroOrOneSegmentCalls(), derivedOneSegmentSimpleVCF, refSeqDict, defaultToolVCFHeaderLines, toolLogger); SVVCFWriter.writeVCF(reInterpretedSimple.getReInterpretMultiSegmentsCalls(), derivedMultiSegmentSimpleVCF, refSeqDict, defaultToolVCFHeaderLines, toolLogger); return new CpxAndReInterpretedSimpleVariants(complexVariants, reInterpretedSimple.getMergedReinterpretedCalls()); }
Example #14
Source File: CpxVariantReInterpreterSpark.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Override protected void runTool(final JavaSparkContext ctx) { // TODO: 5/9/18 getback sample name in output files final SAMFileHeader headerForReads = getHeaderForReads(); final Set<VCFHeaderLine> defaultToolVCFHeaderLines = getDefaultToolVCFHeaderLines(); final SvDiscoveryInputMetaData svDiscoveryInputMetaData = new SvDiscoveryInputMetaData(ctx, discoverStageArgs, nonCanonicalChromosomeNamesFile, derivedSimpleVCFPrefix, null, null, null, null, headerForReads, getReference(), defaultToolVCFHeaderLines, localLogger); final JavaRDD<VariantContext> complexVariants = new VariantsSparkSource(ctx) .getParallelVariantContexts(complexVCF, getIntervals()); final JavaRDD<GATKRead> assemblyRawAlignments = getReads(); final SegmentedCpxVariantSimpleVariantExtractor.ExtractedSimpleVariants extract = SegmentedCpxVariantSimpleVariantExtractor.extract(complexVariants, svDiscoveryInputMetaData, assemblyRawAlignments); final String derivedOneSegmentSimpleVCF = derivedSimpleVCFPrefix + "_1_seg.vcf"; final String derivedMultiSegmentSimpleVCF = derivedSimpleVCFPrefix + "_multi_seg.vcf"; final VCFHeader vcfHeader = VariantsSparkSource.getHeader(complexVCF); SVVCFWriter.writeVCF(extract.getReInterpretZeroOrOneSegmentCalls(), derivedOneSegmentSimpleVCF, vcfHeader.getSequenceDictionary(), defaultToolVCFHeaderLines, logger); SVVCFWriter.writeVCF(extract.getReInterpretMultiSegmentsCalls(), derivedMultiSegmentSimpleVCF, vcfHeader.getSequenceDictionary(), defaultToolVCFHeaderLines, logger); }
Example #15
Source File: SvDiscoveryInputMetaData.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
public SvDiscoveryInputMetaData(final JavaSparkContext ctx, final DiscoverVariantsFromContigAlignmentsSparkArgumentCollection discoverStageArgs, final String nonCanonicalChromosomeNamesFile, final String outputPath, final ReadMetadata readMetadata, final List<SVInterval> assembledIntervals, final PairedStrandedIntervalTree<EvidenceTargetLink> evidenceTargetLinks, final Broadcast<SVIntervalTree<VariantContext>> cnvCallsBroadcast, final SAMFileHeader headerForReads, final ReferenceMultiSparkSource reference, final Set<VCFHeaderLine> defaultToolVCFHeaderLines, final Logger toolLogger) { final SAMSequenceDictionary sequenceDictionary = headerForReads.getSequenceDictionary(); final Broadcast<Set<String>> canonicalChromosomesBroadcast = ctx.broadcast(SVUtils.getCanonicalChromosomes(nonCanonicalChromosomeNamesFile, sequenceDictionary)); final String sampleId = SVUtils.getSampleId(headerForReads); this.referenceData = new ReferenceData(canonicalChromosomesBroadcast, ctx.broadcast(reference), ctx.broadcast(sequenceDictionary)); this.sampleSpecificData = new SampleSpecificData(sampleId, cnvCallsBroadcast, assembledIntervals, evidenceTargetLinks, readMetadata, ctx.broadcast(headerForReads)); this.discoverStageArgs = discoverStageArgs; this.outputPath = outputPath; this.defaultToolVCFHeaderLines = defaultToolVCFHeaderLines; this.toolLogger = toolLogger; }
Example #16
Source File: GenomicsDBImportIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private static File createInputVCF(final String sampleName) { final String contig = "chr20"; final SAMSequenceDictionary dict = new SAMSequenceDictionary( Collections.singletonList(new SAMSequenceRecord(contig, 64444167))); final VCFFormatHeaderLine formatField = new VCFFormatHeaderLine(SAMPLE_NAME_KEY, 1, VCFHeaderLineType.String, "the name of the sample this genotype came from"); final Set<VCFHeaderLine> headerLines = new HashSet<>(); headerLines.add(formatField); headerLines.add(new VCFFormatHeaderLine(ANOTHER_ATTRIBUTE_KEY, 1, VCFHeaderLineType.Integer, "Another value")); headerLines.add(VCFStandardHeaderLines.getFormatLine("GT")); final File out = createTempFile(sampleName +"_", ".vcf"); try (final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(out.toPath(), dict, false, Options.INDEX_ON_THE_FLY)) { final VCFHeader vcfHeader = new VCFHeader(headerLines, Collections.singleton(sampleName)); vcfHeader.setSequenceDictionary(dict); writer.writeHeader(vcfHeader); final Allele Aref = Allele.create("A", true); final Allele C = Allele.create("C"); final List<Allele> alleles = Arrays.asList(Aref, C); final VariantContext variant = new VariantContextBuilder("invented", contig, INTERVAL.get(0).getStart(), INTERVAL.get(0).getStart(), alleles) .genotypes(new GenotypeBuilder(sampleName, alleles).attribute(SAMPLE_NAME_KEY, sampleName) .attribute(ANOTHER_ATTRIBUTE_KEY, 10).make()) .make(); writer.add(variant); return out; } }
Example #17
Source File: CNNScoreVariants.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private void writeVCFHeader(VariantContextWriter vcfWriter) { // setup the header fields final VCFHeader inputHeader = getHeaderForVariants(); final Set<VCFHeaderLine> inputHeaders = inputHeader.getMetaDataInSortedOrder(); final Set<VCFHeaderLine> hInfo = new HashSet<>(inputHeaders); hInfo.add(GATKVCFHeaderLines.getInfoLine(scoreKey)); final TreeSet<String> samples = new TreeSet<>(); samples.addAll(inputHeader.getGenotypeSamples()); hInfo.addAll(getDefaultToolVCFHeaderLines()); final VCFHeader vcfHeader = new VCFHeader(hInfo, samples); vcfWriter.writeHeader(vcfHeader); }
Example #18
Source File: CombineGVCFs.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private VariantContextWriter getVCFWriter() { final SortedSet<String> samples = getSamplesForVariants(); final VCFHeader inputVCFHeader = new VCFHeader(getHeaderForVariants().getMetaDataInInputOrder(), samples); final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>(inputVCFHeader.getMetaDataInInputOrder()); headerLines.addAll(getDefaultToolVCFHeaderLines()); headerLines.addAll(annotationEngine.getVCFAnnotationDescriptions()); // add headers for annotations added by this tool headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY)); // needed for gVCFs without DP tags if ( dbsnp.dbsnp != null ) { VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.DBSNP_KEY); } if (somaticInput) { //single-sample M2 variant filter status will get moved to genotype filter headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY)); if (!dropSomaticFilteringAnnotations) { //standard M2 INFO annotations for filtering will get moved to FORMAT field for (final String key : Mutect2FilteringEngine.STANDARD_MUTECT_INFO_FIELDS_FOR_FILTERING) { headerLines.add(GATKVCFHeaderLines.getEquivalentFormatHeaderLine(key)); } } } VariantContextWriter writer = createVCFWriter(outputFile); final Set<String> sampleNameSet = new IndexedSampleList(samples).asSetOfSamples(); final VCFHeader vcfHeader = new VCFHeader(headerLines, new TreeSet<>(sampleNameSet)); writer.writeHeader(vcfHeader); return writer; }
Example #19
Source File: StructuralVariantHeader.java From hmftools with GNU General Public License v3.0 | 5 votes |
@NotNull public static VCFHeader generateHeader(@NotNull final String purpleVersion, @NotNull final VCFHeader template) { final VCFHeader outputVCFHeader = new VCFHeader(template.getMetaDataInInputOrder(), template.getGenotypeSamples()); outputVCFHeader.addMetaDataLine(new VCFHeaderLine("purpleVersion", purpleVersion)); outputVCFHeader.addMetaDataLine(VCFStandardHeaderLines.getFormatLine("GT")); outputVCFHeader.addMetaDataLine(new VCFInfoHeaderLine(StructuralVariantFactory.RECOVERED, 0, VCFHeaderLineType.Flag, RECOVERED_DESC)); outputVCFHeader.addMetaDataLine(new VCFInfoHeaderLine(StructuralVariantFactory.INFERRED, 0, VCFHeaderLineType.Flag, INFERRED_DESC)); outputVCFHeader.addMetaDataLine(new VCFFilterHeaderLine(INFERRED, INFERRED_DESC)); outputVCFHeader.addMetaDataLine(new VCFInfoHeaderLine(StructuralVariantFactory.IMPRECISE, 0, VCFHeaderLineType.Flag, IMPRECISE_DESC)); outputVCFHeader.addMetaDataLine(new VCFInfoHeaderLine(CIPOS, 2, VCFHeaderLineType.Integer, CIPOS_DESC)); outputVCFHeader.addMetaDataLine(new VCFInfoHeaderLine(SVTYPE, 1, VCFHeaderLineType.String, SVTYPE_DESC)); outputVCFHeader.addMetaDataLine(new VCFInfoHeaderLine(PURPLE_AF_INFO, UNBOUNDED, VCFHeaderLineType.Float, PURPLE_AF_DESC)); outputVCFHeader.addMetaDataLine(new VCFInfoHeaderLine(PURPLE_CN_INFO, UNBOUNDED, VCFHeaderLineType.Float, PURPLE_CN_DESC)); outputVCFHeader.addMetaDataLine(new VCFInfoHeaderLine(RECOVERY_METHOD, 1, VCFHeaderLineType.String, RECOVERY_METHOD_DESC)); outputVCFHeader.addMetaDataLine(new VCFInfoHeaderLine(RECOVERY_FILTER, UNBOUNDED, VCFHeaderLineType.String, RECOVERY_FILTER_DESC)); outputVCFHeader.addMetaDataLine(new VCFInfoHeaderLine(PURPLE_JUNCTION_COPY_NUMBER_INFO, 1, VCFHeaderLineType.Float, PURPLE_JUNCTION_COPY_NUMBER_DESC)); outputVCFHeader.addMetaDataLine(new VCFInfoHeaderLine(PURPLE_CN_CHANGE_INFO, UNBOUNDED, VCFHeaderLineType.Float, PURPLE_CN_CHANGE_DESC)); return outputVCFHeader; }
Example #20
Source File: VariantRecalibrationUtils.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Add the standard VCF header lines used with VQSR. * @param hInfo updated set of VCFHeaderLines */ protected static void addVQSRStandardHeaderLines(final Set<VCFHeaderLine> hInfo) { hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY)); hInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.VQS_LOD_KEY)); hInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.CULPRIT_KEY)); hInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.POSITIVE_LABEL_KEY)); hInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.NEGATIVE_LABEL_KEY)); hInfo.add(GATKVCFHeaderLines.getFilterLine(VCFConstants.PASSES_FILTERS_v4)); }
Example #21
Source File: VcfUtilsUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private Set<VCFHeaderLine> createHeaderLines() { Set<VCFHeaderLine> headerLines = new HashSet<>(2); headerLines.add(new VCFContigHeaderLine( "##contig=<ID=1,length=249250621,assembly=b37>", vcfHeaderVersion, "", 0)); headerLines.add(new VCFContigHeaderLine( "##contig=<ID=2,length=249250622,assembly=b37>", vcfHeaderVersion, "", 0)); return headerLines; }
Example #22
Source File: Mutect2Engine.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
public void writeHeader(final VariantContextWriter vcfWriter, final Set<VCFHeaderLine> defaultToolHeaderLines) { final Set<VCFHeaderLine> headerInfo = new HashSet<>(); headerInfo.add(new VCFHeaderLine("MutectVersion", MUTECT_VERSION)); headerInfo.add(new VCFHeaderLine(FilterMutectCalls.FILTERING_STATUS_VCF_KEY, "Warning: unfiltered Mutect 2 calls. Please run " + FilterMutectCalls.class.getSimpleName() + " to remove false positives.")); headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions(false)); headerInfo.addAll(defaultToolHeaderLines); STANDARD_MUTECT_INFO_FIELDS.stream().map(GATKVCFHeaderLines::getInfoLine).forEach(headerInfo::add); VCFStandardHeaderLines.addStandardFormatLines(headerInfo, true, VCFConstants.GENOTYPE_KEY, VCFConstants.GENOTYPE_ALLELE_DEPTHS, VCFConstants.GENOTYPE_QUALITY_KEY, VCFConstants.DEPTH_KEY, VCFConstants.GENOTYPE_PL_KEY); headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.ALLELE_FRACTION_KEY)); headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY)); headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY)); headerInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.PHASE_SET_KEY)); ReadUtils.getSamplesFromHeader(header).stream().filter(this::isTumorSample) .forEach(s -> headerInfo.add(new VCFHeaderLine(TUMOR_SAMPLE_KEY_IN_VCF_HEADER, s))); normalSamples.forEach(sample -> headerInfo.add(new VCFHeaderLine(NORMAL_SAMPLE_KEY_IN_VCF_HEADER, sample))); if (emitReferenceConfidence()) { headerInfo.addAll(referenceConfidenceModel.getVCFHeaderLines()); headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.TUMOR_LOG_10_ODDS_KEY)); } final VCFHeader vcfHeader = new VCFHeader(headerInfo, samplesList.asListOfSamples()); vcfHeader.setSequenceDictionary(header.getSequenceDictionary()); vcfWriter.writeHeader(vcfHeader); }
Example #23
Source File: SVVCFWriter.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@VisibleForTesting static VCFHeader getVcfHeader(final SAMSequenceDictionary referenceSequenceDictionary) { final Set<VCFHeaderLine> headerLines = new HashSet<>(GATKSVVCFHeaderLines.getSymbAltAlleleLines()); headerLines.addAll(GATKSVVCFHeaderLines.getInfoLines()); headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY)); headerLines.addAll(GATKSVVCFHeaderLines.getFormatLines()); headerLines.addAll(GATKSVVCFHeaderLines.getFilterLines()); final VCFHeader header = new VCFHeader(new VCFHeader( headerLines )); header.setSequenceDictionary(referenceSequenceDictionary); return header; }
Example #24
Source File: VariantsSparkSinkUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private static VCFHeader getHeader() { final Set<VCFHeaderLine> headerlines = new LinkedHashSet<>(); VCFStandardHeaderLines.addStandardFormatLines(headerlines, true, VCFConstants.GENOTYPE_KEY, VCFConstants.GENOTYPE_QUALITY_KEY, VCFConstants.GENOTYPE_PL_KEY, VCFConstants.DEPTH_KEY); final SAMSequenceDictionary dict = new SAMSequenceDictionary( Collections.singletonList(new SAMSequenceRecord("1", 100))); final VCFHeader header = new VCFHeader(headerlines, Collections.singleton(SAMPLE)); header.setSequenceDictionary(dict); return header; }
Example #25
Source File: FixCallSetSampleOrdering.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Override public void onTraversalStart() { assertThatTheyReallyWantToProceed(); if (batchSize == 0) { throw new SampleNameFixingCannotProceedException("your callset is not affected by the bug if you ran with --"+ GenomicsDBImport.BATCHSIZE_ARG_LONG_NAME +" 0"); } if ( readerThreads > 1 ) { if ( gvcfToHeaderSampleMapFile == null ) { throw new SampleNameFixingCannotProceedException("You must provide a --gvcfToHeaderSampleMapFile if GenomicsDBImport was run with --" + GenomicsDBImport.VCF_INITIALIZER_THREADS_LONG_NAME + " > 1"); } } else if ( gvcfToHeaderSampleMapFile != null ) { throw new SampleNameFixingCannotProceedException("You must NOT provide a --gvcfToHeaderSampleMapFile if GenomicsDBImport was run with --" + GenomicsDBImport.VCF_INITIALIZER_THREADS_LONG_NAME + " 1"); } final VCFHeader originalHeader = getHeaderForVariants(); final Set<VCFHeaderLine> originalHeaderLines = originalHeader.getMetaDataInInputOrder(); final Set<VCFHeaderLine> newHeaderLines = new LinkedHashSet<>(originalHeaderLines); newHeaderLines.addAll(getDefaultToolVCFHeaderLines()); loadSampleNameMappings(); final List<String> sampleNamesOriginalOrdering = new ArrayList<>(sampleNameMapFromGenomicsDBImport.keySet()); if( sampleNamesOriginalOrdering.size() <= batchSize ){ throw new SampleNameFixingCannotProceedException("you are not affected by the sample name ordering bug if your batch size is >= the number of samples in your callset. \n" + "batch size: " + batchSize + "\n" + "number of samples: " + sampleNamesOriginalOrdering.size()); } assertSampleNamesMatchInputVCF(originalHeader.getSampleNamesInOrder(), sampleNamesOriginalOrdering); final List<String> batchSortedSampleNames = getBatchSortedList(); final VCFHeader remappedHeader = new VCFHeader(newHeaderLines, batchSortedSampleNames); logger.info("Writing the new header with corrected sample names"); writer = createVCFWriter(output); writer.writeHeader(remappedHeader); logger.info("Copying the rest of the VCF"); }
Example #26
Source File: SamplePairExtractor.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * This method should not be needed in germline use cases, but should produce an empty list in such cases. * * Pairing is done as follows: * * - Using the M2 convention: "tumor_sample" in the header and "normal_sample" if applicable * - If there is only one sample, the pair will be the sample name, "". * - The samples have names such as "TUMOR" and "NORMAL". See {@link SamplePairExtractor#TUMOR_SAMPLE_NAME_LIST} and * {@link SamplePairExtractor#NORMAL_SAMPLE_NAME_LIST} * * If none of these are fulfilled, the returned list will be empty. * * @param vcfHeader Never {@code null}. Use blank constructor in {@link VCFHeader} rather than null. * @return Tumor-normal sample names. In tumor-only cases, the normal sample will have an empty string. * Never {@code null} */ public static List<TumorNormalPair> extractPossibleTumorNormalPairs(final VCFHeader vcfHeader) { Utils.nonNull(vcfHeader); // First attempt the M2 nomenclature if (vcfHeader.getMetaDataLine(Mutect2Engine.TUMOR_SAMPLE_KEY_IN_VCF_HEADER) != null) { final VCFHeaderLine normalMetaDataLine = vcfHeader.getMetaDataLine(Mutect2Engine.NORMAL_SAMPLE_KEY_IN_VCF_HEADER); return Collections.singletonList(new TumorNormalPair(vcfHeader.getMetaDataLine(Mutect2Engine.TUMOR_SAMPLE_KEY_IN_VCF_HEADER).getValue(), (normalMetaDataLine == null ? NO_NORMAL : normalMetaDataLine.getValue()))); } // Then try sample names (e.g. "TUMOR", "NORMAL") final List<String> sampleNames = vcfHeader.getSampleNamesInOrder(); if (sampleNames.size() == 1) { return Collections.singletonList(new TumorNormalPair(sampleNames.get(0), NO_NORMAL)); } if (sampleNames.size() > 0) { final List<String> tumorSamples = sampleNames.stream() .filter(s -> TUMOR_SAMPLE_NAME_LIST.contains(s)).collect(Collectors.toList()); final List<String> normalSamples = sampleNames.stream() .filter(s -> NORMAL_SAMPLE_NAME_LIST.contains(s)).collect(Collectors.toList()); return tumorSamples.stream().flatMap(t -> normalSamples.stream().map(n -> new TumorNormalPair(t,n))).collect(Collectors.toList()); } return Collections.emptyList(); }
Example #27
Source File: VcfUtilsUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test(dataProvider = "testData") public void testUpdateContigsReferenceNameOnly( final Set<VCFHeaderLine> inHeaderLines, final SAMSequenceDictionary seqDict, final Path referencePath, final boolean refNameOnly, final String expectedRefName) { Set<VCFHeaderLine> resultLines = VcfUtils.updateHeaderContigLines( inHeaderLines, referencePath, seqDict, refNameOnly ); Assert.assertEquals(resultLines.size(), referencePath == null ? 2 : 3); for (VCFHeaderLine resultLine : resultLines) { if (resultLine instanceof VCFContigHeaderLine) { VCFContigHeaderLine headerLine = (VCFContigHeaderLine) resultLine; SAMSequenceRecord samSeqRec = headerLine.getSAMSequenceRecord(); if (samSeqRec.getSequenceName().equals("contig1")) { Assert.assertEquals(samSeqRec.getSequenceLength(), 100); } else if (samSeqRec.getSequenceName().equals("contig2")) { Assert.assertEquals(samSeqRec.getSequenceLength(), 200); } else { Assert.fail("Bad sequence name in header lines"); } } else { if (referencePath != null) { Assert.assertEquals(resultLine.getValue(), expectedRefName); } else { Assert.fail("Unexpected reference name in header lines"); } } } }
Example #28
Source File: FilterMutectCalls.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Override public void onTraversalStart() { Utils.resetRandomGenerator(); final VCFHeader inputHeader = getHeaderForVariants(); final Set<VCFHeaderLine> headerLines = inputHeader.getMetaDataInSortedOrder().stream() .filter(line -> !line.getKey().equals(FILTERING_STATUS_VCF_KEY)) //remove header line from Mutect2 stating that calls are unfiltered. .collect(Collectors.toSet()); headerLines.add(new VCFHeaderLine(FILTERING_STATUS_VCF_KEY, "These calls have been filtered by " + FilterMutectCalls.class.getSimpleName() + " to label false positives with a list of failed filters and true positives with PASS.")); // all possible filters, even allele specific (since they can apply to the site as well GATKVCFConstants.MUTECT_FILTER_NAMES.stream().map(GATKVCFHeaderLines::getFilterLine).forEach(headerLines::add); // these are the possible allele specific filters which will be in the INFO section // when all relevant alleles (non-symbolic, etc) are filtered, the filter will be applied to the site level filter also GATKVCFConstants.MUTECT_AS_FILTER_NAMES.stream().map(GATKVCFHeaderLines::getInfoLine).forEach(headerLines::add); headerLines.addAll(getDefaultToolVCFHeaderLines()); final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples()); vcfWriter = createVCFWriter(new File(outputVcf)); vcfWriter.writeHeader(vcfHeader); final File mutect2StatsTable = new File(statsTable == null ? drivingVariantFile + Mutect2.DEFAULT_STATS_EXTENSION : statsTable); filteringEngine = new Mutect2FilteringEngine(MTFAC, vcfHeader, mutect2StatsTable); if (!mutect2StatsTable.exists()) { throw new UserException.CouldNotReadInputFile("Mutect stats table " + mutect2StatsTable + " not found. When Mutect2 outputs a file calls.vcf it also creates" + " a calls.vcf" + Mutect2.DEFAULT_STATS_EXTENSION + " file. Perhaps this file was not moved along with the vcf, or perhaps it was not delocalized from a" + " virtual machine while running in the cloud." ); } }
Example #29
Source File: Mutect2FilteringEngine.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
public Mutect2FilteringEngine(M2FiltersArgumentCollection MTFAC, final VCFHeader vcfHeader, final File mutectStatsTable) { thresholdCalculator = new ThresholdCalculator(MTFAC.thresholdStrategy, MTFAC.initialPosteriorThreshold, MTFAC.maxFalsePositiveRate, MTFAC.fScoreBeta); somaticClusteringModel = new SomaticClusteringModel(MTFAC, mutectStatsTable.exists() ? MutectStats.readFromFile(mutectStatsTable) : Collections.emptyList()); normalSamples = vcfHeader.getMetaDataInInputOrder().stream() .filter(line -> line.getKey().equals(Mutect2Engine.NORMAL_SAMPLE_KEY_IN_VCF_HEADER)) .map(VCFHeaderLine::getValue) .collect(Collectors.toSet()); buildFiltersList(MTFAC); filteringOutputStats = new FilteringOutputStats(filters); }
Example #30
Source File: SVVCFWriter.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * {@code referenceSequenceDictionary} is required because 2bit Broadcast references currently order their * sequence dictionaries in a scrambled order, see https://github.com/broadinstitute/gatk/issues/2037. */ public static void writeVCF(final List<VariantContext> localVariants, final String vcfFileName, final SAMSequenceDictionary referenceSequenceDictionary, final Set<VCFHeaderLine> defaultToolVCFHeaderLines, final Logger logger) { final List<VariantContext> sortedVariantsList = sortVariantsByCoordinate(localVariants, referenceSequenceDictionary); if (logger != null) logNumOfVarByTypes(sortedVariantsList, logger); writeVariants(vcfFileName, sortedVariantsList, referenceSequenceDictionary, defaultToolVCFHeaderLines); }