htsjdk.variant.vcf.VCFCodec Java Examples
The following examples show how to use
htsjdk.variant.vcf.VCFCodec.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: VariantHotspotFile.java From hmftools with GNU General Public License v3.0 | 6 votes |
@NotNull public static ListMultimap<Chromosome, VariantHotspot> readFromVCF(@NotNull final String fileName) throws IOException { ListMultimap<Chromosome, VariantHotspot> result = ArrayListMultimap.create(); try (final AbstractFeatureReader<VariantContext, LineIterator> reader = AbstractFeatureReader.getFeatureReader(fileName, new VCFCodec(), false)) { for (VariantContext variantContext : reader.iterator()) { if (HumanChromosome.contains(variantContext.getContig())) { result.put(HumanChromosome.fromString(variantContext.getContig()), fromVariantContext(variantContext)); } } } return result; }
Example #2
Source File: GenomicsDBImportIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testPreserveContigOrderingInHeader() throws IOException { final String workspace = createTempDir("testPreserveContigOrderingInHeader-").getAbsolutePath() + "/workspace"; ArrayList<SimpleInterval> intervals = new ArrayList<SimpleInterval>(Arrays.asList(new SimpleInterval("chr20", 17959479, 17959479))); writeToGenomicsDB(Arrays.asList(GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting1.g.vcf", GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting2.g.vcf"), intervals, workspace, 0, false, 0, 1); try ( final FeatureReader<VariantContext> genomicsDBFeatureReader = getGenomicsDBFeatureReader(workspace, b38_reference_20_21); final AbstractFeatureReader<VariantContext, LineIterator> inputGVCFReader = AbstractFeatureReader.getFeatureReader(GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting1.g.vcf", new VCFCodec(), true); ) { final SAMSequenceDictionary dictionaryFromGenomicsDB = ((VCFHeader)genomicsDBFeatureReader.getHeader()).getSequenceDictionary(); final SAMSequenceDictionary dictionaryFromInputGVCF = ((VCFHeader)inputGVCFReader.getHeader()).getSequenceDictionary(); Assert.assertEquals(dictionaryFromGenomicsDB, dictionaryFromInputGVCF, "Sequence dictionary from GenomicsDB does not match original sequence dictionary from input GVCF"); } }
Example #3
Source File: GatherVcfsCloudIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@SuppressWarnings({"unchecked"}) private void assertGatherProducesCorrectVariants(GatherVcfsCloud.GatherType gatherType, File expected, List<File> inputs) throws IOException { final File output = createTempFile("gathered", ".vcf.gz"); final ArgumentsBuilder args = new ArgumentsBuilder(); inputs.forEach(args::addInput); args.addOutput(output) .add(GatherVcfsCloud.GATHER_TYPE_LONG_NAME, gatherType.toString()); runCommandLine(args); try (final AbstractFeatureReader<VariantContext, ?> actualReader = AbstractFeatureReader.getFeatureReader(output.getAbsolutePath(), null, new VCFCodec(), false, Function.identity(), Function.identity()); final AbstractFeatureReader<VariantContext, ?> expectedReader = AbstractFeatureReader.getFeatureReader(expected.getAbsolutePath(), null, new VCFCodec(), false, Function.identity(), Function.identity())) { final List<VariantContext> actualVariants = StreamSupport.stream(Spliterators.spliteratorUnknownSize(actualReader.iterator(),Spliterator.ORDERED), false).collect(Collectors.toList()); final List<VariantContext> expectedVariants = StreamSupport.stream(Spliterators.spliteratorUnknownSize(expectedReader.iterator(),Spliterator.ORDERED), false).collect(Collectors.toList()); VariantContextTestUtils.assertEqualVariants(actualVariants, expectedVariants); Assert.assertEquals(((VCFHeader) actualReader.getHeader()).getMetaDataInInputOrder(), ((VCFHeader) expectedReader.getHeader()).getMetaDataInInputOrder()); } }
Example #4
Source File: AmberSiteFactory.java From hmftools with GNU General Public License v3.0 | 6 votes |
@NotNull public static ListMultimap<Chromosome, AmberSite> sites(@NotNull final String vcfFile) throws IOException { final ListMultimap<Chromosome, AmberSite> result = ArrayListMultimap.create(); try (final AbstractFeatureReader<VariantContext, LineIterator> reader = getFeatureReader(vcfFile, new VCFCodec(), false)) { for (VariantContext variant : reader.iterator()) { if (variant.isNotFiltered()) { if (HumanChromosome.contains(variant.getContig())) { HumanChromosome chromosome = HumanChromosome.fromString(variant.getContig()); result.put(chromosome, ImmutableAmberSite.builder() .chromosome(variant.getContig()) .position(variant.getStart()) .ref(variant.getReference().getBaseString()) .alt(variant.getAlternateAllele(0).getBaseString()) .snpCheck(variant.hasAttribute("SNPCHECK")) .build()); } } } } return result; }
Example #5
Source File: FeatureDataSource.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
protected static FeatureReader<VariantContext> getGenomicsDBFeatureReader(final GATKPath path, final File reference, final GenomicsDBOptions genomicsDBOptions) { final String workspace = IOUtils.getGenomicsDBAbsolutePath(path) ; if (workspace == null) { throw new IllegalArgumentException("Trying to create a GenomicsDBReader from non-GenomicsDB input path " + path); } else if (Files.notExists(IOUtils.getPath(workspace.endsWith("/") ? workspace : workspace + "/"))) { throw new UserException("GenomicsDB workspace " + path + " does not exist"); } final String callsetJson = IOUtils.appendPathToDir(workspace, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME); final String vidmapJson = IOUtils.appendPathToDir(workspace, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME); final String vcfHeader = IOUtils.appendPathToDir(workspace, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME); IOUtils.assertPathsAreReadable(callsetJson, vidmapJson, vcfHeader); try { final GenomicsDBExportConfiguration.ExportConfiguration exportConfigurationBuilder = createExportConfiguration(workspace, callsetJson, vidmapJson, vcfHeader, genomicsDBOptions); if (genomicsDBOptions.useBCFCodec()) { return new GenomicsDBFeatureReader<>(exportConfigurationBuilder, new BCF2Codec(), Optional.empty()); } else { return new GenomicsDBFeatureReader<>(exportConfigurationBuilder, new VCFCodec(), Optional.empty()); } } catch (final IOException e) { throw new UserException("Couldn't create GenomicsDBFeatureReader", e); } }
Example #6
Source File: GenotypeGVCFsIntegrationTest.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Returns a list of VariantContext records from a VCF file * * @param vcfFile VCF file * @return list of VariantContext records * @throws IOException if the file does not exist or can not be opened */ private static List<VariantContext> getVariantContexts(final File vcfFile) throws IOException { final VCFCodec codec = new VCFCodec(); final FileInputStream s = new FileInputStream(vcfFile); final LineIterator lineIteratorVCF = codec.makeSourceFromStream(new PositionalBufferedStream(s)); codec.readHeader(lineIteratorVCF); final List<VariantContext> VCs = new ArrayList<>(); while (lineIteratorVCF.hasNext()) { final String line = lineIteratorVCF.next(); Assert.assertFalse(line == null); VCs.add(codec.decode(line)); } return VCs; }
Example #7
Source File: GenomicsDBImportIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private static FeatureReader<VariantContext> getGenomicsDBFeatureReader( final String workspace, final String reference, final boolean produceGTField, final boolean sitesOnlyQuery, final boolean useVCFCodec) throws IOException { String workspaceAbsPath = BucketUtils.makeFilePathAbsolute(workspace); GenomicsDBExportConfiguration.ExportConfiguration.Builder exportConfigurationBuilder = GenomicsDBExportConfiguration.ExportConfiguration.newBuilder() .setWorkspace(workspace) .setReferenceGenome(reference) .setVidMappingFile(IOUtils.appendPathToDir(workspaceAbsPath, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME)) .setCallsetMappingFile(IOUtils.appendPathToDir(workspaceAbsPath, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME)) .setVcfHeaderFilename(IOUtils.appendPathToDir(workspaceAbsPath, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME)) .setProduceGTField(produceGTField) .setSitesOnlyQuery(sitesOnlyQuery) .setGenerateArrayNameFromPartitionBounds(true); GenomicsDBVidMapProto.VidMappingPB vidMapPB = null; try { vidMapPB = org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBUtils.getProtobufVidMappingFromJsonFile(IOUtils.appendPathToDir(workspace, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME)); } catch (final IOException e) { throw new UserException("Could not open vid json file "+GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME, e); } HashMap<String, Integer> fieldNameToIndexInVidFieldsList = org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBUtils.getFieldNameToListIndexInProtobufVidMappingObject(vidMapPB); vidMapPB = org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBUtils.updateINFOFieldCombineOperation(vidMapPB, fieldNameToIndexInVidFieldsList, GATKVCFConstants.RAW_MAPPING_QUALITY_WITH_DEPTH_KEY, "element_wise_sum"); if(vidMapPB != null) { exportConfigurationBuilder.setVidMapping(vidMapPB); } if (useVCFCodec) { return new GenomicsDBFeatureReader<>(exportConfigurationBuilder.build(), new VCFCodec(), Optional.empty()); } else { return new GenomicsDBFeatureReader<>(exportConfigurationBuilder.build(), new BCF2Codec(), Optional.empty()); } }
Example #8
Source File: VariantContextCollectionTest.java From hmftools with GNU General Public License v3.0 | 5 votes |
@NotNull private static VCFCodec createTestCodec() { VCFCodec codec = new VCFCodec(); VCFHeader header = new VCFHeader(Sets.newHashSet(), Sets.newHashSet(SAMPLE)); codec.setVCFHeader(header, VCFHeaderVersion.VCF4_2); return codec; }
Example #9
Source File: FeatureManagerUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@DataProvider(name = "DetectCorrectFileFormatTestData") public Object[][] getDetectCorrectFileFormatTestData() { return new Object[][] { { new File(FEATURE_MANAGER_TEST_DIRECTORY + "minimal_vcf4_file.vcf"), VCFCodec.class }, { new File(FEATURE_MANAGER_TEST_DIRECTORY + "minimal_vcf3_file.vcf"), VCF3Codec.class }, { new File(FEATURE_MANAGER_TEST_DIRECTORY + "minimal_bcf_file.bcf"), BCF2Codec.class }, { new File(FEATURE_MANAGER_TEST_DIRECTORY + "minimal_bed_file.bed"), BEDCodec.class}, { new File(FEATURE_MANAGER_TEST_DIRECTORY + "minimal_table_file.table"), TableCodec.class} }; }
Example #10
Source File: MakeVcfSampleNameMap.java From picard with MIT License | 5 votes |
private static AbstractFeatureReader<VariantContext, LineIterator> getReaderFromPath(final Path variantPath) { final String variantURI = variantPath.toAbsolutePath().toUri().toString(); try { return AbstractFeatureReader.getFeatureReader(variantURI, null, new VCFCodec(), false, Function.identity(), Function.identity()); } catch (final TribbleException e) { throw new PicardException("Failed to create reader from " + variantURI, e); } }
Example #11
Source File: GermlineVcfReader.java From hmftools with GNU General Public License v3.0 | 5 votes |
private void processVcf(final String vcfFile) { try { LOGGER.info("processing germline VCF({})", vcfFile); mSampleGermlineSVs.clear(); mSvFactory = new StructuralVariantFactory(new AlwaysPassFilter()); final AbstractFeatureReader<VariantContext, LineIterator> reader = AbstractFeatureReader.getFeatureReader( vcfFile, new VCFCodec(), false); reader.iterator().forEach(x -> processVariant(x)); if(mSampleGermlineSVs.isEmpty()) return; if (mConfig.LinkByAssembly) annotateAssembledLinks(mSvAssemblyData); if(mConfig.CheckDisruptions) { final String sampleId = mSampleGermlineSVs.get(0).SampleId; mGeneImpact.findDisruptiveVariants(sampleId, mSampleGermlineSVs); } writeSVs(); } catch(IOException e) { LOGGER.error("error reading vcf({}): {}", vcfFile, e.toString()); } }
Example #12
Source File: SageHotspotAnnotationTest.java From hmftools with GNU General Public License v3.0 | 5 votes |
@NotNull private static VCFCodec createTestCodec() { VCFCodec codec = new VCFCodec(); VCFHeader header = new VCFHeader(Sets.newHashSet(), Sets.newHashSet(SAMPLE)); codec.setVCFHeader(header, VCFHeaderVersion.VCF4_2); return codec; }
Example #13
Source File: RecoveredVariantFactoryTest.java From hmftools with GNU General Public License v3.0 | 5 votes |
@NotNull private static VCFCodec createTestCodec() { VCFCodec codec = new VCFCodec(); VCFHeader header = new VCFHeader(Sets.newHashSet(), Sets.newHashSet(SAMPLE)); codec.setVCFHeader(header, VCFHeaderVersion.VCF4_2); return codec; }
Example #14
Source File: StrelkaAllelicDepthTest.java From hmftools with GNU General Public License v3.0 | 5 votes |
@NotNull private static VCFCodec createTestCodec() { VCFCodec codec = new VCFCodec(); VCFHeader header = new VCFHeader(Sets.newHashSet(), Lists.newArrayList(NORMAL, TUMOR)); codec.setVCFHeader(header, VCFHeaderVersion.VCF4_2); return codec; }
Example #15
Source File: StructuralVariantFactoryTest.java From hmftools with GNU General Public License v3.0 | 5 votes |
@NotNull private static VCFCodec createTestCodec() { VCFCodec codec = new VCFCodec(); VCFHeader header = new VCFHeader(Sets.newHashSet(), Sets.newHashSet(SAMPLE)); codec.setVCFHeader(header, VCFHeaderVersion.VCF4_2); return codec; }
Example #16
Source File: VariantContextFromString.java From hmftools with GNU General Public License v3.0 | 5 votes |
@NotNull private static VCFCodec createTestCodec(@NotNull String sample) { VCFCodec codec = new VCFCodec(); VCFHeader header = new VCFHeader(Sets.newHashSet(), Sets.newHashSet(sample)); codec.setVCFHeader(header, VCFHeaderVersion.VCF4_2); return codec; }
Example #17
Source File: SomaticVariantFactoryTest.java From hmftools with GNU General Public License v3.0 | 5 votes |
@NotNull private static VCFCodec createTestCodec() { VCFCodec codec = new VCFCodec(); VCFHeader header = new VCFHeader(Sets.newHashSet(), Sets.newHashSet(SAMPLE)); codec.setVCFHeader(header, VCFHeaderVersion.VCF4_2); return codec; }
Example #18
Source File: SomaticVariantFactory.java From hmftools with GNU General Public License v3.0 | 5 votes |
public void fromVCFFile(@NotNull final String tumor, @Nullable final String reference, @Nullable final String rna, @NotNull final String vcfFile, Consumer<SomaticVariant> consumer) throws IOException { final List<VariantContext> variants = Lists.newArrayList(); try (final AbstractFeatureReader<VariantContext, LineIterator> reader = getFeatureReader(vcfFile, new VCFCodec(), false)) { final VCFHeader header = (VCFHeader) reader.getHeader(); if (!sampleInFile(tumor, header)) { throw new IllegalArgumentException("Sample " + tumor + " not found in vcf file " + vcfFile); } if (reference != null && !sampleInFile(reference, header)) { throw new IllegalArgumentException("Sample " + reference + " not found in vcf file " + vcfFile); } if (rna != null && !sampleInFile(rna, header)) { throw new IllegalArgumentException("Sample " + rna + " not found in vcf file " + vcfFile); } if (!header.hasFormatLine("AD")) { throw new IllegalArgumentException("Allelic depths is a required format field in vcf file " + vcfFile); } for (VariantContext variant : reader.iterator()) { if (filter.test(variant)) { createVariant(tumor, reference, rna, variant).ifPresent(consumer); } } } }
Example #19
Source File: StructuralVariantFileLoader.java From hmftools with GNU General Public License v3.0 | 5 votes |
@NotNull public static List<StructuralVariant> fromFile(@NotNull String vcfFileLocation, @NotNull VariantContextFilter filter) throws IOException { final StructuralVariantFactory factory = new StructuralVariantFactory(filter); try (final AbstractFeatureReader<VariantContext, LineIterator> reader = AbstractFeatureReader.getFeatureReader(vcfFileLocation, new VCFCodec(), false)) { reader.iterator().forEach(factory::addVariantContext); } return factory.results(); }
Example #20
Source File: GatherVcfsCloud.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
private static FeatureReader<VariantContext> getReaderFromVCFUri(final Path variantPath, final int cloudPrefetchBuffer) { final String variantURI = variantPath.toUri().toString(); return AbstractFeatureReader.getFeatureReader(variantURI, null, new VCFCodec(), false, BucketUtils.getPrefetchingWrapper(cloudPrefetchBuffer), Function.identity()); }
Example #21
Source File: FeatureInputUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
@Test public void testFeatureCodecCache() { Assert.assertEquals(getVariantFeatureInputWithCachedCodec().getFeatureCodecClass(), VCFCodec.class); }
Example #22
Source File: RecoveredVariantFactory.java From hmftools with GNU General Public License v3.0 | 4 votes |
RecoveredVariantFactory(@NotNull final PurityAdjuster purityAdjuster, @NotNull final String recoveryVCF) { reader = getFeatureReader(recoveryVCF, new VCFCodec(), true); ploidyFactory = new StructuralVariantLegPloidyFactory<>(purityAdjuster, PurpleCopyNumber::averageTumorCopyNumber); }
Example #23
Source File: ApplyVQSRIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
@Test public void testApplyRecalibrationAlleleSpecificINDELmodeGZIPIndex() throws IOException { final File tempGZIPOut = createTempFile("testApplyRecalibrationAlleleSpecificINDELmodeGZIP", ".vcf.gz"); final File expectedFile = new File(getToolTestDataDir(), "expected/applyIndelAlleleSpecificResult.vcf"); final SimpleInterval queryInterval = new SimpleInterval("3:113005755-195507036"); // The input file is the same file as used in testApplyRecalibrationAlleleSpecificINDELmode, except that the // hg38 sequence dictionary has been transplanted into the header (the header itself has been modified so that // contig "chr3" is renamed to "3" to match the variants in this file), and the file is a .gz. final String base = " -L " + queryInterval.toString() + " -mode INDEL -AS" + " -ts-filter-level 99.3" + " --variant " + getToolTestDataDir() + "VQSR.AStest.postSNPinput.HACKEDhg38header.vcf.gz" + " --output " + tempGZIPOut.getAbsolutePath() + " --tranches-file " + getToolTestDataDir() + "VQSR.AStest.indels.tranches" + " --recal-file " + getToolTestDataDir() + "VQSR.AStest.indels.recal.vcf" + " --" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE +" false"; final IntegrationTestSpec spec = new IntegrationTestSpec(base, Collections.emptyList()); spec.executeTest("testApplyRecalibrationAlleleSpecificINDELmodeGZIP", this); // make sure we got a tabix index final File tabixIndexFile = new File(tempGZIPOut.getAbsolutePath() + FileExtensions.TABIX_INDEX); Assert.assertTrue(tabixIndexFile.exists()); Assert.assertTrue(tabixIndexFile.length() > 0); // Now verify that the resulting index is valid by querying the same interval used above to run ApplyVQSR. // This should return all the variants in the file. Compare that with the number of records returned by (a // non-query) iterator that return all variants in the expected results file. try (final FeatureReader<VariantContext> expectedFileReader = AbstractFeatureReader.getFeatureReader(expectedFile.getAbsolutePath(), new VCFCodec(), false); final FeatureReader<VariantContext> outputFileReader = AbstractFeatureReader.getFeatureReader(tempGZIPOut.getAbsolutePath(), new VCFCodec())) { // results from the query should match the expected file results final long actualCount = outputFileReader.query( queryInterval.getContig(), queryInterval.getStart(), queryInterval.getEnd()).stream().count(); final long expectedCount = expectedFileReader.iterator().stream().count(); Assert.assertEquals(actualCount, expectedCount); } }