Java Code Examples for htsjdk.variant.vcf.VCFFileReader#close()
The following examples show how to use
htsjdk.variant.vcf.VCFFileReader#close().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MNVValidatorApplication.java From hmftools with GNU General Public License v3.0 | 6 votes |
/**
 * Reads the variants in {@code filePath}, accumulates them into potential MNV regions and writes the
 * variants produced by merging each completed region to {@code outputVcf}.
 *
 * <p>The input reader and output writer are managed by try-with-resources so that both are closed even
 * when reading, merging or writing fails. The writer is closed before the reader.
 *
 * @param strelka   when {@code true}, every raw variant is first passed through
 *                  {@code StrelkaPostProcess.simplifyVariant} with the tumor genotype
 * @param filePath  path of the input VCF
 * @param outputVcf path of the VCF to write
 * @param tumorBam  value handed to {@code ImmutableMNVValidator.of}
 */
private static void processVariants(boolean strelka, @NotNull final String filePath, @NotNull final String outputVcf,
        @NotNull final String tumorBam) {
    try (VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false)) {
        final VCFHeader outputHeader = generateOutputHeader(vcfReader.getFileHeader(), "TUMOR");
        try (VariantContextWriter vcfWriter = new VariantContextWriterBuilder().setOutputFile(outputVcf)
                .setReferenceDictionary(vcfReader.getFileHeader().getSequenceDictionary())
                .build()) {
            vcfWriter.writeHeader(outputHeader);

            final MNVValidator validator = ImmutableMNVValidator.of(tumorBam);
            final MNVMerger merger = ImmutableMNVMerger.of(outputHeader);

            // Left: the region still being extended. Right: a region completed by the last added variant.
            Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair =
                    ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty());
            for (final VariantContext rawVariant : vcfReader) {
                final VariantContext simplifiedVariant = strelka
                        ? StrelkaPostProcess.simplifyVariant(rawVariant, StrelkaPostProcess.TUMOR_GENOTYPE)
                        : rawVariant;

                final PotentialMNVRegion potentialMNV = outputPair.getLeft();
                outputPair = MNVDetector.addMnvToRegion(potentialMNV, simplifiedVariant);
                outputPair.getRight()
                        .ifPresent(mnvRegion -> validator.mergeVariants(mnvRegion, merger).forEach(vcfWriter::add));
            }

            // Flush the region that was still open when the input ran out.
            validator.mergeVariants(outputPair.getLeft(), merger).forEach(vcfWriter::add);
        }
    }
    LOGGER.info("Written output variants to " + outputVcf);
}
Example 2
Source File: SortVcf.java From picard with MIT License | 6 votes |
/** * Merge the inputs and sort them by adding each input's content to a single SortingCollection. * <p/> * NB: It would be better to have a merging iterator as in MergeSamFiles, as this would perform better for pre-sorted inputs. * Here, we are assuming inputs are unsorted, and so adding their VariantContexts iteratively is fine for now. * MergeVcfs exists for simple merging of presorted inputs. * * @param readers - a list of VCFFileReaders, one for each input VCF * @param outputHeader - The merged header whose information we intend to use in the final output file */ private SortingCollection<VariantContext> sortInputs(final List<VCFFileReader> readers, final VCFHeader outputHeader) { final ProgressLogger readProgress = new ProgressLogger(log, 25000, "read", "records"); // NB: The default MAX_RECORDS_IN_RAM may not be appropriate here. VariantContexts are smaller than SamRecords // We would have to play around empirically to find an appropriate value. We are not performing this optimization at this time. final SortingCollection<VariantContext> sorter = SortingCollection.newInstance( VariantContext.class, new VCFRecordCodec(outputHeader, VALIDATION_STRINGENCY != ValidationStringency.STRICT), outputHeader.getVCFRecordComparator(), MAX_RECORDS_IN_RAM, TMP_DIR); int readerCount = 1; for (final VCFFileReader reader : readers) { log.info("Reading entries from input file " + readerCount); for (final VariantContext variantContext : reader) { sorter.add(variantContext); readProgress.record(variantContext.getContig(), variantContext.getStart()); } reader.close(); readerCount++; } return sorter; }
Example 3
Source File: InputValidationTest.java From imputationserver with GNU Affero General Public License v3.0 | 5 votes |
public void testTabixIndexCreationChr20() throws IOException { String configFolder = "test-data/configs/hapmap-chr1"; // input folder contains no vcf or vcf.gz files String inputFolder = "test-data/data/chr20-phased"; // create workflow context WorkflowTestContext context = buildContext(inputFolder, "hapmap2"); // create step instance InputValidation inputValidation = new InputValidationMock(configFolder); // run and test boolean result = run(context, inputValidation); // check if step is failed assertEquals(true, result); assertTrue(context.hasInMemory("[OK] 1 valid VCF file(s) found.")); // test tabix index and count snps String vcfFilename = inputFolder + "/chr20.R50.merged.1.330k.recode.small.vcf.gz"; VCFFileReader vcfReader = new VCFFileReader(new File(vcfFilename), new File(vcfFilename + TabixUtils.STANDARD_INDEX_EXTENSION), true); CloseableIterator<VariantContext> snps = vcfReader.query("20", 1, 1000000000); int count = 0; while (snps.hasNext()) { snps.next(); count++; } snps.close(); vcfReader.close(); //check snps assertEquals(7824, count); }
Example 4
Source File: ByIntervalListVariantContextIteratorTest.java From picard with MIT License | 5 votes |
/**
 * A single-base interval at 2:167166899 must yield exactly one variant, starting at that position.
 * The reader is closed even when an assertion fails.
 */
@Test
public void testSimpleOverlap() {
    final IntervalList intervalList = new IntervalList(header);
    intervalList.add(new Interval("2", 167166899, 167166899));

    try (final VCFFileReader reader = getReader(CEU_TRIOS_SNPS_VCF)) {
        final Iterator<VariantContext> iterator = new ByIntervalListVariantContextIterator(reader, intervalList);
        Assert.assertTrue(iterator.hasNext());
        final VariantContext ctx = iterator.next();
        Assert.assertEquals(ctx.getStart(), 167166899);
        Assert.assertFalse(iterator.hasNext());
    }
}
Example 5
Source File: ByIntervalListVariantContextIteratorTest.java From picard with MIT License | 5 votes |
/**
 * Iterating the VCF referenced by {@code EMPTY_VCF} over an interval on the first dictionary sequence
 * must yield nothing. The reader is closed even when the assertion fails.
 */
@Test
public void testNoVariants() {
    final IntervalList intervalList = new IntervalList(header);
    intervalList.add(new Interval(this.dict.getSequence(0).getSequenceName(), 1, 100));

    try (final VCFFileReader reader = getReader(EMPTY_VCF)) {
        final Iterator<VariantContext> iterator = new ByIntervalListVariantContextIterator(reader, intervalList);
        Assert.assertFalse(iterator.hasNext());
    }
}
Example 6
Source File: TestFilterVcf.java From picard with MIT License | 5 votes |
/**
 * Consumes a VCF and returns a ListMap where the keys are the IDs of filtered out sites and the values
 * are the filters applied to each of those sites.
 *
 * @param vcf the VCF file to read; it is opened without requiring an index
 * @return a map from variant ID to its filters, containing only filtered variants
 */
private ListMap<String, String> slurpFilters(final File vcf) {
    final ListMap<String, String> map = new ListMap<>();
    // try-with-resources closes the reader even if reading a record fails.
    try (final VCFFileReader in = new VCFFileReader(vcf, false)) {
        for (final VariantContext ctx : in) {
            if (ctx.isNotFiltered()) {
                continue;
            }
            for (final String filter : ctx.getFilters()) {
                map.add(ctx.getID(), filter);
            }
        }
    }
    return map;
}
Example 7
Source File: ByIntervalListVariantContextIteratorTest.java From picard with MIT License | 5 votes |
/**
 * Builds a {@code SAMFileHeader} whose sequence dictionary is taken from the header of the VCF
 * referenced by {@code CEU_TRIOS_SNPS_VCF}.
 *
 * @return a new header carrying that sequence dictionary
 */
private SAMFileHeader getSAMFileHeader() {
    final SAMSequenceDictionary dict;
    // The reader is only needed to get at the dictionary; close it straight away, even on failure.
    try (final VCFFileReader reader = getReader(CEU_TRIOS_SNPS_VCF)) {
        dict = reader.getFileHeader().getSequenceDictionary();
    }

    final SAMFileHeader header = new SAMFileHeader();
    header.setSequenceDictionary(dict);
    return header;
}
Example 8
Source File: ByIntervalListVariantContextIteratorTest.java From picard with MIT License | 5 votes |
@Test public void testVariantOverlappingMultipleIntervalsIsReturnedOnlyOnce() { final IntervalList intervalList = new IntervalList(header); intervalList.add(new Interval("12", 68921962, 68921962)); // deletion spans this intervalList.add(new Interval("12", 68921964, 68921964)); // deletion spans this final VCFFileReader reader = getReader(CEU_TRIOS_INDELS_VCF); final Iterator<VariantContext> iterator = new ByIntervalListVariantContextIterator(reader, intervalList); Assert.assertTrue(iterator.hasNext()); final VariantContext ctx = iterator.next(); Assert.assertEquals(ctx.getStart(), 68921960); Assert.assertEquals(ctx.getEnd(), 68921966); Assert.assertFalse(iterator.hasNext()); reader.close(); }
Example 9
Source File: RenameSampleInVcf.java From picard with MIT License | 5 votes |
/**
 * Copies {@code INPUT} to {@code OUTPUT}, replacing the sample name in the header with
 * {@code NEW_SAMPLE_NAME}.
 *
 * <p>The reader and writer are managed by try-with-resources, so the input is released when validation
 * rejects it and both files are released when copying fails. The writer is closed before the reader.
 *
 * @return 0 on success
 * @throws IllegalArgumentException if the input has more than one sample, or if {@code OLD_SAMPLE_NAME}
 *                                  is set and differs from the input's first sample
 */
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    try (final VCFFileReader in = new VCFFileReader(INPUT, false)) {
        final VCFHeader header = in.getFileHeader();

        if (header.getGenotypeSamples().size() > 1) {
            throw new IllegalArgumentException("Input VCF must be single-sample.");
        }

        if (OLD_SAMPLE_NAME != null && !OLD_SAMPLE_NAME.equals(header.getGenotypeSamples().get(0))) {
            throw new IllegalArgumentException("Input VCF did not contain expected sample. Contained: " + header.getGenotypeSamples().get(0));
        }

        // Start from the builder defaults and switch on-the-fly indexing according to CREATE_INDEX.
        final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterBuilder.DEFAULT_OPTIONS);
        if (CREATE_INDEX) {
            options.add(Options.INDEX_ON_THE_FLY);
        } else {
            options.remove(Options.INDEX_ON_THE_FLY);
        }

        final VCFHeader outHeader = new VCFHeader(header.getMetaDataInInputOrder(), CollectionUtil.makeList(NEW_SAMPLE_NAME));
        try (final VariantContextWriter out = new VariantContextWriterBuilder()
                .setOptions(options)
                .setOutputFile(OUTPUT)
                .setReferenceDictionary(outHeader.getSequenceDictionary())
                .build()) {
            out.writeHeader(outHeader);
            for (final VariantContext ctx : in) {
                out.add(ctx);
            }
        }
    }

    return 0;
}
Example 10
Source File: VcfFileSegmentGenerator.java From picard with MIT License | 5 votes |
/**
 * Returns the sequence records of the sequence dictionary found in the header of {@code vcf}.
 *
 * @param vcf the VCF file whose header is read
 * @return the sequences of the header's sequence dictionary
 */
private static List<SAMSequenceRecord> readSequences(final File vcf) {
    final SAMSequenceDictionary dict;
    // Only the header is needed; try-with-resources releases the file even if reading it fails.
    try (final VCFFileReader reader = new VCFFileReader(vcf)) {
        final VCFHeader header = reader.getFileHeader();
        dict = header.getSequenceDictionary();
    }
    return dict.getSequences();
}
Example 11
Source File: InputValidationTest.java From imputationserver with GNU Affero General Public License v3.0 | 5 votes |
public void testTabixIndexCreationChr1() throws IOException { String configFolder = "test-data/configs/hapmap-chr1"; // input folder contains no vcf or vcf.gz files String inputFolder = "test-data/data/single"; // create workflow context WorkflowTestContext context = buildContext(inputFolder, "hapmap2"); context.setInput("phasing", "eagle"); // create step instance InputValidation inputValidation = new InputValidationMock(configFolder); // run and test boolean result = run(context, inputValidation); // check if step is failed assertEquals(true, result); assertTrue(context.hasInMemory("[OK] 1 valid VCF file(s) found.")); // test tabix index and count snps String vcfFilename = inputFolder + "/minimac_test.50.vcf.gz"; VCFFileReader vcfReader = new VCFFileReader(new File(vcfFilename), new File(vcfFilename + TabixUtils.STANDARD_INDEX_EXTENSION), true); CloseableIterator<VariantContext> snps = vcfReader.query("1", 1, 1000000000); int count = 0; while (snps.hasNext()) { snps.next(); count++; } snps.close(); vcfReader.close(); //check snps assertEquals(905, count); }
Example 12
Source File: PurpleStructuralVariantSupplier.java From hmftools with GNU General Public License v3.0 | 5 votes |
/**
 * Creates a supplier pre-loaded with every variant of {@code templateVCF}, using an output header
 * generated from that file's header.
 *
 * <p>The template reader is closed when the constructor finishes, including when header generation or
 * reading a variant fails.
 *
 * @param version       version passed to {@code generateOutputHeader}
 * @param templateVCF   path of the VCF whose header and variants are loaded
 * @param outputVCF     stored for later use as the output path
 * @param refGenomePath stored for later use as the reference genome path
 */
PurpleStructuralVariantSupplier(@NotNull final String version, @NotNull final String templateVCF,
        @NotNull final String outputVCF, @NotNull final String refGenomePath) {
    this.outputVCF = outputVCF;
    this.refGenomePath = refGenomePath;

    try (VCFFileReader vcfReader = new VCFFileReader(new File(templateVCF), false)) {
        this.header = Optional.of(generateOutputHeader(version, vcfReader.getFileHeader()));
        this.variants = new VariantContextCollectionImpl(header.get());

        for (VariantContext context : vcfReader) {
            variants.add(context);
        }
    }
}
Example 13
Source File: ByIntervalListVariantContextIteratorTest.java From picard with MIT License | 5 votes |
/**
 * An interval on contig "3" must return no variants from the VCF referenced by
 * {@code CEU_TRIOS_SNPS_VCF}. The reader is closed even when the assertion fails.
 */
@Test
public void testNoOverlapDifferentContig() {
    final IntervalList intervalList = new IntervalList(header);
    intervalList.add(new Interval("3", 167166899, 167166899));

    try (final VCFFileReader reader = getReader(CEU_TRIOS_SNPS_VCF)) {
        final Iterator<VariantContext> iterator = new ByIntervalListVariantContextIterator(reader, intervalList);
        Assert.assertFalse(iterator.hasNext());
    }
}
Example 14
Source File: ThreadsafeTest.java From picard with MIT License | 5 votes |
/** This test doesn't even test the class, it just makes sure the cornercase test data is really a cornercase */ @Test public void ensureTestDataActuallyHasWideVariantAtTenMillion() { final Joiner joiner = Joiner.on(":"); // Cheat: do a string compare final VCFFileReader r = new VCFFileReader(VCF_WITH_MULTI_ALLELIC_VARIANT_AT_POSITION_10MILLION); Assert.assertEquals( joiner.join(r.query("1", TEN_MILLION, TEN_MILLION)), joiner.join(r.query("1", TEN_MILLION + 5, TEN_MILLION + 5)) ); r.close(); }
Example 15
Source File: FastVCFFileReader.java From imputationserver with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Opens {@code vcfFilename} for fast reading and initialises the sample list, sample count, reusable
 * variant context and line parser from the file's VCF header.
 *
 * <p>The temporary header reader is closed even when header handling throws.
 *
 * @param vcfFilename path of the VCF file
 * @throws IOException declared by the constructor; raised by the superclass constructor, presumably
 *                     when the file cannot be opened (TODO confirm)
 */
public FastVCFFileReader(String vcfFilename) throws IOException {

    super(vcfFilename);

    // load header
    try (VCFFileReader reader = new VCFFileReader(new File(vcfFilename), false)) {
        VCFHeader header = reader.getFileHeader();
        samples = header.getGenotypeSamples();
        samplesCount = samples.size();
        variantContext = new MinimalVariantContext(samplesCount);
    }

    parser = new VCFLineParser(samplesCount);
}
Example 16
Source File: StrelkaPostProcessApplication.java From hmftools with GNU General Public License v3.0 | 5 votes |
/**
 * Reads the variants in {@code filePath}, keeps those accepted by the Strelka post-process filter,
 * accumulates them into potential MNV regions and writes the variants produced by merging each
 * completed region to {@code outputVcf}.
 *
 * <p>The input reader and output writer are managed by try-with-resources so that both are closed even
 * when filtering, merging or writing fails. The writer is closed before the reader.
 *
 * @param filePath             path of the input VCF
 * @param highConfidenceSlicer slicer handed to the {@code StrelkaPostProcess} filter
 * @param outputVcf            path of the VCF to write
 * @param sampleName           sample name used for the output header and for simplifying variants
 * @param tumorBam             value handed to {@code ImmutableMNVValidator.of}
 */
private static void processVariants(@NotNull final String filePath, @NotNull final Slicer highConfidenceSlicer,
        @NotNull final String outputVcf, @NotNull final String sampleName, @NotNull final String tumorBam) {
    try (VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false)) {
        final VCFHeader outputHeader = generateOutputHeader(vcfReader.getFileHeader(), sampleName);
        try (VariantContextWriter writer = new VariantContextWriterBuilder().setOutputFile(outputVcf)
                .setReferenceDictionary(outputHeader.getSequenceDictionary())
                .build()) {
            writer.writeHeader(outputHeader);

            final MNVValidator validator = ImmutableMNVValidator.of(tumorBam);
            final MNVMerger merger = ImmutableMNVMerger.of(outputHeader);

            // Left: the region still being extended. Right: a region completed by the last added variant.
            Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair =
                    ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty());

            final VariantContextFilter filter = new StrelkaPostProcess(highConfidenceSlicer);
            for (final VariantContext variantContext : vcfReader) {
                if (filter.test(variantContext)) {
                    final VariantContext simplifiedVariant = StrelkaPostProcess.simplifyVariant(variantContext, sampleName);
                    final PotentialMNVRegion potentialMNV = outputPair.getLeft();
                    outputPair = MNVDetector.addMnvToRegion(potentialMNV, simplifiedVariant);
                    outputPair.getRight()
                            .ifPresent(mnvRegion -> validator.mergeVariants(mnvRegion, merger).forEach(writer::add));
                }
            }

            // Flush the region that was still open when the input ran out.
            validator.mergeVariants(outputPair.getLeft(), merger).forEach(writer::add);
        }
    }
    LOGGER.info("Written output variants to " + outputVcf);
}
Example 17
Source File: MNVDetectorApplication.java From hmftools with GNU General Public License v3.0 | 5 votes |
/**
 * Reads the variants in {@code filePath}, accumulates them into potential MNV regions, and writes every
 * region that survives {@code filterMnvRegion} to both a VCF and a BED file.
 *
 * <p>All three files are managed by try-with-resources, so they are closed even when detection or
 * writing fails. Resources close in reverse order of opening: VCF writer, BED writer, then the reader.
 *
 * @param filePath  path of the input VCF
 * @param outputVcf path of the VCF to write
 * @param outputBed path of the BED file to write; an existing file is overwritten
 * @param strelka   when {@code true}, the output header is regenerated for the tumor genotype and every
 *                  raw variant is simplified before detection
 * @throws IOException if the BED file cannot be opened or closed
 */
private static void processVariants(@NotNull final String filePath, @NotNull final String outputVcf,
        @NotNull final String outputBed, boolean strelka) throws IOException {
    try (VCFFileReader vcfReader = new VCFFileReader(new File(filePath), false)) {
        final VCFHeader outputHeader = strelka
                ? generateOutputHeader(vcfReader.getFileHeader(), StrelkaPostProcess.TUMOR_GENOTYPE)
                : vcfReader.getFileHeader();

        try (BufferedWriter bedWriter = new BufferedWriter(new FileWriter(outputBed, false));
                VariantContextWriter vcfWriter = new VariantContextWriterBuilder().setOutputFile(outputVcf)
                        .setReferenceDictionary(outputHeader.getSequenceDictionary())
                        .build()) {
            vcfWriter.writeHeader(outputHeader);

            // Left: the region still being extended. Right: a region completed by the last added variant.
            Pair<PotentialMNVRegion, Optional<PotentialMNVRegion>> outputPair =
                    ImmutablePair.of(PotentialMNVRegion.empty(), Optional.empty());
            for (final VariantContext rawVariant : vcfReader) {
                final VariantContext variant = strelka
                        ? StrelkaPostProcess.simplifyVariant(rawVariant, StrelkaPostProcess.TUMOR_GENOTYPE)
                        : rawVariant;

                final PotentialMNVRegion potentialMNVregion = outputPair.getLeft();
                outputPair = MNVDetector.addMnvToRegion(potentialMNVregion, variant);
                outputPair.getRight()
                        .ifPresent(mnvRegion -> filterMnvRegion(mnvRegion).ifPresent(
                                filteredRegion -> writeMnvRegionToFiles(filteredRegion, vcfWriter, bedWriter, "\n")));
            }

            // Flush the region that was still open when the input ran out; it is written with an empty
            // final argument instead of "\n".
            filterMnvRegion(outputPair.getLeft())
                    .ifPresent(mnvRegion -> writeMnvRegionToFiles(mnvRegion, vcfWriter, bedWriter, ""));
        }
    }
    LOGGER.info("Written output variants to {}. Written bed regions to {}.", outputVcf, outputBed);
}
Example 18
Source File: PonApplication.java From hmftools with GNU General Public License v3.0 | 5 votes |
/**
 * Builds and writes the panel of normals one sequence at a time.
 *
 * <p>The sequence dictionary is read from the first entry of {@code files}. For each sequence, one task
 * per file matching {@code GLOB} in the {@code input} directory is submitted to the executor; once all
 * tasks for that sequence have finished, the built result is written to {@code vcf}.
 *
 * <p>The dictionary reader and each directory stream are closed via try-with-resources; a directory
 * stream holds an open handle on the directory until it is closed.
 *
 * @throws IOException          if the input directory cannot be listed
 * @throws ExecutionException   if a submitted task failed
 * @throws InterruptedException if interrupted while waiting for a task
 */
private void run() throws IOException, ExecutionException, InterruptedException {
    if (files.isEmpty()) {
        return;
    }

    final SAMSequenceDictionary dictionary;
    try (VCFFileReader dictionaryReader = new VCFFileReader(files.get(0), true)) {
        dictionary = dictionaryReader.getFileHeader().getSequenceDictionary();
    }

    for (SAMSequenceRecord samSequenceRecord : dictionary.getSequences()) {
        LOGGER.info("Processing sequence {}", samSequenceRecord.getSequenceName());
        final PonBuilder ponBuilder = new PonBuilder();
        final RunnableTaskCompletion runnableTaskCompletion = new RunnableTaskCompletion();

        List<Future<?>> contigFutures = Lists.newArrayList();

        // Fully qualified so that no additional import is required by this change.
        try (java.nio.file.DirectoryStream<Path> inputFiles =
                Files.newDirectoryStream(new File(input).toPath(), GLOB)) {
            for (Path file : inputFiles) {
                Runnable runnable = () -> addVariantsFromFileToBuilder(ponBuilder, samSequenceRecord, file);
                contigFutures.add(executorService.submit(runnableTaskCompletion.task(runnable)));
            }
        }

        // Wait for every file of this sequence before writing it out.
        for (Future<?> contigFuture : contigFutures) {
            contigFuture.get();
        }

        vcf.write(ponBuilder.build());
    }
}
Example 19
Source File: ImputationChrXTest.java From imputationserver with GNU Affero General Public License v3.0 | 4 votes |
@Test public void testPipelineChrXWithEaglePhasingOnly() throws IOException, ZipException { if (!new File( "test-data/configs/hapmap-chrX-hg38/ref-panels/ALL.X.nonPAR.phase1_v3.snps_indels_svs.genotypes.all.noSingleton.recode.hg38.bcf") .exists()) { System.out.println("chrX bcf nonPAR file not available"); return; } String configFolder = "test-data/configs/hapmap-chrX"; String inputFolder = "test-data/data/chrX-unphased"; // create workflow context WorkflowTestContext context = buildContext(inputFolder, "phase1"); context.setInput("mode", "phasing"); // run qc to create chunkfile QcStatisticsMock qcStats = new QcStatisticsMock(configFolder); boolean result = run(context, qcStats); assertTrue(result); // add panel to hdfs importRefPanel(FileUtil.path(configFolder, "ref-panels")); // importMinimacMap("test-data/B38_MAP_FILE.map"); importBinaries("files/bin"); // run imputation ImputationMinimac3Mock imputation = new ImputationMinimac3Mock(configFolder); result = run(context, imputation); assertTrue(result); // run export CompressionEncryptionMock export = new CompressionEncryptionMock("files"); result = run(context, export); assertTrue(result); ZipFile zipFile = new ZipFile("test-data/tmp/local/chr_X.zip", PASSWORD.toCharArray()); zipFile.extractAll("test-data/tmp"); VcfFile vcfFile = VcfFileUtil.load("test-data/tmp/chrX.phased.vcf.gz", 100000000, false); assertEquals(true, vcfFile.isPhased()); VCFFileReader vcfReader = new VCFFileReader(new File(vcfFile.getVcfFilename()), false); CloseableIterator<VariantContext> it = vcfReader.iterator(); while (it.hasNext()) { VariantContext line = it.next(); if (line.getStart() == 44322058) { assertEquals("A", line.getGenotype("HG00096").getGenotypeString()); System.out.println(line.getGenotype("HG00097").getGenotypeString()); assertEquals("A|A", line.getGenotype("HG00097").getGenotypeString()); } } vcfReader.close(); FileUtil.deleteDirectory("test-data/tmp"); }
Example 20
Source File: ImputationChrXTest.java From imputationserver with GNU Affero General Public License v3.0 | 4 votes |
@Test public void testChrXLeaveOneOutPipelinePhased() throws IOException, ZipException { // SNP 26963697 from input excluded and imputed! // true genotypes: // 1,1|1,1|1,1|1,1,1|1,1,1|1,1|1,1,0,1|1,1|0,1,1,1,1,1,1|1,1,1|1,1|1,1|1,1|1,1|1,1|0, String configFolder = "test-data/configs/hapmap-chrX"; String inputFolder = "test-data/data/chrX-phased-loo"; File file = new File("test-data/tmp"); if (file.exists()) { FileUtil.deleteDirectory(file); } // create workflow context WorkflowTestContext context = buildContext(inputFolder, "phase1"); // run qc to create chunkfile QcStatisticsMock qcStats = new QcStatisticsMock(configFolder); boolean result = run(context, qcStats); assertTrue(result); // add panel to hdfs importRefPanel(FileUtil.path(configFolder, "ref-panels")); // importMinimacMap("test-data/B38_MAP_FILE.map"); importBinaries("files/bin"); // run imputation ImputationMinimac3Mock imputation = new ImputationMinimac3Mock(configFolder); result = run(context, imputation); assertTrue(result); // run export CompressionEncryptionMock export = new CompressionEncryptionMock("files"); result = run(context, export); assertTrue(result); ZipFile zipFile = new ZipFile("test-data/tmp/local/chr_X.zip", PASSWORD.toCharArray()); zipFile.extractAll("test-data/tmp"); VcfFile vcfFile = VcfFileUtil.load("test-data/tmp/chrX.dose.vcf.gz", 100000000, false); VCFFileReader vcfReader = new VCFFileReader(new File(vcfFile.getVcfFilename()), false); CloseableIterator<VariantContext> it = vcfReader.iterator(); while (it.hasNext()) { VariantContext line = it.next(); if (line.getStart() == 26963697) { assertEquals(2, line.getHetCount()); assertEquals(1, line.getHomRefCount()); assertEquals(23, line.getHomVarCount()); } } vcfReader.close(); FileUtil.deleteDirectory(file); }