Java Code Examples for htsjdk.samtools.util.IntervalList#write()
The following examples show how to use
htsjdk.samtools.util.IntervalList#write() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: IntervalListTools.java From picard with MIT License | 6 votes |
/** * Method to scatter an interval list by locus. * * @param list The list of intervals to scatter * @return The scattered intervals, represented as a {@link List} of {@link IntervalList} */ private ScatterSummary writeScatterIntervals(final IntervalList list) { final IntervalListScatterer scatterer = SUBDIVISION_MODE.make(); final IntervalListScatter scatter = new IntervalListScatter(scatterer, list, SCATTER_COUNT); final DecimalFormat fileNameFormatter = new DecimalFormat("0000"); int fileIndex = 1; final ScatterSummary summary = new ScatterSummary(); for (final IntervalList intervals : scatter) { summary.size++; summary.baseCount += intervals.getBaseCount(); summary.intervalCount += intervals.getIntervals().size(); intervals.write(createDirectoryAndGetScatterFile(OUTPUT, SCATTER_COUNT, fileNameFormatter.format(fileIndex++))); } return summary; }
Example 2
Source File: IntervalUtilsUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test(expectedExceptions={UserException.CouldNotReadInputFile.class, UserException.MalformedGenomeLoc.class, UserException.MalformedFile.class}, dataProvider="invalidIntervalTestData") public void testIntervalFileToListInvalidPicardIntervalHandling(GenomeLocParser genomeLocParser, String contig, int intervalStart, int intervalEnd ) throws Exception { final SAMFileHeader picardFileHeader = new SAMFileHeader(); picardFileHeader.addSequence(genomeLocParser.getContigInfo("1")); picardFileHeader.addSequence(new SAMSequenceRecord("2", 100000)); final IntervalList picardIntervals = new IntervalList(picardFileHeader); picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname")); final File picardIntervalFile = createTempFile("testIntervalFileToListInvalidPicardIntervalHandling", ".interval_list"); picardIntervals.write(picardIntervalFile); // loadIntervals() will validate all intervals against the sequence dictionary in our genomeLocParser, // and should throw for all intervals in our invalidIntervalTestData set IntervalUtils.parseIntervalArguments(genomeLocParser, picardIntervalFile.getAbsolutePath()); }
Example 3
Source File: IntervalUtilsUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test(expectedExceptions={UserException.MalformedFile.class, UserException.MalformedGenomeLoc.class}, dataProvider="invalidIntervalTestData") public void testLoadIntervalsInvalidPicardIntervalHandling(GenomeLocParser genomeLocParser, String contig, int intervalStart, int intervalEnd ) throws Exception { SAMFileHeader picardFileHeader = new SAMFileHeader(); picardFileHeader.addSequence(genomeLocParser.getContigInfo("1")); // Intentionally add an extra contig not in our genomeLocParser's sequence dictionary // so that we can add intervals to the Picard interval file for contigs not in our dictionary picardFileHeader.addSequence(new SAMSequenceRecord("2", 100000)); IntervalList picardIntervals = new IntervalList(picardFileHeader); picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname")); File picardIntervalFile = createTempFile("testLoadIntervalsInvalidPicardIntervalHandling", ".intervals"); picardIntervals.write(picardIntervalFile); List<String> intervalArgs = new ArrayList<>(1); intervalArgs.add(picardIntervalFile.getAbsolutePath()); // loadIntervals() will validate all intervals against the sequence dictionary in our genomeLocParser, // and should throw for all intervals in our invalidIntervalTestData set IntervalUtils.loadIntervals(intervalArgs, IntervalSetRule.UNION, IntervalMergingRule.ALL, 0, genomeLocParser); }
Example 4
Source File: GenomicsDBImport.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Before traversal, fix configuration parameters and initialize * GenomicsDB. Hard-coded to handle only VCF files and headers */ @Override public void onTraversalStart() { if (getIntervalsFromExistingWorkspace) { final IntervalList outputList = new IntervalList(getBestAvailableSequenceDictionary()); intervals.forEach(i -> outputList.add(new Interval(i.getContig(), i.getStart(), i.getEnd()))); final Path intervalListOutputPath = IOUtils.getPath(intervalListOutputPathString); outputList.write(intervalListOutputPath.toFile()); return; } String workspaceDir = BucketUtils.makeFilePathAbsolute(overwriteCreateOrCheckWorkspace()); vidMapJSONFile = IOUtils.appendPathToDir(workspaceDir, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME); callsetMapJSONFile = IOUtils.appendPathToDir(workspaceDir, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME); vcfHeaderFile = IOUtils.appendPathToDir(workspaceDir, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME); if (doIncrementalImport) { logger.info("Callset Map JSON file will be re-written to " + callsetMapJSONFile); logger.info("Incrementally importing to workspace - " + workspaceDir); } else { logger.info("Vid Map JSON file will be written to " + vidMapJSONFile); logger.info("Callset Map JSON file will be written to " + callsetMapJSONFile); logger.info("Complete VCF Header will be written to " + vcfHeaderFile); logger.info("Importing to workspace - " + workspaceDir); } initializeInputPreloadExecutorService(); }
Example 5
Source File: FilterIntervals.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Override public Object doWork() { validateArguments(); resolveAndValidateIntervals(); final SimpleIntervalCollection filteredIntervals = filterIntervals(); logger.info(String.format("Writing filtered intervals to %s...", outputFilteredIntervalsFile.getAbsolutePath())); final IntervalList filteredIntervalList = new IntervalList(filteredIntervals.getMetadata().getSequenceDictionary()); filteredIntervals.getIntervals().forEach(i -> filteredIntervalList.add(new Interval(i))); filteredIntervalList.write(outputFilteredIntervalsFile); logger.info(String.format("%s complete.", getClass().getSimpleName())); return null; }
Example 6
Source File: IntervalUtils.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Splits an interval list into multiple files. * @param fileHeader The sam file header. * @param splits Pre-divided genome locs returned by splitFixedIntervals. * @param scatterParts The output interval lists to write to. */ public static void scatterFixedIntervals(final SAMFileHeader fileHeader, final List<List<GenomeLoc>> splits, final List<File> scatterParts) { Utils.nonNull(fileHeader, "fileHeader is null"); Utils.nonNull(splits, "splits is null"); Utils.nonNull(scatterParts, "scatterParts is null"); Utils.containsNoNull(splits, "null split loc"); if (splits.size() != scatterParts.size()) { throw new CommandLineException.BadArgumentValue("splits", String.format("Split points %d does not equal the number of scatter parts %d.", splits.size(), scatterParts.size())); } int fileIndex = 0; int locIndex = 1; for (final List<GenomeLoc> split : splits) { final IntervalList intervalList = new IntervalList(fileHeader); for (final GenomeLoc loc : split) { intervalList.add(toInterval(loc, locIndex++)); } intervalList.write(scatterParts.get(fileIndex++)); } }
Example 7
Source File: PreprocessIntervals.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Override public Object onTraversalSuccess() { final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary(); final List<SimpleInterval> inputIntervals = hasUserSuppliedIntervals() ? intervalArgumentCollection.getIntervals(sequenceDictionary) : IntervalUtils.getAllIntervalsForReference(sequenceDictionary); // if the user didn't add any intervals, // we assume that they wanted to do whole genome sequencing logger.info("Padding intervals..."); final IntervalList paddedIntervalList = padIntervals(inputIntervals, padding, sequenceDictionary); logger.info("Generating bins..."); final IntervalList unfilteredBins = generateBins(paddedIntervalList, binLength, sequenceDictionary); logger.info("Filtering bins containing only Ns..."); final ReferenceDataSource reference = ReferenceDataSource.of(referenceArguments.getReferencePath()); final IntervalList bins = filterBinsContainingOnlyNs(unfilteredBins, reference); logger.info(String.format("Writing bins to %s...", outputPreprocessedIntervalsFile.getAbsolutePath())); bins.write(outputPreprocessedIntervalsFile); logger.info(String.format("%s complete.", getClass().getSimpleName())); return null; }
Example 8
Source File: CreateSnpIntervalFromVcf.java From Drop-seq with MIT License | 5 votes |
@Override protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsWritable(OUTPUT); IntervalList result = processData(this.INPUT, this.SEQUENCE_DICTIONARY, SAMPLE, this.GQ_THRESHOLD, this.HET_SNPS_ONLY); result.write(this.OUTPUT); return 0; }
Example 9
Source File: FilterIntervalsIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
@DataProvider(name = "dataAllFilters") public Object[][] dataAllFilters() { final int numSamples = 100; final int numIntervals = 120; final int numIntervalsBelowCountThreshold = 10; final int numIntervalsFailingAnnotationFilters = 10; final int numUncorruptedSamples = 10; final int lowCountFilterCountThreshold = 5; final double epsilon = 1E-1; final double percentageOfSamples = 100. * (numSamples - numUncorruptedSamples) / numSamples - epsilon; final File intervalsFile = createTempFile("intervals", ".interval_list"); final File annotatedIntervalsFile = createTempFile("annotated-intervals", ".tsv"); final List<File> countFiles = new ArrayList<>(numSamples); for (int sampleIndex = 0; sampleIndex < numSamples; sampleIndex++) { final String sampleName = String.format("sample-%d", sampleIndex); final SampleLocatableMetadata sampleMetadata = new SimpleSampleLocatableMetadata(sampleName, SEQUENCE_DICTIONARY); final List<SimpleCount> counts = new ArrayList<>(numIntervals); for (int intervalIndex = 0; intervalIndex < numIntervals; intervalIndex++) { final SimpleInterval interval = new SimpleInterval("20", 10 * intervalIndex + 1, 10 * (intervalIndex + 1)); if (intervalIndex < numIntervalsBelowCountThreshold && sampleIndex >= numUncorruptedSamples) { //corrupt first numIntervalsBelowCountThreshold intervals in last (numSamples - numUncorruptedSamples) samples with low counts counts.add(new SimpleCount(interval, lowCountFilterCountThreshold - 1)); } else { counts.add(new SimpleCount(interval, intervalIndex + lowCountFilterCountThreshold)); } } final SimpleCountCollection sampleCounts = new SimpleCountCollection(sampleMetadata, counts); final File countFile = createTempFile(sampleName, ".tsv"); sampleCounts.write(countFile); countFiles.add(countFile); if (sampleIndex == 0) { final List<SimpleInterval> sampleIntervals = sampleCounts.getIntervals(); final IntervalList intervals = new IntervalList(sampleCounts.getMetadata().getSequenceDictionary()); sampleIntervals.forEach(i -> intervals.add(new Interval(i))); intervals.write(intervalsFile); final AnnotationMap passingAnnotation = new AnnotationMap(Arrays.asList( Pair.of(CopyNumberAnnotations.GC_CONTENT, 0.5), Pair.of(CopyNumberAnnotations.MAPPABILITY, 0.5), Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.5))); final AnnotationMap failingAnnotation = new AnnotationMap(Arrays.asList( Pair.of(CopyNumberAnnotations.GC_CONTENT, 0.05), Pair.of(CopyNumberAnnotations.MAPPABILITY, 0.95), Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.05))); final List<AnnotatedInterval> annotatedIntervals = IntStream.range(0, numIntervals).boxed() .map(i -> new AnnotatedInterval(new SimpleInterval(sampleIntervals.get(i)), //last numIntervalsFailingAnnotationFilters intervals fail annotation-based filters i >= numIntervals - numIntervalsFailingAnnotationFilters ? failingAnnotation : passingAnnotation)) .collect(Collectors.toList()); new AnnotatedIntervalCollection(sampleMetadata, annotatedIntervals).write(annotatedIntervalsFile); } } return new Object[][]{ //intervals file, array of strings for excluded intervals, annotated-intervals file, min/max GC content, mix/max mappability, min/max seg-dupe content, //count files, lowCountFilterCountThreshold, lowCountFilterPercentageOfSamples, //extremeCountFilterMinimumPercentile, extremeCountFilterMaximumPercentile, extremeCountFilterPercentageOfSamples, //expected array of indices of retained intervals {intervalsFile, Collections.emptyList(), annotatedIntervalsFile, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9, countFiles, lowCountFilterCountThreshold, percentageOfSamples, 1., 99., percentageOfSamples, IntStream.range(numIntervalsBelowCountThreshold + 1, numIntervalsBelowCountThreshold + 99).boxed().collect(Collectors.toList())}, {intervalsFile, Collections.emptyList(), annotatedIntervalsFile, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9, countFiles, lowCountFilterCountThreshold, percentageOfSamples, 1., 90., percentageOfSamples, IntStream.range(numIntervalsBelowCountThreshold + 1, numIntervalsBelowCountThreshold + 90).boxed().collect(Collectors.toList())}, //allow last numIntervalsFailingAnnotationFilters to pass annotation-based filters but exclude using -XL {intervalsFile, Collections.singletonList(String.format("20:%d-%d", (numIntervals - numIntervalsFailingAnnotationFilters) * 10 + 5, numIntervals * 10)), annotatedIntervalsFile, 0, 1, 0, 1, 0, 1, countFiles, lowCountFilterCountThreshold, percentageOfSamples, 1., 90., percentageOfSamples, IntStream.range(numIntervalsBelowCountThreshold + 1, numIntervalsBelowCountThreshold + 90).boxed().collect(Collectors.toList())}}; }
Example 10
Source File: FilterIntervalsIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
@DataProvider(name = "dataCountBasedFilters") public Object[][] dataCountBasedFilters() { final int numSamples = 100; final int numIntervals = 110; final int numIntervalsBelowCountThreshold = 10; final int numUncorruptedSamples = 10; final int lowCountFilterCountThreshold = 5; final double epsilon = 1E-1; final double percentageOfSamples = 100. * (numSamples - numUncorruptedSamples) / numSamples - epsilon; final File intervalsFile = createTempFile("intervals", ".interval_list"); final File intervalsWithExtraIntervalFile = createTempFile("intervals-with-extra-interval", ".interval_list"); final List<File> countFiles = new ArrayList<>(numSamples); for (int sampleIndex = 0; sampleIndex < numSamples; sampleIndex++) { final String sampleName = String.format("sample-%d", sampleIndex); final SampleLocatableMetadata sampleMetadata = new SimpleSampleLocatableMetadata(sampleName, SEQUENCE_DICTIONARY); final List<SimpleCount> counts = new ArrayList<>(numIntervals); for (int intervalIndex = 0; intervalIndex < numIntervals; intervalIndex++) { final SimpleInterval interval = new SimpleInterval("20", 10 * intervalIndex + 1, 10 * (intervalIndex + 1)); if (intervalIndex < numIntervalsBelowCountThreshold && sampleIndex >= numUncorruptedSamples) { //corrupt first numIntervalsBelowCountThreshold intervals in last (numSamples - numUncorruptedSamples) samples with low counts counts.add(new SimpleCount(interval, lowCountFilterCountThreshold - 1)); } else { counts.add(new SimpleCount(interval, intervalIndex + lowCountFilterCountThreshold)); } } final SimpleCountCollection sampleCounts = new SimpleCountCollection(sampleMetadata, counts); final File countFile = createTempFile(sampleName, ".tsv"); sampleCounts.write(countFile); countFiles.add(countFile); if (sampleIndex == 0) { final IntervalList intervals = new IntervalList(sampleCounts.getMetadata().getSequenceDictionary()); sampleCounts.getIntervals().forEach(i -> intervals.add(new Interval(i))); intervals.write(intervalsFile); //test that intersection gets rid of extra intervals in the interval list final IntervalList intervalsWithExtraInterval = new IntervalList(sampleCounts.getMetadata().getSequenceDictionary()); sampleCounts.getIntervals().forEach(i -> intervalsWithExtraInterval.add(new Interval(i))); intervalsWithExtraInterval.add(new Interval("20", 100000, 200000)); intervalsWithExtraInterval.write(intervalsWithExtraIntervalFile); } } return new Object[][]{ //intervals file, count files, lowCountFilterCountThreshold, lowCountFilterPercentageOfSamples, //extremeCountFilterMinimumPercentile, extremeCountFilterMaximumPercentile, extremeCountFilterPercentageOfSamples, //expected array of indices of retained intervals {intervalsFile, countFiles, lowCountFilterCountThreshold, percentageOfSamples, 1., 99., percentageOfSamples, IntStream.range(numIntervalsBelowCountThreshold + 1, numIntervalsBelowCountThreshold + 99).boxed().collect(Collectors.toList())}, {intervalsFile, countFiles, lowCountFilterCountThreshold, percentageOfSamples, 1., 90., percentageOfSamples, IntStream.range(numIntervalsBelowCountThreshold + 1, numIntervalsBelowCountThreshold + 90).boxed().collect(Collectors.toList())}, {intervalsWithExtraIntervalFile, countFiles, lowCountFilterCountThreshold, percentageOfSamples, 1., 99., percentageOfSamples, IntStream.range(numIntervalsBelowCountThreshold + 1, numIntervalsBelowCountThreshold + 99).boxed().collect(Collectors.toList())}, {intervalsWithExtraIntervalFile, countFiles, lowCountFilterCountThreshold, percentageOfSamples, 1., 90., percentageOfSamples, IntStream.range(numIntervalsBelowCountThreshold + 1, numIntervalsBelowCountThreshold + 90).boxed().collect(Collectors.toList())}}; }
Example 11
Source File: FilterIntervalsIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
@DataProvider(name = "dataAnnotationBasedFilters") public Object[][] dataAnnotationBasedFilters() { final File annotatedIntervalsFile = createTempFile("annotated-intervals", ".tsv"); ANNOTATED_INTERVALS.write(annotatedIntervalsFile); final File intervalsFile = createTempFile("intervals", ".interval_list"); final IntervalList intervals = new IntervalList(ANNOTATED_INTERVALS.getMetadata().getSequenceDictionary()); ANNOTATED_INTERVALS.getIntervals().forEach(i -> intervals.add(new Interval(i))); intervals.write(intervalsFile); //test that intersection gets rid of extra intervals in the interval list final File intervalsWithExtraIntervalFile = createTempFile("intervals-with-extra-interval", ".interval_list"); final IntervalList intervalsWithExtraInterval = new IntervalList(ANNOTATED_INTERVALS.getMetadata().getSequenceDictionary()); ANNOTATED_INTERVALS.getIntervals().forEach(i -> intervalsWithExtraInterval.add(new Interval(i))); intervalsWithExtraInterval.add(new Interval("20", 100000, 200000)); intervalsWithExtraInterval.write(intervalsWithExtraIntervalFile); return new Object[][]{ //intervals file, array of strings for excluded intervals, annotated-intervals file, //min/max GC content, mix/max mappability, min/max seg-dupe content, expected array of indices of retained intervals {intervalsFile, Collections.emptyList(), annotatedIntervalsFile, 0., 1., 0., 1., 0., 1., Arrays.asList(0, 1, 2, 3, 4, 5)}, {intervalsFile, Collections.emptyList(), annotatedIntervalsFile, 0.1, 0.9, 0., 1., 0., 1., Arrays.asList(0, 1, 2, 5)}, {intervalsFile, Collections.emptyList(), annotatedIntervalsFile, 0., 1., 0.1, 0.9, 0., 1., Arrays.asList(1, 2, 3, 5)}, {intervalsFile, Collections.emptyList(), annotatedIntervalsFile, 0., 1., 0., 1., 0.1, 0.9, Arrays.asList(2, 3, 4, 5)}, {intervalsFile, Collections.emptyList(), annotatedIntervalsFile, 0.1, 0.9, 0.1, 0.9, 0., 1., Arrays.asList(1, 2, 5)}, {intervalsFile, Collections.emptyList(), annotatedIntervalsFile, 0.1, 0.9, 0., 1., 0.1, 0.9, Arrays.asList(2, 5)}, {intervalsFile, Collections.emptyList(), annotatedIntervalsFile, 0., 1., 0.1, 0.9, 0.1, 0.9, Arrays.asList(2, 3, 5)}, {intervalsFile, Collections.emptyList(), annotatedIntervalsFile, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9, Arrays.asList(2, 5)}, {intervalsFile, Collections.singletonList("20:1-10"), annotatedIntervalsFile, 0., 1., 0., 1., 0., 1., Arrays.asList(1, 2, 3, 4, 5)}, {intervalsFile, Arrays.asList("20:1-15", "20:35-45"), annotatedIntervalsFile, 0., 1., 0., 1., 0., 1., Arrays.asList(2, 5)}, {intervalsFile, Collections.singletonList("20:25-50"), annotatedIntervalsFile, 0.1, 0.9, 0., 1., 0., 1., Arrays.asList(0, 1, 5)}, {intervalsWithExtraIntervalFile, Collections.emptyList(), annotatedIntervalsFile, 0., 1., 0., 1., 0., 1., Arrays.asList(0, 1, 2, 3, 4, 5)}, {intervalsWithExtraIntervalFile, Collections.emptyList(), annotatedIntervalsFile, 0.1, 0.9, 0., 1., 0., 1., Arrays.asList(0, 1, 2, 5)}, {intervalsWithExtraIntervalFile, Collections.emptyList(), annotatedIntervalsFile, 0., 1., 0.1, 0.9, 0., 1., Arrays.asList(1, 2, 3, 5)}, {intervalsWithExtraIntervalFile, Collections.emptyList(), annotatedIntervalsFile, 0., 1., 0., 1., 0.1, 0.9, Arrays.asList(2, 3, 4, 5)}, {intervalsWithExtraIntervalFile, Collections.emptyList(), annotatedIntervalsFile, 0.1, 0.9, 0.1, 0.9, 0., 1., Arrays.asList(1, 2, 5)}, {intervalsWithExtraIntervalFile, Collections.emptyList(), annotatedIntervalsFile, 0.1, 0.9, 0., 1., 0.1, 0.9, Arrays.asList(2, 5)}, {intervalsWithExtraIntervalFile, Collections.emptyList(), annotatedIntervalsFile, 0., 1., 0.1, 0.9, 0.1, 0.9, Arrays.asList(2, 3, 5)}, {intervalsWithExtraIntervalFile, Collections.emptyList(), annotatedIntervalsFile, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9, Arrays.asList(2, 5)}, {intervalsWithExtraIntervalFile, Collections.singletonList("20:1-10"), annotatedIntervalsFile, 0., 1., 0., 1., 0., 1., Arrays.asList(1, 2, 3, 4, 5)}, {intervalsWithExtraIntervalFile, Arrays.asList("20:1-15", "20:35-45"), annotatedIntervalsFile, 0., 1., 0., 1., 0., 1., Arrays.asList(2, 5)}, {intervalsWithExtraIntervalFile, Collections.singletonList("20:25-50"), annotatedIntervalsFile, 0.1, 0.9, 0., 1., 0., 1., Arrays.asList(0, 1, 5)}}; }
Example 12
Source File: CollectHsMetricsTest.java From picard with MIT License | 4 votes |
@Test public void testHsMetricsHandlesIndelsAppropriately() throws IOException { final SAMRecordSetBuilder withDeletions = new SAMRecordSetBuilder(true, SortOrder.coordinate); final SAMRecordSetBuilder withInsertions = new SAMRecordSetBuilder(true, SortOrder.coordinate); final IntervalList targets = new IntervalList(withDeletions.getHeader()); final IntervalList baits = new IntervalList(withDeletions.getHeader()); targets.add(new Interval("chr1", 1000, 1199, false, "t1")); baits.add(new Interval("chr1", 950, 1049, false, "b1")); baits.add(new Interval("chr1", 1050, 1149, false, "b2")); baits.add(new Interval("chr1", 1150, 1249, false, "b3")); // Generate 100 reads that fully cover the the target in each BAM for (int i=0; i<100; ++i) { withDeletions.addFrag( "d" + i, 0, 1000, false, false, "100M20D80M", null, 30); withInsertions.addFrag("i" + i, 0, 1000, false, false, "100M50I100M", null, 30); } // Write things out to file final File dir = IOUtil.createTempDir("hsmetrics.", ".test"); final File bs = new File(dir, "baits.interval_list").getAbsoluteFile(); final File ts = new File(dir, "targets.interval_list").getAbsoluteFile(); baits.write(bs); targets.write(ts); final File withDelBam = writeBam(withDeletions, new File(dir, "with_del.bam")); final File withInsBam = writeBam(withInsertions, new File(dir, "with_ins.bam")); // Now run CollectHsMetrics four times final File out = Files.createTempFile("hsmetrics.", ".txt").toFile(); runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=false", "SAMPLE_SIZE=0", "TI="+ts.getPath(), "BI="+bs.getPath(), "O="+out.getPath(), "I="+withDelBam.getAbsolutePath())); final HsMetrics delsWithoutIndelHandling = readMetrics(out); runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=true", "SAMPLE_SIZE=0", "TI="+ts.getPath(), "BI="+bs.getPath(), "O="+out.getPath(), "I="+withDelBam.getAbsolutePath())); final HsMetrics delsWithIndelHandling = readMetrics(out); runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=false", "SAMPLE_SIZE=0", "TI="+ts.getPath(), "BI="+bs.getPath(), "O="+out.getPath(), "I="+withInsBam.getAbsolutePath())); final HsMetrics insWithoutIndelHandling = readMetrics(out); runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=true", "SAMPLE_SIZE=0", "TI="+ts.getPath(), "BI="+bs.getPath(), "O="+out.getPath(), "I="+withInsBam.getAbsolutePath())); final HsMetrics insWithIndelHandling = readMetrics(out); IOUtil.deleteDirectoryTree(dir); Assert.assertEquals(delsWithoutIndelHandling.MEAN_TARGET_COVERAGE, 90.0); // 100X over 180/200 bases due to deletion Assert.assertEquals(delsWithIndelHandling.MEAN_TARGET_COVERAGE, 100.0); // 100X with counting the deletion Assert.assertEquals(insWithoutIndelHandling.PCT_USABLE_BASES_ON_TARGET, 200/250d); // 50/250 inserted bases are not counted as on target Assert.assertEquals(insWithIndelHandling.PCT_USABLE_BASES_ON_TARGET, 1.0d); // inserted bases are counted as on target }
Example 13
Source File: CollectRnaSeqMetricsTest.java From picard with MIT License | 4 votes |
@Test public void testPctRibosomalBases() throws Exception { final String sequence = "chr1"; final String ignoredSequence = "chrM"; // Create some alignments that hit the ribosomal sequence, various parts of the gene, and intergenic. final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate); // Set seed so that strandedness is consistent among runs. builder.setRandomSeed(0); final int sequenceIndex = builder.getHeader().getSequenceIndex(sequence); builder.addPair("rrnaPair", sequenceIndex, 400, 500, false, false, "35I1M", "35I1M", true, true, -1); builder.addFrag("frag1", sequenceIndex, 150, true, false, "34I2M", "*", -1); builder.addFrag("frag2", sequenceIndex, 190, true, false, "35I1M", "*", -1); builder.addFrag("ignoredFrag", builder.getHeader().getSequenceIndex(ignoredSequence), 1, false); final File samFile = File.createTempFile("tmp.collectRnaSeqMetrics.", ".sam"); samFile.deleteOnExit(); try (SAMFileWriter samWriter = new SAMFileWriterFactory().makeSAMWriter(builder.getHeader(), false, samFile)) { for (final SAMRecord rec : builder.getRecords()) { samWriter.addAlignment(rec); } } // Create an interval list with one ribosomal interval. final Interval rRnaInterval = new Interval(sequence, 300, 520, true, "rRNA"); final IntervalList rRnaIntervalList = new IntervalList(builder.getHeader()); rRnaIntervalList.add(rRnaInterval); final File rRnaIntervalsFile = File.createTempFile("tmp.rRna.", ".interval_list"); rRnaIntervalsFile.deleteOnExit(); rRnaIntervalList.write(rRnaIntervalsFile); // Generate the metrics. final File metricsFile = File.createTempFile("tmp.", ".rna_metrics"); metricsFile.deleteOnExit(); final String[] args = new String[]{ "INPUT=" + samFile.getAbsolutePath(), "OUTPUT=" + metricsFile.getAbsolutePath(), "REF_FLAT=" + getRefFlatFile(sequence).getAbsolutePath(), "RIBOSOMAL_INTERVALS=" + rRnaIntervalsFile.getAbsolutePath(), "STRAND_SPECIFICITY=SECOND_READ_TRANSCRIPTION_STRAND", "IGNORE_SEQUENCE=" + ignoredSequence }; Assert.assertEquals(runPicardCommandLine(args), 0); final MetricsFile<RnaSeqMetrics, Comparable<?>> output = new MetricsFile<>(); output.read(new FileReader(metricsFile)); final RnaSeqMetrics metrics = output.getMetrics().get(0); Assert.assertEquals(metrics.PCT_RIBOSOMAL_BASES, 0.4); }
Example 14
Source File: CollectRnaSeqMetricsTest.java From picard with MIT License | 4 votes |
@Test public void testTranscriptionStrandMetrics() throws Exception { final String sequence = "chr1"; final String ignoredSequence = "chrM"; // Create some alignments that hit the ribosomal sequence, various parts of the gene, and intergenic. final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate); // Set seed so that strandedness is consistent among runs. builder.setRandomSeed(0); builder.setReadLength(50); final int sequenceIndex = builder.getHeader().getSequenceIndex(sequence); // Reads that start *before* the gene: count as unexamined builder.addPair("pair_prior_1", sequenceIndex, 45, 50, false, false, "50M", "50M", true, false, -1); builder.addPair("pair_prior_2", sequenceIndex, 49, 50, false, false, "50M", "50M", true, false, -1); // Read that is enclosed in the gene, but one end does not map: count as unexamined builder.addPair("read_one_end_unmapped", sequenceIndex, 50, 51, false, true, "50M", null, false, false, -1); // Reads that are enclosed in the gene, paired and frag: one count per template builder.addFrag("frag_enclosed_forward", sequenceIndex, 150, false); builder.addFrag("frag_enclosed_reverse", sequenceIndex, 150, true); builder.addPair("pair_enclosed_forward", sequenceIndex, 150, 200, false, false, "50M", "50M", false, true, -1); builder.addPair("pair_enclosed_reverse", sequenceIndex, 200, 150, false, false, "50M", "50M", true, false, -1); // Read that starts *after* the gene: not counted builder.addPair("pair_after_1", sequenceIndex, 545, 550, false, false, "50M", "50M", true, false, -1); builder.addPair("pair_after_2", sequenceIndex, 549, 550, false, false, "50M", "50M", true, false, -1); // A read that uses the read length instead of the mate cigar builder.addFrag("frag_enclosed_forward_no_mate_cigar", sequenceIndex, 150, false).setAttribute(SAMTag.MC.name(), null); // Duplicate reads are counted builder.addFrag("frag_duplicate", sequenceIndex, 150, false).setDuplicateReadFlag(true); // These reads should be ignored. builder.addFrag("frag_secondary", sequenceIndex, 150, false).setNotPrimaryAlignmentFlag(true); builder.addFrag("frag_supplementary", sequenceIndex, 150, false).setSupplementaryAlignmentFlag(true); builder.addFrag("frag_qc_failure", sequenceIndex, 150, false).setReadFailsVendorQualityCheckFlag(true); final File samFile = File.createTempFile("tmp.collectRnaSeqMetrics.", ".sam"); samFile.deleteOnExit(); final SAMFileWriter samWriter = new SAMFileWriterFactory().makeSAMWriter(builder.getHeader(), false, samFile); for (final SAMRecord rec: builder.getRecords()) samWriter.addAlignment(rec); samWriter.close(); // Create an interval list with one ribosomal interval. final Interval rRnaInterval = new Interval(sequence, 300, 520, true, "rRNA"); final IntervalList rRnaIntervalList = new IntervalList(builder.getHeader()); rRnaIntervalList.add(rRnaInterval); final File rRnaIntervalsFile = File.createTempFile("tmp.rRna.", ".interval_list"); rRnaIntervalsFile.deleteOnExit(); rRnaIntervalList.write(rRnaIntervalsFile); // Generate the metrics. final File metricsFile = File.createTempFile("tmp.", ".rna_metrics"); metricsFile.deleteOnExit(); final String[] args = new String[] { "INPUT=" + samFile.getAbsolutePath(), "OUTPUT=" + metricsFile.getAbsolutePath(), "REF_FLAT=" + getRefFlatFile(sequence).getAbsolutePath(), "RIBOSOMAL_INTERVALS=" + rRnaIntervalsFile.getAbsolutePath(), "STRAND_SPECIFICITY=SECOND_READ_TRANSCRIPTION_STRAND", "IGNORE_SEQUENCE=" + ignoredSequence }; Assert.assertEquals(runPicardCommandLine(args), 0); final MetricsFile<RnaSeqMetrics, Comparable<?>> output = new MetricsFile<RnaSeqMetrics, Comparable<?>>(); output.read(new FileReader(metricsFile)); final RnaSeqMetrics metrics = output.getMetrics().get(0); Assert.assertEquals(metrics.PF_ALIGNED_BASES, 900); Assert.assertEquals(metrics.PF_BASES, 950); Assert.assertEquals(metrics.RIBOSOMAL_BASES.longValue(), 0L); Assert.assertEquals(metrics.CODING_BASES, 471); Assert.assertEquals(metrics.UTR_BASES, 125); Assert.assertEquals(metrics.INTRONIC_BASES, 98); Assert.assertEquals(metrics.INTERGENIC_BASES, 206); Assert.assertEquals(metrics.CORRECT_STRAND_READS, 7); Assert.assertEquals(metrics.INCORRECT_STRAND_READS, 6); Assert.assertEquals(metrics.IGNORED_READS, 0); Assert.assertEquals(metrics.NUM_R1_TRANSCRIPT_STRAND_READS, 4); Assert.assertEquals(metrics.NUM_R2_TRANSCRIPT_STRAND_READS, 2); Assert.assertEquals(metrics.NUM_UNEXPLAINED_READS, 3); Assert.assertEquals(metrics.PCT_R1_TRANSCRIPT_STRAND_READS, 0.666667); Assert.assertEquals(metrics.PCT_R2_TRANSCRIPT_STRAND_READS, 0.333333); }
Example 15
Source File: CollectRnaSeqMetricsTest.java From picard with MIT License | 4 votes |
@Test(dataProvider = "rRnaIntervalsFiles") public void testNoIntevalsNoFragPercentage(final File rRnaIntervalsFile) throws Exception { final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate); // Add a header but no intervals if ( rRnaIntervalsFile != null ) { final IntervalList rRnaIntervalList = new IntervalList(builder.getHeader()); rRnaIntervalList.write(rRnaIntervalsFile); rRnaIntervalsFile.deleteOnExit(); } // Create some alignments final String sequence = "chr1"; final String ignoredSequence = "chrM"; // Set seed so that strandedness is consistent among runs. builder.setRandomSeed(0); final int sequenceIndex = builder.getHeader().getSequenceIndex(sequence); builder.addPair("pair1", sequenceIndex, 45, 475); builder.addFrag("ignoredFrag", builder.getHeader().getSequenceIndex(ignoredSequence), 1, false); final File samFile = File.createTempFile("tmp.collectRnaSeqMetrics.", ".sam"); samFile.deleteOnExit(); final SAMFileWriter samWriter = new SAMFileWriterFactory().makeSAMWriter(builder.getHeader(), false, samFile); for (final SAMRecord rec : builder.getRecords()) samWriter.addAlignment(rec); samWriter.close(); // Generate the metrics. final File metricsFile = File.createTempFile("tmp.", ".rna_metrics"); metricsFile.deleteOnExit(); final String rRnaIntervalsPath = rRnaIntervalsFile != null ? rRnaIntervalsFile.getAbsolutePath() : null; final String[] args = new String[]{ "INPUT=" + samFile.getAbsolutePath(), "OUTPUT=" + metricsFile.getAbsolutePath(), "REF_FLAT=" + getRefFlatFile(sequence).getAbsolutePath(), "RIBOSOMAL_INTERVALS=" + rRnaIntervalsPath, "RRNA_FRAGMENT_PERCENTAGE=" + 0.0, "STRAND_SPECIFICITY=SECOND_READ_TRANSCRIPTION_STRAND", "IGNORE_SEQUENCE=" + ignoredSequence }; try { Assert.assertEquals(runPicardCommandLine(args), 0); } catch(final Exception e) { if (rRnaIntervalsFile != null) { Assert.assertEquals(e.getCause().getClass(), PicardException.class); } } }
Example 16
Source File: CollectRnaSeqMetricsTest.java From picard with MIT License | 4 votes |
@Test public void testBasic() throws Exception { final String sequence = "chr1"; final String ignoredSequence = "chrM"; // Create some alignments that hit the ribosomal sequence, various parts of the gene, and intergenic. final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate); // Set seed so that strandedness is consistent among runs. builder.setRandomSeed(0); final int sequenceIndex = builder.getHeader().getSequenceIndex(sequence); builder.addPair("pair1", sequenceIndex, 45, 475); builder.addPair("pair2", sequenceIndex, 90, 225); builder.addPair("pair3", sequenceIndex, 120, 600); builder.addFrag("frag1", sequenceIndex, 150, true); builder.addFrag("frag2", sequenceIndex, 450, true); builder.addFrag("frag3", sequenceIndex, 225, false); builder.addPair("rrnaPair", sequenceIndex, 400, 500); builder.addFrag("ignoredFrag", builder.getHeader().getSequenceIndex(ignoredSequence), 1, false); final File samFile = File.createTempFile("tmp.collectRnaSeqMetrics.", ".sam"); samFile.deleteOnExit(); final SAMFileWriter samWriter = new SAMFileWriterFactory().makeSAMWriter(builder.getHeader(), false, samFile); for (final SAMRecord rec: builder.getRecords()) samWriter.addAlignment(rec); samWriter.close(); // Create an interval list with one ribosomal interval. final Interval rRnaInterval = new Interval(sequence, 300, 520, true, "rRNA"); final IntervalList rRnaIntervalList = new IntervalList(builder.getHeader()); rRnaIntervalList.add(rRnaInterval); final File rRnaIntervalsFile = File.createTempFile("tmp.rRna.", ".interval_list"); rRnaIntervalsFile.deleteOnExit(); rRnaIntervalList.write(rRnaIntervalsFile); // Generate the metrics. final File metricsFile = File.createTempFile("tmp.", ".rna_metrics"); metricsFile.deleteOnExit(); final String[] args = new String[] { "INPUT=" + samFile.getAbsolutePath(), "OUTPUT=" + metricsFile.getAbsolutePath(), "REF_FLAT=" + getRefFlatFile(sequence).getAbsolutePath(), "RIBOSOMAL_INTERVALS=" + rRnaIntervalsFile.getAbsolutePath(), "STRAND_SPECIFICITY=SECOND_READ_TRANSCRIPTION_STRAND", "IGNORE_SEQUENCE=" + ignoredSequence }; Assert.assertEquals(runPicardCommandLine(args), 0); final MetricsFile<RnaSeqMetrics, Comparable<?>> output = new MetricsFile<RnaSeqMetrics, Comparable<?>>(); output.read(new FileReader(metricsFile)); final RnaSeqMetrics metrics = output.getMetrics().get(0); Assert.assertEquals(metrics.PF_ALIGNED_BASES, 396); Assert.assertEquals(metrics.PF_BASES, 432); Assert.assertEquals(metrics.RIBOSOMAL_BASES.longValue(), 108L); Assert.assertEquals(metrics.CODING_BASES, 136); Assert.assertEquals(metrics.UTR_BASES, 51); Assert.assertEquals(metrics.INTRONIC_BASES, 50); Assert.assertEquals(metrics.INTERGENIC_BASES, 51); Assert.assertEquals(metrics.CORRECT_STRAND_READS, 3); Assert.assertEquals(metrics.INCORRECT_STRAND_READS, 4); Assert.assertEquals(metrics.IGNORED_READS, 1); Assert.assertEquals(metrics.NUM_R1_TRANSCRIPT_STRAND_READS, 1); Assert.assertEquals(metrics.NUM_R2_TRANSCRIPT_STRAND_READS, 2); Assert.assertEquals(metrics.NUM_UNEXPLAINED_READS, 2); Assert.assertEquals(metrics.PCT_R1_TRANSCRIPT_STRAND_READS, 0.333333); Assert.assertEquals(metrics.PCT_R2_TRANSCRIPT_STRAND_READS, 0.666667); }
Example 17
Source File: BedToIntervalList.java From picard with MIT License | 4 votes |
@Override protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY); IOUtil.assertFileIsWritable(OUTPUT); try { // create a new header that we will assign the dictionary provided by the SAMSequenceDictionaryExtractor to. final SAMFileHeader header = new SAMFileHeader(); final SAMSequenceDictionary samSequenceDictionary = SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY.toPath()); header.setSequenceDictionary(samSequenceDictionary); // set the sort order to be sorted by coordinate, which is actually done below // by getting the .uniqued() intervals list before we write out the file header.setSortOrder(SAMFileHeader.SortOrder.coordinate); final IntervalList intervalList = new IntervalList(header); final FeatureReader<BEDFeature> bedReader = AbstractFeatureReader.getFeatureReader(INPUT.getAbsolutePath(), new BEDCodec(), false); final CloseableTribbleIterator<BEDFeature> iterator = bedReader.iterator(); final ProgressLogger progressLogger = new ProgressLogger(LOG, (int) 1e6); while (iterator.hasNext()) { final BEDFeature bedFeature = iterator.next(); final String sequenceName = bedFeature.getContig(); final int start = bedFeature.getStart(); final int end = bedFeature.getEnd(); // NB: do not use an empty name within an interval final String name; if (bedFeature.getName().isEmpty()) { name = null; } else { name = bedFeature.getName(); } final SAMSequenceRecord sequenceRecord = header.getSequenceDictionary().getSequence(sequenceName); // Do some validation if (null == sequenceRecord) { if (DROP_MISSING_CONTIGS) { LOG.info(String.format("Dropping interval with missing contig: %s:%d-%d", sequenceName, bedFeature.getStart(), bedFeature.getEnd())); missingIntervals++; missingRegion += bedFeature.getEnd() - bedFeature.getStart(); continue; } throw new PicardException(String.format("Sequence '%s' was not found in the sequence dictionary", sequenceName)); } else if (start < 1) { throw new PicardException(String.format("Start on sequence '%s' was less than one: %d", sequenceName, start)); } else if (sequenceRecord.getSequenceLength() < start) { throw new PicardException(String.format("Start on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), start)); } else if ((end == 0 && start != 1 ) //special case for 0-length interval at the start of a contig || end < 0 ) { throw new PicardException(String.format("End on sequence '%s' was less than one: %d", sequenceName, end)); } else if (sequenceRecord.getSequenceLength() < end) { throw new PicardException(String.format("End on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), end)); } else if (end < start - 1) { throw new PicardException(String.format("On sequence '%s', end < start-1: %d <= %d", sequenceName, end, start)); } final boolean isNegativeStrand = bedFeature.getStrand() == Strand.NEGATIVE; final Interval interval = new Interval(sequenceName, start, end, isNegativeStrand, name); intervalList.add(interval); progressLogger.record(sequenceName, start); } CloserUtil.close(bedReader); if (DROP_MISSING_CONTIGS) { if (missingRegion == 0) { LOG.info("There were no missing regions."); } else { LOG.warn(String.format("There were %d missing regions with a total of %d bases", missingIntervals, missingRegion)); } } // Sort and write the output IntervalList out = intervalList; if (SORT) { out = out.sorted(); } if (UNIQUE) { out = out.uniqued(); } out.write(OUTPUT); LOG.info(String.format("Wrote %d intervals spanning a total of %d bases", out.getIntervals().size(),out.getBaseCount())); } catch (final IOException e) { throw new RuntimeException(e); } return 0; }
Example 18
Source File: LiftOverIntervalList.java From picard with MIT License | 4 votes |
/** * Do the work after command line has been parsed. RuntimeException may be * thrown by this method, and are reported appropriately. * * @return program exit status. */ @Override protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY); IOUtil.assertFileIsReadable(CHAIN); IOUtil.assertFileIsWritable(OUTPUT); if (REJECT != null) IOUtil.assertFileIsWritable(REJECT); final LiftOver liftOver = new LiftOver(CHAIN); liftOver.setLiftOverMinMatch(MIN_LIFTOVER_PCT); final IntervalList intervalList = IntervalList.fromFile(INPUT); final IntervalList rejects = new IntervalList(intervalList.getHeader()); final long baseCount = intervalList.getBaseCount(); LOG.info("Lifting over " + intervalList.getIntervals().size() + " intervals, encompassing " + baseCount + " bases."); final SAMFileHeader toHeader = new SAMFileHeader(SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY.toPath())); liftOver.validateToSequences(toHeader.getSequenceDictionary()); final IntervalList toIntervals = new IntervalList(toHeader); for (final Interval fromInterval : intervalList) { final Interval toInterval = liftOver.liftOver(fromInterval); if (toInterval != null) { toIntervals.add(toInterval); } else { rejects.add(fromInterval); LOG.warn("Liftover failed for ", fromInterval, " (len ", fromInterval.length(), ")"); final List<LiftOver.PartialLiftover> partials = liftOver.diagnosticLiftover(fromInterval); for (final LiftOver.PartialLiftover partial : partials) { LOG.info(partial); } } } toIntervals.sorted().write(OUTPUT); if (REJECT != null) { rejects.write(REJECT); } final long rejectBaseCount = rejects.getBaseCount(); LOG.info(String.format("Liftover Complete. \n" + "%d of %d intervals failed (%g%%) to liftover, encompassing %d of %d bases (%g%%).", rejects.getIntervals().size(), intervalList.getIntervals().size(), 100 * rejects.getIntervals().size() / (double) intervalList.getIntervals().size(), rejectBaseCount, baseCount, 100 * rejectBaseCount / (double) baseCount )); return rejects.getIntervals().isEmpty() ? 0 : 1; }