Java Code Examples for htsjdk.samtools.util.IntervalList#write()

The following examples show how to use htsjdk.samtools.util.IntervalList#write() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: IntervalListTools.java    From picard with MIT License 6 votes vote down vote up
/**
 * Method to scatter an interval list by locus.
 *
 * @param list The list of intervals to scatter
 * @return The scattered intervals, represented as a {@link List} of {@link IntervalList}
 */
private  ScatterSummary writeScatterIntervals(final IntervalList list) {
    final IntervalListScatterer scatterer = SUBDIVISION_MODE.make();
    final IntervalListScatter scatter = new IntervalListScatter(scatterer, list, SCATTER_COUNT);

    final DecimalFormat fileNameFormatter = new DecimalFormat("0000");
    int fileIndex = 1;
    final ScatterSummary summary = new ScatterSummary();
    for (final IntervalList intervals : scatter) {
        summary.size++;
        summary.baseCount += intervals.getBaseCount();
        summary.intervalCount += intervals.getIntervals().size();
        intervals.write(createDirectoryAndGetScatterFile(OUTPUT, SCATTER_COUNT, fileNameFormatter.format(fileIndex++)));
    }

    return summary;
}
 
Example 2
Source File: IntervalUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test(expectedExceptions={UserException.CouldNotReadInputFile.class, UserException.MalformedGenomeLoc.class, UserException.MalformedFile.class}, dataProvider="invalidIntervalTestData")
public void testIntervalFileToListInvalidPicardIntervalHandling(GenomeLocParser genomeLocParser,
                                   String contig, int intervalStart, int intervalEnd ) throws Exception {

    final SAMFileHeader picardFileHeader = new SAMFileHeader();
    picardFileHeader.addSequence(genomeLocParser.getContigInfo("1"));
    picardFileHeader.addSequence(new SAMSequenceRecord("2", 100000));

    final IntervalList picardIntervals = new IntervalList(picardFileHeader);
    picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname"));

    final File picardIntervalFile = createTempFile("testIntervalFileToListInvalidPicardIntervalHandling", ".interval_list");
    picardIntervals.write(picardIntervalFile);

    // loadIntervals() will validate all intervals against the sequence dictionary in our genomeLocParser,
    // and should throw for all intervals in our invalidIntervalTestData set
    IntervalUtils.parseIntervalArguments(genomeLocParser, picardIntervalFile.getAbsolutePath());
}
 
Example 3
Source File: IntervalUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test(expectedExceptions={UserException.MalformedFile.class, UserException.MalformedGenomeLoc.class}, dataProvider="invalidIntervalTestData")
public void testLoadIntervalsInvalidPicardIntervalHandling(GenomeLocParser genomeLocParser,
                                              String contig, int intervalStart, int intervalEnd ) throws Exception {

    SAMFileHeader picardFileHeader = new SAMFileHeader();
    picardFileHeader.addSequence(genomeLocParser.getContigInfo("1"));
    // Intentionally add an extra contig not in our genomeLocParser's sequence dictionary
    // so that we can add intervals to the Picard interval file for contigs not in our dictionary
    picardFileHeader.addSequence(new SAMSequenceRecord("2", 100000));

    IntervalList picardIntervals = new IntervalList(picardFileHeader);
    picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname"));

    File picardIntervalFile = createTempFile("testLoadIntervalsInvalidPicardIntervalHandling", ".intervals");
    picardIntervals.write(picardIntervalFile);

    List<String> intervalArgs = new ArrayList<>(1);
    intervalArgs.add(picardIntervalFile.getAbsolutePath());

    // loadIntervals() will validate all intervals against the sequence dictionary in our genomeLocParser,
    // and should throw for all intervals in our invalidIntervalTestData set
    IntervalUtils.loadIntervals(intervalArgs, IntervalSetRule.UNION, IntervalMergingRule.ALL, 0, genomeLocParser);
}
 
Example 4
Source File: GenomicsDBImport.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Before traversal, fix configuration parameters and initialize
 * GenomicsDB. Hard-coded to handle only VCF files and headers
 */
@Override
public void onTraversalStart() {
    if (getIntervalsFromExistingWorkspace) {
        final IntervalList outputList = new IntervalList(getBestAvailableSequenceDictionary());
        intervals.forEach(i -> outputList.add(new Interval(i.getContig(), i.getStart(), i.getEnd())));
        final Path intervalListOutputPath = IOUtils.getPath(intervalListOutputPathString);
        outputList.write(intervalListOutputPath.toFile());
        return;
    }
    String workspaceDir = BucketUtils.makeFilePathAbsolute(overwriteCreateOrCheckWorkspace());
    vidMapJSONFile = IOUtils.appendPathToDir(workspaceDir, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME);
    callsetMapJSONFile = IOUtils.appendPathToDir(workspaceDir, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME);
    vcfHeaderFile = IOUtils.appendPathToDir(workspaceDir, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME);

    if (doIncrementalImport) {
        logger.info("Callset Map JSON file will be re-written to " + callsetMapJSONFile);
        logger.info("Incrementally importing to workspace - " + workspaceDir);
    } else {
        logger.info("Vid Map JSON file will be written to " + vidMapJSONFile);
        logger.info("Callset Map JSON file will be written to " + callsetMapJSONFile);
        logger.info("Complete VCF Header will be written to " + vcfHeaderFile);
        logger.info("Importing to workspace - " + workspaceDir);
    }
    initializeInputPreloadExecutorService();
}
 
Example 5
Source File: FilterIntervals.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Override
public Object doWork() {
    validateArguments();

    resolveAndValidateIntervals();

    final SimpleIntervalCollection filteredIntervals = filterIntervals();
    logger.info(String.format("Writing filtered intervals to %s...", outputFilteredIntervalsFile.getAbsolutePath()));
    final IntervalList filteredIntervalList = new IntervalList(filteredIntervals.getMetadata().getSequenceDictionary());
    filteredIntervals.getIntervals().forEach(i -> filteredIntervalList.add(new Interval(i)));
    filteredIntervalList.write(outputFilteredIntervalsFile);

    logger.info(String.format("%s complete.", getClass().getSimpleName()));

    return null;
}
 
Example 6
Source File: IntervalUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Splits an interval list into multiple files.
 * @param fileHeader The sam file header.
 * @param splits Pre-divided genome locs returned by splitFixedIntervals.
 * @param scatterParts The output interval lists to write to.
 */
public static void scatterFixedIntervals(final SAMFileHeader fileHeader, final List<List<GenomeLoc>> splits, final List<File> scatterParts) {
    Utils.nonNull(fileHeader, "fileHeader is null");
    Utils.nonNull(splits, "splits is null");
    Utils.nonNull(scatterParts, "scatterParts is null");
    Utils.containsNoNull(splits, "null split loc");

    if (splits.size() != scatterParts.size()) {
        throw new CommandLineException.BadArgumentValue("splits", String.format("Split points %d does not equal the number of scatter parts %d.", splits.size(), scatterParts.size()));
    }

    int fileIndex = 0;
    int locIndex = 1;
    for (final List<GenomeLoc> split : splits) {
        final IntervalList intervalList = new IntervalList(fileHeader);
        for (final GenomeLoc loc : split) {
            intervalList.add(toInterval(loc, locIndex++));
        }
        intervalList.write(scatterParts.get(fileIndex++));
    }
}
 
Example 7
Source File: PreprocessIntervals.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Override
public Object onTraversalSuccess() {
    final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();

    final List<SimpleInterval> inputIntervals = hasUserSuppliedIntervals()
            ? intervalArgumentCollection.getIntervals(sequenceDictionary)
            : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);    // if the user didn't add any intervals,
                                                                                // we assume that they wanted to do whole genome sequencing

    logger.info("Padding intervals...");
    final IntervalList paddedIntervalList = padIntervals(inputIntervals, padding, sequenceDictionary);

    logger.info("Generating bins...");
    final IntervalList unfilteredBins = generateBins(paddedIntervalList, binLength, sequenceDictionary);

    logger.info("Filtering bins containing only Ns...");
    final ReferenceDataSource reference = ReferenceDataSource.of(referenceArguments.getReferencePath());
    final IntervalList bins = filterBinsContainingOnlyNs(unfilteredBins, reference);

    logger.info(String.format("Writing bins to %s...", outputPreprocessedIntervalsFile.getAbsolutePath()));
    bins.write(outputPreprocessedIntervalsFile);

    logger.info(String.format("%s complete.", getClass().getSimpleName()));

    return null;
}
 
Example 8
Source File: CreateSnpIntervalFromVcf.java    From Drop-seq with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
	IOUtil.assertFileIsReadable(INPUT);
	IOUtil.assertFileIsWritable(OUTPUT);
	IntervalList result = processData(this.INPUT, this.SEQUENCE_DICTIONARY, SAMPLE, this.GQ_THRESHOLD, this.HET_SNPS_ONLY);
	result.write(this.OUTPUT);
	return 0;
}
 
Example 9
Source File: FilterIntervalsIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@DataProvider(name = "dataAllFilters")
public Object[][] dataAllFilters() {
    final int numSamples = 100;
    final int numIntervals = 120;
    final int numIntervalsBelowCountThreshold = 10;
    final int numIntervalsFailingAnnotationFilters = 10;
    final int numUncorruptedSamples = 10;
    final int lowCountFilterCountThreshold = 5;
    final double epsilon = 1E-1;
    final double percentageOfSamples = 100. * (numSamples - numUncorruptedSamples) / numSamples - epsilon;

    final File intervalsFile = createTempFile("intervals", ".interval_list");
    final File annotatedIntervalsFile = createTempFile("annotated-intervals", ".tsv");
    final List<File> countFiles = new ArrayList<>(numSamples);
    for (int sampleIndex = 0; sampleIndex < numSamples; sampleIndex++) {
        final String sampleName = String.format("sample-%d", sampleIndex);
        final SampleLocatableMetadata sampleMetadata = new SimpleSampleLocatableMetadata(sampleName, SEQUENCE_DICTIONARY);
        final List<SimpleCount> counts = new ArrayList<>(numIntervals);
        for (int intervalIndex = 0; intervalIndex < numIntervals; intervalIndex++) {
            final SimpleInterval interval = new SimpleInterval("20", 10 * intervalIndex + 1, 10 * (intervalIndex + 1));
            if (intervalIndex < numIntervalsBelowCountThreshold && sampleIndex >= numUncorruptedSamples) {
                //corrupt first numIntervalsBelowCountThreshold intervals in last (numSamples - numUncorruptedSamples) samples with low counts
                counts.add(new SimpleCount(interval, lowCountFilterCountThreshold - 1));
            } else {
                counts.add(new SimpleCount(interval, intervalIndex + lowCountFilterCountThreshold));
            }
        }
        final SimpleCountCollection sampleCounts = new SimpleCountCollection(sampleMetadata, counts);
        final File countFile = createTempFile(sampleName, ".tsv");
        sampleCounts.write(countFile);
        countFiles.add(countFile);

        if (sampleIndex == 0) {
            final List<SimpleInterval> sampleIntervals = sampleCounts.getIntervals();
            final IntervalList intervals = new IntervalList(sampleCounts.getMetadata().getSequenceDictionary());
            sampleIntervals.forEach(i -> intervals.add(new Interval(i)));
            intervals.write(intervalsFile);

            final AnnotationMap passingAnnotation = new AnnotationMap(Arrays.asList(
                    Pair.of(CopyNumberAnnotations.GC_CONTENT, 0.5),
                    Pair.of(CopyNumberAnnotations.MAPPABILITY, 0.5),
                    Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.5)));
            final AnnotationMap failingAnnotation = new AnnotationMap(Arrays.asList(
                    Pair.of(CopyNumberAnnotations.GC_CONTENT, 0.05),
                    Pair.of(CopyNumberAnnotations.MAPPABILITY, 0.95),
                    Pair.of(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, 0.05)));
            final List<AnnotatedInterval> annotatedIntervals = IntStream.range(0, numIntervals).boxed()
                    .map(i -> new AnnotatedInterval(new SimpleInterval(sampleIntervals.get(i)),
                        //last numIntervalsFailingAnnotationFilters intervals fail annotation-based filters
                        i >= numIntervals - numIntervalsFailingAnnotationFilters
                                ? failingAnnotation
                                : passingAnnotation))
                    .collect(Collectors.toList());
            new AnnotatedIntervalCollection(sampleMetadata, annotatedIntervals).write(annotatedIntervalsFile);
        }
    }

    return new Object[][]{
            //intervals file, array of strings for excluded intervals, annotated-intervals file, min/max GC content, mix/max mappability, min/max seg-dupe content,
            //count files, lowCountFilterCountThreshold, lowCountFilterPercentageOfSamples,
            //extremeCountFilterMinimumPercentile, extremeCountFilterMaximumPercentile, extremeCountFilterPercentageOfSamples,
            //expected array of indices of retained intervals
            {intervalsFile, Collections.emptyList(), annotatedIntervalsFile, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9,
                    countFiles, lowCountFilterCountThreshold, percentageOfSamples, 1., 99., percentageOfSamples,
                    IntStream.range(numIntervalsBelowCountThreshold + 1, numIntervalsBelowCountThreshold + 99).boxed().collect(Collectors.toList())},
            {intervalsFile, Collections.emptyList(), annotatedIntervalsFile, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9,
                    countFiles, lowCountFilterCountThreshold, percentageOfSamples, 1., 90., percentageOfSamples,
                    IntStream.range(numIntervalsBelowCountThreshold + 1, numIntervalsBelowCountThreshold + 90).boxed().collect(Collectors.toList())},
            //allow last numIntervalsFailingAnnotationFilters to pass annotation-based filters but exclude using -XL
            {intervalsFile, Collections.singletonList(String.format("20:%d-%d", (numIntervals - numIntervalsFailingAnnotationFilters) * 10 + 5, numIntervals * 10)), annotatedIntervalsFile, 0, 1, 0, 1, 0, 1,
                    countFiles, lowCountFilterCountThreshold, percentageOfSamples, 1., 90., percentageOfSamples,
                    IntStream.range(numIntervalsBelowCountThreshold + 1, numIntervalsBelowCountThreshold + 90).boxed().collect(Collectors.toList())}};
}
 
Example 10
Source File: FilterIntervalsIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@DataProvider(name = "dataCountBasedFilters")
public Object[][] dataCountBasedFilters() {
    final int numSamples = 100;
    final int numIntervals = 110;
    final int numIntervalsBelowCountThreshold = 10;
    final int numUncorruptedSamples = 10;
    final int lowCountFilterCountThreshold = 5;
    final double epsilon = 1E-1;
    final double percentageOfSamples = 100. * (numSamples - numUncorruptedSamples) / numSamples - epsilon;

    final File intervalsFile = createTempFile("intervals", ".interval_list");
    final File intervalsWithExtraIntervalFile = createTempFile("intervals-with-extra-interval", ".interval_list");
    final List<File> countFiles = new ArrayList<>(numSamples);
    for (int sampleIndex = 0; sampleIndex < numSamples; sampleIndex++) {
        final String sampleName = String.format("sample-%d", sampleIndex);
        final SampleLocatableMetadata sampleMetadata = new SimpleSampleLocatableMetadata(sampleName, SEQUENCE_DICTIONARY);
        final List<SimpleCount> counts = new ArrayList<>(numIntervals);
        for (int intervalIndex = 0; intervalIndex < numIntervals; intervalIndex++) {
            final SimpleInterval interval = new SimpleInterval("20", 10 * intervalIndex + 1, 10 * (intervalIndex + 1));
            if (intervalIndex < numIntervalsBelowCountThreshold && sampleIndex >= numUncorruptedSamples) {
                //corrupt first numIntervalsBelowCountThreshold intervals in last (numSamples - numUncorruptedSamples) samples with low counts
                counts.add(new SimpleCount(interval, lowCountFilterCountThreshold - 1));
            } else {
                counts.add(new SimpleCount(interval, intervalIndex + lowCountFilterCountThreshold));
            }
        }
        final SimpleCountCollection sampleCounts = new SimpleCountCollection(sampleMetadata, counts);
        final File countFile = createTempFile(sampleName, ".tsv");
        sampleCounts.write(countFile);
        countFiles.add(countFile);

        if (sampleIndex == 0) {
            final IntervalList intervals = new IntervalList(sampleCounts.getMetadata().getSequenceDictionary());
            sampleCounts.getIntervals().forEach(i -> intervals.add(new Interval(i)));
            intervals.write(intervalsFile);

            //test that intersection gets rid of extra intervals in the interval list
            final IntervalList intervalsWithExtraInterval = new IntervalList(sampleCounts.getMetadata().getSequenceDictionary());
            sampleCounts.getIntervals().forEach(i -> intervalsWithExtraInterval.add(new Interval(i)));
            intervalsWithExtraInterval.add(new Interval("20", 100000, 200000));
            intervalsWithExtraInterval.write(intervalsWithExtraIntervalFile);
        }
    }

    return new Object[][]{
            //intervals file, count files, lowCountFilterCountThreshold, lowCountFilterPercentageOfSamples,
            //extremeCountFilterMinimumPercentile, extremeCountFilterMaximumPercentile, extremeCountFilterPercentageOfSamples,
            //expected array of indices of retained intervals
            {intervalsFile, countFiles, lowCountFilterCountThreshold, percentageOfSamples, 1., 99., percentageOfSamples,
                    IntStream.range(numIntervalsBelowCountThreshold + 1, numIntervalsBelowCountThreshold + 99).boxed().collect(Collectors.toList())},
            {intervalsFile, countFiles, lowCountFilterCountThreshold, percentageOfSamples, 1., 90., percentageOfSamples,
                    IntStream.range(numIntervalsBelowCountThreshold + 1, numIntervalsBelowCountThreshold + 90).boxed().collect(Collectors.toList())},
            {intervalsWithExtraIntervalFile, countFiles, lowCountFilterCountThreshold, percentageOfSamples, 1., 99., percentageOfSamples,
                    IntStream.range(numIntervalsBelowCountThreshold + 1, numIntervalsBelowCountThreshold + 99).boxed().collect(Collectors.toList())},
            {intervalsWithExtraIntervalFile, countFiles, lowCountFilterCountThreshold, percentageOfSamples, 1., 90., percentageOfSamples,
                    IntStream.range(numIntervalsBelowCountThreshold + 1, numIntervalsBelowCountThreshold + 90).boxed().collect(Collectors.toList())}};
}
 
Example 11
Source File: FilterIntervalsIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@DataProvider(name = "dataAnnotationBasedFilters")
public Object[][] dataAnnotationBasedFilters() {
    final File annotatedIntervalsFile = createTempFile("annotated-intervals", ".tsv");
    ANNOTATED_INTERVALS.write(annotatedIntervalsFile);
    final File intervalsFile = createTempFile("intervals", ".interval_list");
    final IntervalList intervals = new IntervalList(ANNOTATED_INTERVALS.getMetadata().getSequenceDictionary());
    ANNOTATED_INTERVALS.getIntervals().forEach(i -> intervals.add(new Interval(i)));
    intervals.write(intervalsFile);

    //test that intersection gets rid of extra intervals in the interval list
    final File intervalsWithExtraIntervalFile = createTempFile("intervals-with-extra-interval", ".interval_list");
    final IntervalList intervalsWithExtraInterval = new IntervalList(ANNOTATED_INTERVALS.getMetadata().getSequenceDictionary());
    ANNOTATED_INTERVALS.getIntervals().forEach(i -> intervalsWithExtraInterval.add(new Interval(i)));
    intervalsWithExtraInterval.add(new Interval("20", 100000, 200000));
    intervalsWithExtraInterval.write(intervalsWithExtraIntervalFile);

    return new Object[][]{
            //intervals file, array of strings for excluded intervals, annotated-intervals file,
            //min/max GC content, mix/max mappability, min/max seg-dupe content, expected array of indices of retained intervals
            {intervalsFile, Collections.emptyList(), annotatedIntervalsFile, 0., 1., 0., 1., 0., 1., Arrays.asList(0, 1, 2, 3, 4, 5)},
            {intervalsFile, Collections.emptyList(), annotatedIntervalsFile, 0.1, 0.9, 0., 1., 0., 1., Arrays.asList(0, 1, 2, 5)},
            {intervalsFile, Collections.emptyList(), annotatedIntervalsFile, 0., 1., 0.1, 0.9, 0., 1., Arrays.asList(1, 2, 3, 5)},
            {intervalsFile, Collections.emptyList(), annotatedIntervalsFile, 0., 1., 0., 1., 0.1, 0.9, Arrays.asList(2, 3, 4, 5)},
            {intervalsFile, Collections.emptyList(), annotatedIntervalsFile, 0.1, 0.9, 0.1, 0.9, 0., 1., Arrays.asList(1, 2, 5)},
            {intervalsFile, Collections.emptyList(), annotatedIntervalsFile, 0.1, 0.9, 0., 1., 0.1, 0.9, Arrays.asList(2, 5)},
            {intervalsFile, Collections.emptyList(), annotatedIntervalsFile, 0., 1., 0.1, 0.9, 0.1, 0.9, Arrays.asList(2, 3, 5)},
            {intervalsFile, Collections.emptyList(), annotatedIntervalsFile, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9, Arrays.asList(2, 5)},
            {intervalsFile, Collections.singletonList("20:1-10"), annotatedIntervalsFile, 0., 1., 0., 1., 0., 1., Arrays.asList(1, 2, 3, 4, 5)},
            {intervalsFile, Arrays.asList("20:1-15", "20:35-45"), annotatedIntervalsFile, 0., 1., 0., 1., 0., 1., Arrays.asList(2, 5)},
            {intervalsFile, Collections.singletonList("20:25-50"), annotatedIntervalsFile, 0.1, 0.9, 0., 1., 0., 1., Arrays.asList(0, 1, 5)},
            {intervalsWithExtraIntervalFile, Collections.emptyList(), annotatedIntervalsFile, 0., 1., 0., 1., 0., 1., Arrays.asList(0, 1, 2, 3, 4, 5)},
            {intervalsWithExtraIntervalFile, Collections.emptyList(), annotatedIntervalsFile, 0.1, 0.9, 0., 1., 0., 1., Arrays.asList(0, 1, 2, 5)},
            {intervalsWithExtraIntervalFile, Collections.emptyList(), annotatedIntervalsFile, 0., 1., 0.1, 0.9, 0., 1., Arrays.asList(1, 2, 3, 5)},
            {intervalsWithExtraIntervalFile, Collections.emptyList(), annotatedIntervalsFile, 0., 1., 0., 1., 0.1, 0.9, Arrays.asList(2, 3, 4, 5)},
            {intervalsWithExtraIntervalFile, Collections.emptyList(), annotatedIntervalsFile, 0.1, 0.9, 0.1, 0.9, 0., 1., Arrays.asList(1, 2, 5)},
            {intervalsWithExtraIntervalFile, Collections.emptyList(), annotatedIntervalsFile, 0.1, 0.9, 0., 1., 0.1, 0.9, Arrays.asList(2, 5)},
            {intervalsWithExtraIntervalFile, Collections.emptyList(), annotatedIntervalsFile, 0., 1., 0.1, 0.9, 0.1, 0.9, Arrays.asList(2, 3, 5)},
            {intervalsWithExtraIntervalFile, Collections.emptyList(), annotatedIntervalsFile, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9, Arrays.asList(2, 5)},
            {intervalsWithExtraIntervalFile, Collections.singletonList("20:1-10"), annotatedIntervalsFile, 0., 1., 0., 1., 0., 1., Arrays.asList(1, 2, 3, 4, 5)},
            {intervalsWithExtraIntervalFile, Arrays.asList("20:1-15", "20:35-45"), annotatedIntervalsFile, 0., 1., 0., 1., 0., 1., Arrays.asList(2, 5)},
            {intervalsWithExtraIntervalFile, Collections.singletonList("20:25-50"), annotatedIntervalsFile, 0.1, 0.9, 0., 1., 0., 1., Arrays.asList(0, 1, 5)}};
}
 
Example 12
Source File: CollectHsMetricsTest.java    From picard with MIT License 4 votes vote down vote up
@Test
public void testHsMetricsHandlesIndelsAppropriately() throws IOException {
    final SAMRecordSetBuilder withDeletions = new SAMRecordSetBuilder(true, SortOrder.coordinate);
    final SAMRecordSetBuilder withInsertions = new SAMRecordSetBuilder(true, SortOrder.coordinate);
    final IntervalList targets = new IntervalList(withDeletions.getHeader());
    final IntervalList baits   = new IntervalList(withDeletions.getHeader());
    targets.add(new Interval("chr1", 1000, 1199, false, "t1"));
    baits.add(new Interval("chr1", 950,  1049, false, "b1"));
    baits.add(new Interval("chr1", 1050, 1149, false, "b2"));
    baits.add(new Interval("chr1", 1150, 1249, false, "b3"));

    // Generate 100 reads that fully cover the the target in each BAM
    for (int i=0; i<100; ++i) {
        withDeletions.addFrag( "d" + i, 0, 1000, false, false, "100M20D80M", null, 30);
        withInsertions.addFrag("i" + i, 0, 1000, false, false, "100M50I100M", null, 30);
    }

    // Write things out to file
    final File dir = IOUtil.createTempDir("hsmetrics.", ".test");
    final File bs = new File(dir, "baits.interval_list").getAbsoluteFile();
    final File ts = new File(dir, "targets.interval_list").getAbsoluteFile();
    baits.write(bs);
    targets.write(ts);
    final File withDelBam = writeBam(withDeletions,  new File(dir, "with_del.bam"));
    final File withInsBam = writeBam(withInsertions, new File(dir, "with_ins.bam"));

    // Now run CollectHsMetrics four times
    final File out = Files.createTempFile("hsmetrics.", ".txt").toFile();
    runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=false", "SAMPLE_SIZE=0", "TI="+ts.getPath(), "BI="+bs.getPath(), "O="+out.getPath(), "I="+withDelBam.getAbsolutePath()));
    final HsMetrics delsWithoutIndelHandling = readMetrics(out);
    runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=true", "SAMPLE_SIZE=0", "TI="+ts.getPath(), "BI="+bs.getPath(), "O="+out.getPath(), "I="+withDelBam.getAbsolutePath()));
    final HsMetrics delsWithIndelHandling = readMetrics(out);
    runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=false", "SAMPLE_SIZE=0", "TI="+ts.getPath(), "BI="+bs.getPath(), "O="+out.getPath(), "I="+withInsBam.getAbsolutePath()));
    final HsMetrics insWithoutIndelHandling = readMetrics(out);
    runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=true", "SAMPLE_SIZE=0", "TI="+ts.getPath(), "BI="+bs.getPath(), "O="+out.getPath(), "I="+withInsBam.getAbsolutePath()));
    final HsMetrics insWithIndelHandling = readMetrics(out);

    IOUtil.deleteDirectoryTree(dir);

    Assert.assertEquals(delsWithoutIndelHandling.MEAN_TARGET_COVERAGE, 90.0);  // 100X over 180/200 bases due to deletion
    Assert.assertEquals(delsWithIndelHandling.MEAN_TARGET_COVERAGE, 100.0);    // 100X with counting the deletion

    Assert.assertEquals(insWithoutIndelHandling.PCT_USABLE_BASES_ON_TARGET, 200/250d); // 50/250 inserted bases are not counted as on target
    Assert.assertEquals(insWithIndelHandling.PCT_USABLE_BASES_ON_TARGET,   1.0d);      // inserted bases are counted as on target
}
 
Example 13
Source File: CollectRnaSeqMetricsTest.java    From picard with MIT License 4 votes vote down vote up
@Test
public void testPctRibosomalBases() throws Exception {
    final String sequence = "chr1";
    final String ignoredSequence = "chrM";

    // Create some alignments that hit the ribosomal sequence, various parts of the gene, and intergenic.
    final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate);
    // Set seed so that strandedness is consistent among runs.
    builder.setRandomSeed(0);
    final int sequenceIndex = builder.getHeader().getSequenceIndex(sequence);

    builder.addPair("rrnaPair", sequenceIndex, 400, 500, false, false, "35I1M", "35I1M", true, true, -1);
    builder.addFrag("frag1", sequenceIndex, 150, true, false, "34I2M", "*", -1);
    builder.addFrag("frag2", sequenceIndex, 190, true, false, "35I1M", "*", -1);
    builder.addFrag("ignoredFrag", builder.getHeader().getSequenceIndex(ignoredSequence), 1, false);
    final File samFile = File.createTempFile("tmp.collectRnaSeqMetrics.", ".sam");
    samFile.deleteOnExit();
    try (SAMFileWriter samWriter = new SAMFileWriterFactory().makeSAMWriter(builder.getHeader(), false, samFile)) {
        for (final SAMRecord rec : builder.getRecords()) {
            samWriter.addAlignment(rec);
        }
    }

    // Create an interval list with one ribosomal interval.
    final Interval rRnaInterval = new Interval(sequence, 300, 520, true, "rRNA");
    final IntervalList rRnaIntervalList = new IntervalList(builder.getHeader());
    rRnaIntervalList.add(rRnaInterval);
    final File rRnaIntervalsFile = File.createTempFile("tmp.rRna.", ".interval_list");
    rRnaIntervalsFile.deleteOnExit();
    rRnaIntervalList.write(rRnaIntervalsFile);

    // Generate the metrics.
    final File metricsFile = File.createTempFile("tmp.", ".rna_metrics");
    metricsFile.deleteOnExit();

    final String[] args = new String[]{
            "INPUT=" + samFile.getAbsolutePath(),
            "OUTPUT=" + metricsFile.getAbsolutePath(),
            "REF_FLAT=" + getRefFlatFile(sequence).getAbsolutePath(),
            "RIBOSOMAL_INTERVALS=" + rRnaIntervalsFile.getAbsolutePath(),
            "STRAND_SPECIFICITY=SECOND_READ_TRANSCRIPTION_STRAND",
            "IGNORE_SEQUENCE=" + ignoredSequence
    };
    Assert.assertEquals(runPicardCommandLine(args), 0);

    final MetricsFile<RnaSeqMetrics, Comparable<?>> output = new MetricsFile<>();
    output.read(new FileReader(metricsFile));

    final RnaSeqMetrics metrics = output.getMetrics().get(0);

    Assert.assertEquals(metrics.PCT_RIBOSOMAL_BASES, 0.4);
}
 
Example 14
Source File: CollectRnaSeqMetricsTest.java    From picard with MIT License 4 votes vote down vote up
@Test
public void testTranscriptionStrandMetrics() throws Exception {
    final String sequence = "chr1";
    final String ignoredSequence = "chrM";

    // Create some alignments that hit the ribosomal sequence, various parts of the gene, and intergenic.
    final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate);
    // Set seed so that strandedness is consistent among runs.
    builder.setRandomSeed(0);
    builder.setReadLength(50);
    final int sequenceIndex = builder.getHeader().getSequenceIndex(sequence);

    // Reads that start *before* the gene: count as unexamined
    builder.addPair("pair_prior_1", sequenceIndex, 45, 50, false, false, "50M", "50M", true, false, -1);
    builder.addPair("pair_prior_2", sequenceIndex, 49, 50, false, false, "50M", "50M", true, false, -1);

    // Read that is enclosed in the gene, but one end does not map: count as unexamined
    builder.addPair("read_one_end_unmapped", sequenceIndex, 50, 51, false, true, "50M", null, false, false, -1);

    // Reads that are enclosed in the gene, paired and frag: one count per template
    builder.addFrag("frag_enclosed_forward", sequenceIndex, 150, false);
    builder.addFrag("frag_enclosed_reverse", sequenceIndex, 150, true);
    builder.addPair("pair_enclosed_forward", sequenceIndex, 150, 200, false, false, "50M", "50M", false, true, -1);
    builder.addPair("pair_enclosed_reverse", sequenceIndex, 200, 150, false, false, "50M", "50M", true, false, -1);

    // Read that starts *after* the gene: not counted
    builder.addPair("pair_after_1", sequenceIndex, 545, 550, false, false, "50M", "50M", true, false, -1);
    builder.addPair("pair_after_2", sequenceIndex, 549, 550, false, false, "50M", "50M", true, false, -1);

    // A read that uses the read length instead of the mate cigar
    builder.addFrag("frag_enclosed_forward_no_mate_cigar", sequenceIndex, 150, false).setAttribute(SAMTag.MC.name(), null);

    // Duplicate reads are counted
    builder.addFrag("frag_duplicate", sequenceIndex, 150, false).setDuplicateReadFlag(true);

    // These reads should be ignored.
    builder.addFrag("frag_secondary", sequenceIndex, 150, false).setNotPrimaryAlignmentFlag(true);
    builder.addFrag("frag_supplementary", sequenceIndex, 150, false).setSupplementaryAlignmentFlag(true);
    builder.addFrag("frag_qc_failure", sequenceIndex, 150, false).setReadFailsVendorQualityCheckFlag(true);

    final File samFile = File.createTempFile("tmp.collectRnaSeqMetrics.", ".sam");
    samFile.deleteOnExit();

    final SAMFileWriter samWriter = new SAMFileWriterFactory().makeSAMWriter(builder.getHeader(), false, samFile);
    for (final SAMRecord rec: builder.getRecords()) samWriter.addAlignment(rec);
    samWriter.close();

    // Create an interval list with one ribosomal interval.
    final Interval rRnaInterval = new Interval(sequence, 300, 520, true, "rRNA");
    final IntervalList rRnaIntervalList = new IntervalList(builder.getHeader());
    rRnaIntervalList.add(rRnaInterval);
    final File rRnaIntervalsFile = File.createTempFile("tmp.rRna.", ".interval_list");
    rRnaIntervalsFile.deleteOnExit();
    rRnaIntervalList.write(rRnaIntervalsFile);

    // Generate the metrics.
    final File metricsFile = File.createTempFile("tmp.", ".rna_metrics");
    metricsFile.deleteOnExit();

    final String[] args = new String[] {
            "INPUT=" +               samFile.getAbsolutePath(),
            "OUTPUT=" +              metricsFile.getAbsolutePath(),
            "REF_FLAT=" +            getRefFlatFile(sequence).getAbsolutePath(),
            "RIBOSOMAL_INTERVALS=" + rRnaIntervalsFile.getAbsolutePath(),
            "STRAND_SPECIFICITY=SECOND_READ_TRANSCRIPTION_STRAND",
            "IGNORE_SEQUENCE=" + ignoredSequence
    };
    Assert.assertEquals(runPicardCommandLine(args), 0);

    final MetricsFile<RnaSeqMetrics, Comparable<?>> output = new MetricsFile<RnaSeqMetrics, Comparable<?>>();
    output.read(new FileReader(metricsFile));

    final RnaSeqMetrics metrics = output.getMetrics().get(0);
    Assert.assertEquals(metrics.PF_ALIGNED_BASES, 900);
    Assert.assertEquals(metrics.PF_BASES, 950);
    Assert.assertEquals(metrics.RIBOSOMAL_BASES.longValue(), 0L);
    Assert.assertEquals(metrics.CODING_BASES, 471);
    Assert.assertEquals(metrics.UTR_BASES, 125);
    Assert.assertEquals(metrics.INTRONIC_BASES, 98);
    Assert.assertEquals(metrics.INTERGENIC_BASES, 206);
    Assert.assertEquals(metrics.CORRECT_STRAND_READS, 7);
    Assert.assertEquals(metrics.INCORRECT_STRAND_READS, 6);
    Assert.assertEquals(metrics.IGNORED_READS, 0);
    Assert.assertEquals(metrics.NUM_R1_TRANSCRIPT_STRAND_READS, 4);
    Assert.assertEquals(metrics.NUM_R2_TRANSCRIPT_STRAND_READS, 2);
    Assert.assertEquals(metrics.NUM_UNEXPLAINED_READS, 3);
    Assert.assertEquals(metrics.PCT_R1_TRANSCRIPT_STRAND_READS, 0.666667);
    Assert.assertEquals(metrics.PCT_R2_TRANSCRIPT_STRAND_READS, 0.333333);
}
 
Example 15
Source File: CollectRnaSeqMetricsTest.java    From picard with MIT License 4 votes vote down vote up
@Test(dataProvider = "rRnaIntervalsFiles")
public void testNoIntevalsNoFragPercentage(final File rRnaIntervalsFile) throws Exception {
    final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate);

    // Add a header but no intervals
    if ( rRnaIntervalsFile != null  ) {
        final IntervalList rRnaIntervalList = new IntervalList(builder.getHeader());
        rRnaIntervalList.write(rRnaIntervalsFile);
        rRnaIntervalsFile.deleteOnExit();
    }

    // Create some alignments
    final String sequence = "chr1";
    final String ignoredSequence = "chrM";

    // Set seed so that strandedness is consistent among runs.
    builder.setRandomSeed(0);
    final int sequenceIndex = builder.getHeader().getSequenceIndex(sequence);
    builder.addPair("pair1", sequenceIndex, 45, 475);
    builder.addFrag("ignoredFrag", builder.getHeader().getSequenceIndex(ignoredSequence), 1, false);

    final File samFile = File.createTempFile("tmp.collectRnaSeqMetrics.", ".sam");
    samFile.deleteOnExit();

    final SAMFileWriter samWriter = new SAMFileWriterFactory().makeSAMWriter(builder.getHeader(), false, samFile);
    for (final SAMRecord rec : builder.getRecords()) samWriter.addAlignment(rec);
    samWriter.close();

    // Generate the metrics.
    final File metricsFile = File.createTempFile("tmp.", ".rna_metrics");
    metricsFile.deleteOnExit();

    final String rRnaIntervalsPath = rRnaIntervalsFile != null ? rRnaIntervalsFile.getAbsolutePath() : null;
    final String[] args = new String[]{
            "INPUT=" + samFile.getAbsolutePath(),
            "OUTPUT=" + metricsFile.getAbsolutePath(),
            "REF_FLAT=" + getRefFlatFile(sequence).getAbsolutePath(),
            "RIBOSOMAL_INTERVALS=" + rRnaIntervalsPath,
            "RRNA_FRAGMENT_PERCENTAGE=" + 0.0,
            "STRAND_SPECIFICITY=SECOND_READ_TRANSCRIPTION_STRAND",
            "IGNORE_SEQUENCE=" + ignoredSequence
    };
    try {
        Assert.assertEquals(runPicardCommandLine(args), 0);
    } catch(final Exception e) {
        if (rRnaIntervalsFile != null) {
            Assert.assertEquals(e.getCause().getClass(), PicardException.class);
        }
    }
}
 
Example 16
Source File: CollectRnaSeqMetricsTest.java    From picard with MIT License 4 votes vote down vote up
@Test
public void testBasic() throws Exception {
    final String sequence = "chr1";
    final String ignoredSequence = "chrM";

    // Create some alignments that hit the ribosomal sequence, various parts of the gene, and intergenic.
    final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate);
    // Set seed so that strandedness is consistent among runs.
    builder.setRandomSeed(0);
    final int sequenceIndex = builder.getHeader().getSequenceIndex(sequence);
    builder.addPair("pair1", sequenceIndex, 45, 475);
    builder.addPair("pair2", sequenceIndex, 90, 225);
    builder.addPair("pair3", sequenceIndex, 120, 600);
    builder.addFrag("frag1", sequenceIndex, 150, true);
    builder.addFrag("frag2", sequenceIndex, 450, true);
    builder.addFrag("frag3", sequenceIndex, 225, false);
    builder.addPair("rrnaPair", sequenceIndex, 400, 500);

    builder.addFrag("ignoredFrag", builder.getHeader().getSequenceIndex(ignoredSequence), 1, false);

    final File samFile = File.createTempFile("tmp.collectRnaSeqMetrics.", ".sam");
    samFile.deleteOnExit();

    final SAMFileWriter samWriter = new SAMFileWriterFactory().makeSAMWriter(builder.getHeader(), false, samFile);
    for (final SAMRecord rec: builder.getRecords()) samWriter.addAlignment(rec);
    samWriter.close();

    // Create an interval list with one ribosomal interval.
    final Interval rRnaInterval = new Interval(sequence, 300, 520, true, "rRNA");
    final IntervalList rRnaIntervalList = new IntervalList(builder.getHeader());
    rRnaIntervalList.add(rRnaInterval);
    final File rRnaIntervalsFile = File.createTempFile("tmp.rRna.", ".interval_list");
    rRnaIntervalsFile.deleteOnExit();
    rRnaIntervalList.write(rRnaIntervalsFile);

    // Generate the metrics.
    final File metricsFile = File.createTempFile("tmp.", ".rna_metrics");
    metricsFile.deleteOnExit();

    final String[] args = new String[] {
            "INPUT=" +               samFile.getAbsolutePath(),
            "OUTPUT=" +              metricsFile.getAbsolutePath(),
            "REF_FLAT=" +            getRefFlatFile(sequence).getAbsolutePath(),
            "RIBOSOMAL_INTERVALS=" + rRnaIntervalsFile.getAbsolutePath(),
            "STRAND_SPECIFICITY=SECOND_READ_TRANSCRIPTION_STRAND",
            "IGNORE_SEQUENCE=" + ignoredSequence
    };
    Assert.assertEquals(runPicardCommandLine(args), 0);

    final MetricsFile<RnaSeqMetrics, Comparable<?>> output = new MetricsFile<RnaSeqMetrics, Comparable<?>>();
    output.read(new FileReader(metricsFile));

    final RnaSeqMetrics metrics = output.getMetrics().get(0);
    Assert.assertEquals(metrics.PF_ALIGNED_BASES, 396);
    Assert.assertEquals(metrics.PF_BASES, 432);
    Assert.assertEquals(metrics.RIBOSOMAL_BASES.longValue(), 108L);
    Assert.assertEquals(metrics.CODING_BASES, 136);
    Assert.assertEquals(metrics.UTR_BASES, 51);
    Assert.assertEquals(metrics.INTRONIC_BASES, 50);
    Assert.assertEquals(metrics.INTERGENIC_BASES, 51);
    Assert.assertEquals(metrics.CORRECT_STRAND_READS, 3);
    Assert.assertEquals(metrics.INCORRECT_STRAND_READS, 4);
    Assert.assertEquals(metrics.IGNORED_READS, 1);
    Assert.assertEquals(metrics.NUM_R1_TRANSCRIPT_STRAND_READS, 1);
    Assert.assertEquals(metrics.NUM_R2_TRANSCRIPT_STRAND_READS, 2);
    Assert.assertEquals(metrics.NUM_UNEXPLAINED_READS, 2);
    Assert.assertEquals(metrics.PCT_R1_TRANSCRIPT_STRAND_READS, 0.333333);
    Assert.assertEquals(metrics.PCT_R2_TRANSCRIPT_STRAND_READS, 0.666667);
}
 
Example 17
Source File: BedToIntervalList.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
    IOUtil.assertFileIsWritable(OUTPUT);
    try {
        // create a new header that we will assign the dictionary provided by the SAMSequenceDictionaryExtractor to.
        final SAMFileHeader header = new SAMFileHeader();
        final SAMSequenceDictionary samSequenceDictionary = SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY.toPath());
        header.setSequenceDictionary(samSequenceDictionary);
        // set the sort order to be sorted by coordinate, which is actually done below
        // by getting the .uniqued() intervals list before we write out the file
        header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
        final IntervalList intervalList = new IntervalList(header);

        final FeatureReader<BEDFeature> bedReader = AbstractFeatureReader.getFeatureReader(INPUT.getAbsolutePath(), new BEDCodec(), false);
        final CloseableTribbleIterator<BEDFeature> iterator = bedReader.iterator();
        final ProgressLogger progressLogger = new ProgressLogger(LOG, (int) 1e6);

        while (iterator.hasNext()) {
            final BEDFeature bedFeature = iterator.next();
            final String sequenceName = bedFeature.getContig();
            final int start = bedFeature.getStart();
            final int end = bedFeature.getEnd();
            // NB: do not use an empty name within an interval
            final String name;
            if (bedFeature.getName().isEmpty()) {
                name = null;
            } else {
                name = bedFeature.getName();
            }

            final SAMSequenceRecord sequenceRecord = header.getSequenceDictionary().getSequence(sequenceName);

            // Do some validation
            if (null == sequenceRecord) {
                if (DROP_MISSING_CONTIGS) {
                    LOG.info(String.format("Dropping interval with missing contig: %s:%d-%d", sequenceName, bedFeature.getStart(), bedFeature.getEnd()));
                    missingIntervals++;
                    missingRegion += bedFeature.getEnd() - bedFeature.getStart();
                    continue;
                }
                throw new PicardException(String.format("Sequence '%s' was not found in the sequence dictionary", sequenceName));
            } else if (start < 1) {
                throw new PicardException(String.format("Start on sequence '%s' was less than one: %d", sequenceName, start));
            } else if (sequenceRecord.getSequenceLength() < start) {
                throw new PicardException(String.format("Start on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), start));
            } else if ((end == 0 && start != 1 ) //special case for 0-length interval at the start of a contig
                    || end < 0 ) {
                throw new PicardException(String.format("End on sequence '%s' was less than one: %d", sequenceName, end));
            } else if (sequenceRecord.getSequenceLength() < end) {
                throw new PicardException(String.format("End on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), end));
            } else if (end < start - 1) {
                throw new PicardException(String.format("On sequence '%s', end < start-1: %d <= %d", sequenceName, end, start));
            }

            final boolean isNegativeStrand = bedFeature.getStrand() == Strand.NEGATIVE;
            final Interval interval = new Interval(sequenceName, start, end, isNegativeStrand, name);
            intervalList.add(interval);

            progressLogger.record(sequenceName, start);
        }
        CloserUtil.close(bedReader);

        if (DROP_MISSING_CONTIGS) {
            if (missingRegion == 0) {
                LOG.info("There were no missing regions.");
            } else {
                LOG.warn(String.format("There were %d missing regions with a total of %d bases", missingIntervals, missingRegion));
            }
        }
        // Sort and write the output
        IntervalList out = intervalList;
        if (SORT) {
            out = out.sorted();
        }
        if (UNIQUE) {
            out = out.uniqued();
        }
        out.write(OUTPUT);
        LOG.info(String.format("Wrote %d intervals spanning a total of %d bases", out.getIntervals().size(),out.getBaseCount()));

    } catch (final IOException e) {
        throw new RuntimeException(e);
    }

    return 0;
}
 
Example 18
Source File: LiftOverIntervalList.java    From picard with MIT License 4 votes vote down vote up
/**
 * Do the work after command line has been parsed. RuntimeException may be
 * thrown by this method, and are reported appropriately.
 *
 * @return program exit status.
 */
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
    IOUtil.assertFileIsReadable(CHAIN);
    IOUtil.assertFileIsWritable(OUTPUT);
    if (REJECT != null) IOUtil.assertFileIsWritable(REJECT);

    final LiftOver liftOver = new LiftOver(CHAIN);
    liftOver.setLiftOverMinMatch(MIN_LIFTOVER_PCT);

    final IntervalList intervalList = IntervalList.fromFile(INPUT);
    final IntervalList rejects = new IntervalList(intervalList.getHeader());

    final long baseCount = intervalList.getBaseCount();
    LOG.info("Lifting over " + intervalList.getIntervals().size() + " intervals, encompassing " +
            baseCount + " bases.");

    final SAMFileHeader toHeader = new SAMFileHeader(SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY.toPath()));
    liftOver.validateToSequences(toHeader.getSequenceDictionary());
    final IntervalList toIntervals = new IntervalList(toHeader);
    for (final Interval fromInterval : intervalList) {
        final Interval toInterval = liftOver.liftOver(fromInterval);
        if (toInterval != null) {
            toIntervals.add(toInterval);
        } else {
            rejects.add(fromInterval);
            LOG.warn("Liftover failed for ", fromInterval, " (len ", fromInterval.length(), ")");
            final List<LiftOver.PartialLiftover> partials = liftOver.diagnosticLiftover(fromInterval);
            for (final LiftOver.PartialLiftover partial : partials) {
                LOG.info(partial);
            }
        }
    }

    toIntervals.sorted().write(OUTPUT);

    if (REJECT != null) {
        rejects.write(REJECT);
    }
    final long rejectBaseCount = rejects.getBaseCount();

    LOG.info(String.format("Liftover Complete. \n" +
                    "%d of %d intervals failed (%g%%) to liftover, encompassing %d of %d bases (%g%%).",
            rejects.getIntervals().size(), intervalList.getIntervals().size(),
            100 * rejects.getIntervals().size() / (double) intervalList.getIntervals().size(),
            rejectBaseCount, baseCount,
            100 * rejectBaseCount / (double) baseCount
    ));

    return rejects.getIntervals().isEmpty() ? 0 : 1;
}