htsjdk.samtools.util.IntervalList#uniqued

Source File: RnaSeqMetricsCollector.java From picard with MIT License

6 votes

public static OverlapDetector<Interval> makeOverlapDetector(final File samFile, final SAMFileHeader header, final File ribosomalIntervalsFile, final Log log) {

        final OverlapDetector<Interval> ribosomalSequenceOverlapDetector = new OverlapDetector<Interval>(0, 0);
        if (ribosomalIntervalsFile != null) {

            final IntervalList ribosomalIntervals = IntervalList.fromFile(ribosomalIntervalsFile);
            if (ribosomalIntervals.size() == 0) {
                log.warn("The RIBOSOMAL_INTERVALS file, " + ribosomalIntervalsFile.getAbsolutePath() + " does not contain intervals");
            }
            try {
                SequenceUtil.assertSequenceDictionariesEqual(header.getSequenceDictionary(), ribosomalIntervals.getHeader().getSequenceDictionary());
            } catch (SequenceUtil.SequenceListsDifferException e) {
                throw new PicardException("Sequence dictionaries differ in " + samFile.getAbsolutePath() + " and " + ribosomalIntervalsFile.getAbsolutePath(),
                        e);
            }
            final IntervalList uniquedRibosomalIntervals = ribosomalIntervals.uniqued();
            final List<Interval> intervals = uniquedRibosomalIntervals.getIntervals();
            ribosomalSequenceOverlapDetector.addAll(intervals, intervals);
        }
        return ribosomalSequenceOverlapDetector;
    }

Source File: BaitDesigner.java From picard with MIT License

6 votes

/** Calculates a few statistics about the bait design that can then be output. */
void calculateStatistics(final IntervalList targets, final IntervalList baits) {
    this.TARGET_TERRITORY = (int) targets.getUniqueBaseCount();
    this.TARGET_COUNT = targets.size();
    this.BAIT_TERRITORY = (int) baits.getUniqueBaseCount();
    this.BAIT_COUNT = baits.size();
    this.DESIGN_EFFICIENCY = this.TARGET_TERRITORY / (double) this.BAIT_TERRITORY;

    // Figure out the intersection between all targets and all baits
    final IntervalList tmp = new IntervalList(targets.getHeader());
    final OverlapDetector<Interval> detector = new OverlapDetector<Interval>(0, 0);
    detector.addAll(baits.getIntervals(), baits.getIntervals());

    for (final Interval target : targets) {
        final Collection<Interval> overlaps = detector.getOverlaps(target);

        if (overlaps.isEmpty()) {
            this.ZERO_BAIT_TARGETS++;
        } else {
            for (final Interval i : overlaps) tmp.add(target.intersect(i));
        }
    }

    tmp.uniqued();
    this.BAIT_TARGET_TERRITORY_INTERSECTION = (int) tmp.getBaseCount();
}

Source File: FingerprintChecker.java From picard with MIT License

5 votes

/**
 * Takes a set of fingerprints and returns an IntervalList containing all the loci that
 * can be productively examined in sequencing data to compare to one or more of the
 * fingerprints.
 */
public IntervalList getLociToGenotype(final Collection<Fingerprint> fingerprints) {
    final IntervalList intervals = new IntervalList(this.haplotypes.getHeader());

    for (final Fingerprint fp : fingerprints) {
        for (final HaplotypeProbabilities genotype : fp.values()) {
            final HaplotypeBlock h = genotype.getHaplotype();
            for (final Snp snp : h.getSnps()) {
                intervals.add(new Interval(snp.getChrom(), snp.getPos(), snp.getPos(), false, snp.getName()));
            }
        }
    }

    return intervals.uniqued();
}

Source File: WgsMetrics.java From picard with MIT License

5 votes

/**
 * Create an instance of this metric that is mergeable.
 *
 * @param highQualityDepthHistogram the count of genomic positions observed for each observed depth. Excludes bases with quality below MINIMUM_BASE_QUALITY.
 * @param unfilteredDepthHistogram the depth histogram that includes all but quality 2 bases.
 * @param pctExcludedByAdapter the fraction of aligned bases that were filtered out because they were in reads with 0 mapping quality that looked like adapter sequence.
 * @param pctExcludedByMapq the fraction of aligned bases that were filtered out because they were in reads with low mapping quality.
 * @param pctExcludedByDupes the fraction of aligned bases that were filtered out because they were in reads marked as duplicates.
 * @param pctExcludedByPairing the fraction of bases that were filtered out because they were in reads without a mapped mate pair.
 * @param pctExcludedByBaseq the fraction of aligned bases that were filtered out because they were of low base quality.
 * @param pctExcludedByOverlap the fraction of aligned bases that were filtered out because they were the second observation from an insert with overlapping reads.
 * @param pctExcludedByCapping the fraction of aligned bases that were filtered out because they would have raised coverage above the capped value.
 * @param pctExcludeTotal the fraction of bases excluded across all filters.
 * @param coverageCap Treat positions with coverage exceeding this value as if they had coverage at this value.
 * @param unfilteredBaseQHistogram the count of bases observed with a given quality. Includes all but quality 2 bases.
 * @param theoreticalHetSensitivitySampleSize the sample size used for theoretical het sensitivity sampling.
 */
public WgsMetrics(final IntervalList intervals,
                  final Histogram<Integer> highQualityDepthHistogram,
                  final Histogram<Integer> unfilteredDepthHistogram,
                  final double pctExcludedByAdapter,
                  final double pctExcludedByMapq,
                  final double pctExcludedByDupes,
                  final double pctExcludedByPairing,
                  final double pctExcludedByBaseq,
                  final double pctExcludedByOverlap,
                  final double pctExcludedByCapping,
                  final double pctExcludeTotal,
                  final int coverageCap,
                  final Histogram<Integer> unfilteredBaseQHistogram,
                  final int theoreticalHetSensitivitySampleSize) {
    this.intervals      = intervals.uniqued();
    this.highQualityDepthHistogram = highQualityDepthHistogram;
    this.unfilteredDepthHistogram = unfilteredDepthHistogram;
    this.unfilteredBaseQHistogram = unfilteredBaseQHistogram;
    this.coverageCap    = coverageCap;
    this.theoreticalHetSensitivitySampleSize = theoreticalHetSensitivitySampleSize;

    PCT_EXC_ADAPTER  = pctExcludedByAdapter;
    PCT_EXC_MAPQ     = pctExcludedByMapq;
    PCT_EXC_DUPE     = pctExcludedByDupes;
    PCT_EXC_UNPAIRED = pctExcludedByPairing;
    PCT_EXC_BASEQ    = pctExcludedByBaseq;
    PCT_EXC_OVERLAP  = pctExcludedByOverlap;
    PCT_EXC_CAPPED   = pctExcludedByCapping;
    PCT_EXC_TOTAL    = pctExcludeTotal;

    calculateDerivedFields();
}

Source File: IntervalListScattererWithSubdivision.java From picard with MIT License

4 votes

@Override
public IntervalList preprocessIntervalList(IntervalList inputList) {
    return inputList.uniqued();
}

Source File: BedToIntervalList.java From picard with MIT License

4 votes

@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
    IOUtil.assertFileIsWritable(OUTPUT);
    try {
        // create a new header that we will assign the dictionary provided by the SAMSequenceDictionaryExtractor to.
        final SAMFileHeader header = new SAMFileHeader();
        final SAMSequenceDictionary samSequenceDictionary = SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY.toPath());
        header.setSequenceDictionary(samSequenceDictionary);
        // set the sort order to be sorted by coordinate, which is actually done below
        // by getting the .uniqued() intervals list before we write out the file
        header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
        final IntervalList intervalList = new IntervalList(header);

        final FeatureReader<BEDFeature> bedReader = AbstractFeatureReader.getFeatureReader(INPUT.getAbsolutePath(), new BEDCodec(), false);
        final CloseableTribbleIterator<BEDFeature> iterator = bedReader.iterator();
        final ProgressLogger progressLogger = new ProgressLogger(LOG, (int) 1e6);

        while (iterator.hasNext()) {
            final BEDFeature bedFeature = iterator.next();
            final String sequenceName = bedFeature.getContig();
            final int start = bedFeature.getStart();
            final int end = bedFeature.getEnd();
            // NB: do not use an empty name within an interval
            final String name;
            if (bedFeature.getName().isEmpty()) {
                name = null;
            } else {
                name = bedFeature.getName();
            }

            final SAMSequenceRecord sequenceRecord = header.getSequenceDictionary().getSequence(sequenceName);

            // Do some validation
            if (null == sequenceRecord) {
                if (DROP_MISSING_CONTIGS) {
                    LOG.info(String.format("Dropping interval with missing contig: %s:%d-%d", sequenceName, bedFeature.getStart(), bedFeature.getEnd()));
                    missingIntervals++;
                    missingRegion += bedFeature.getEnd() - bedFeature.getStart();
                    continue;
                }
                throw new PicardException(String.format("Sequence '%s' was not found in the sequence dictionary", sequenceName));
            } else if (start < 1) {
                throw new PicardException(String.format("Start on sequence '%s' was less than one: %d", sequenceName, start));
            } else if (sequenceRecord.getSequenceLength() < start) {
                throw new PicardException(String.format("Start on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), start));
            } else if ((end == 0 && start != 1 ) //special case for 0-length interval at the start of a contig
                    || end < 0 ) {
                throw new PicardException(String.format("End on sequence '%s' was less than one: %d", sequenceName, end));
            } else if (sequenceRecord.getSequenceLength() < end) {
                throw new PicardException(String.format("End on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), end));
            } else if (end < start - 1) {
                throw new PicardException(String.format("On sequence '%s', end < start-1: %d <= %d", sequenceName, end, start));
            }

            final boolean isNegativeStrand = bedFeature.getStrand() == Strand.NEGATIVE;
            final Interval interval = new Interval(sequenceName, start, end, isNegativeStrand, name);
            intervalList.add(interval);

            progressLogger.record(sequenceName, start);
        }
        CloserUtil.close(bedReader);

        if (DROP_MISSING_CONTIGS) {
            if (missingRegion == 0) {
                LOG.info("There were no missing regions.");
            } else {
                LOG.warn(String.format("There were %d missing regions with a total of %d bases", missingIntervals, missingRegion));
            }
        }
        // Sort and write the output
        IntervalList out = intervalList;
        if (SORT) {
            out = out.sorted();
        }
        if (UNIQUE) {
            out = out.uniqued();
        }
        out.write(OUTPUT);
        LOG.info(String.format("Wrote %d intervals spanning a total of %d bases", out.getIntervals().size(),out.getBaseCount()));

    } catch (final IOException e) {
        throw new RuntimeException(e);
    }

    return 0;
}

Java Code Examples for htsjdk.samtools.util.IntervalList#uniqued()