htsjdk.samtools.util.PeekableIterator#hasNext

Source File: MultiCellDigitalAlleleCounts.java From Drop-seq with MIT License

6 votes

/**
 * Constructs a meta cell containing all of the umi data across all individual cells in the provided list.
 * Allows you to calculate population wide statistics on all cells across a single SNP/gene.
 * This changes UMI names to be a combination of the cell name and the UMI sequence.
 * This is done because the same UMI sequence can exist in two different cells, and umi IDs are only unique within a cell.
 * @return A new DigitalAlleleCounts object that contains the data from all cells added to this collection.
 * If no data was added to the MultiCellDigitalAlleleCounts object, this will return null.
 */
public DigitalAlleleCounts getMetaAnalysis (final String clusterID, final Collection<String> cells) {
	Iterator<DigitalAlleleCounts> iter =  dacMap.values().iterator();
	// short circuit.
	if (!iter.hasNext())
		return null;

	PeekableIterator<DigitalAlleleCounts> pIter= new PeekableIterator<DigitalAlleleCounts>(iter);
	DigitalAlleleCounts first = pIter.peek();
	DigitalAlleleCounts result = new DigitalAlleleCounts(first.getSnpInterval(), first.getGene(), clusterID, first.getBaseQualityThreshold());
	while (pIter.hasNext()) {
		DigitalAlleleCounts next=pIter.next();
		if (cells.contains(next.getCell()))
			result=addDAC(result, next);
	}
	pIter.close();
	return (result);
}

Source File: CollapseTagWithContext.java From Drop-seq with MIT License

6 votes

private void processContext (Iterable<SAMRecord> i, SAMFileWriter writer, boolean verbose, PrintStream outMetrics) {
	PeekableIterator<SAMRecord> iter = new PeekableIterator<>(i.iterator());
   	if (!iter.hasNext()) return;

	// get context.
	String context = getContextString(iter.peek(), this.CONTEXT_TAGS);

	// get barcode counts.
	ObjectCounter<String> barcodeCounts = getBarcodeCounts (iter, this.COLLAPSE_TAG, this.COUNT_TAGS, this.COUNT_TAGS_EDIT_DISTANCE);
	if (this.MIN_COUNT > 1 & !this.MUTATIONAL_COLLAPSE) barcodeCounts.filterByMinCount(this.MIN_COUNT);
	
	Map<String, String> collapseMap = collapseBarcodes(barcodeCounts, this.FIND_INDELS, this.EDIT_DISTANCE, this.ADAPTIVE_ED_MIN, this.ADAPTIVE_ED_MAX, this.MIN_COUNT, this.MUTATIONAL_COLLAPSE_PATH_ED, verbose, outMetrics, context, this.ADAPTIVE_ED_METRICS_ED_LIST);
	iter = new PeekableIterator<>(i.iterator());
	retagBarcodedReads(iter, barcodeCounts, collapseMap, this.DROP_SMALL_COUNTS, writer, this.COLLAPSE_TAG, this.OUT_TAG);        	

}

Source File: QuerySortedReadPairIteratorUtil.java From picard with MIT License

6 votes

/**
 * Return the next usable read in the iterator
 *
 * @param iterator the iterator to pull from
 * @param justPeek if true, just peek the next usable read rather than pulling it (note: it may remove unusable reads from the iterator)
 * @return the next read or null if none are left
 */
private static SAMRecord getNextUsableRead(final PeekableIterator<SAMRecord> iterator, final boolean justPeek) {

    while (iterator.hasNext()) {
        // trash the next read if it fails PF, is secondary, or is supplementary
        final SAMRecord nextRead = iterator.peek();
        if (nextRead.getReadFailsVendorQualityCheckFlag() || nextRead.isSecondaryOrSupplementary()) {
            iterator.next();
        }
        // otherwise, return it
        else {
            return justPeek ? nextRead : iterator.next();
        }
    }

    // no good reads left
    return null;
}

Source File: AnnotatedIntervalUtils.java From gatk with BSD 3-Clause "New" or "Revised" License

6 votes

/** Merge AnnotatedIntervals by whether the regions overlap.  Sorting is performed as well, so input
 * ordering is lost.
 *
 * When two overlapping regions are merged, annotations are merged as follows:
 *  - The annotation appears in one region:  Resulting region will have the annotation with the value of that one region
 *  - The annotation appears in both regions:  Resulting region will have both values separated by the {@code annotationSeparator}
 *  parameter.
 *
 * Only merges overlaps, not abutters.
 *
 * @param initialRegions regions to merge.  Never {@code null}
 * @param dictionary sequence dictionary to use for sorting.  Never {@code null}
 * @param annotationSeparator separator to use in the case of annotation conflicts.  Never {@code null}
 * @param progressUpdater Consuming function that can callback with the latest region that has been processed.
 *                        Never {@code null}.  Use a no-op function if you do not wish to provide a progress update.
 * @return Segments will be sorted by the sequence dictionary
 */
public static List<AnnotatedInterval> mergeRegions(final List<AnnotatedInterval> initialRegions,
                                                   final SAMSequenceDictionary dictionary, final String annotationSeparator,
                                                   final Consumer<Locatable> progressUpdater) {

    Utils.nonNull(initialRegions);
    Utils.nonNull(dictionary);
    Utils.nonNull(annotationSeparator);
    Utils.nonNull(progressUpdater);

    final List<AnnotatedInterval> segments = IntervalUtils.sortLocatablesBySequenceDictionary(initialRegions,
            dictionary);

    final List<AnnotatedInterval> finalSegments = new ArrayList<>();
    final PeekableIterator<AnnotatedInterval> segmentsIterator = new PeekableIterator<>(segments.iterator());
    while (segmentsIterator.hasNext()) {
        AnnotatedInterval currentRegion = segmentsIterator.next();
        while (segmentsIterator.peek() != null && IntervalUtils.overlaps(currentRegion, segmentsIterator.peek())) {
            final AnnotatedInterval toBeMerged = segmentsIterator.next();
            currentRegion = merge(currentRegion, toBeMerged, annotationSeparator);
        }
        progressUpdater.accept(currentRegion);
        finalSegments.add(currentRegion);
    }
    return finalSegments;
}

Source File: SimpleGermlineTagger.java From gatk with BSD 3-Clause "New" or "Revised" License

6 votes

private static List<AnnotatedInterval> mergedRegionsByAnnotation(final String annotationToMerge, final List<AnnotatedInterval> regions) {
    final List<AnnotatedInterval> mergedSegments = new ArrayList<>();
    final PeekableIterator<AnnotatedInterval> segmentsIterator = new PeekableIterator<>(regions.iterator());
    while (segmentsIterator.hasNext()) {
        AnnotatedInterval normalSegment = segmentsIterator.next();
        AnnotatedInterval nextSegment = segmentsIterator.peek();

        int updatedEndPoint =  normalSegment.getEnd();

        // Merge (if any to merge)
        while (segmentsIterator.hasNext() && isMergeableByAnnotation(annotationToMerge, normalSegment, nextSegment)) {
            updatedEndPoint = nextSegment.getEnd();
            segmentsIterator.next();
            nextSegment = segmentsIterator.peek();
        }
        final AnnotatedInterval segmentToAddToResult = new AnnotatedInterval(
                new SimpleInterval(normalSegment.getContig(), normalSegment.getStart(), updatedEndPoint),
                ImmutableSortedMap.of(annotationToMerge, normalSegment.getAnnotationValue(annotationToMerge)));
        mergedSegments.add(segmentToAddToResult);
    }
    return mergedSegments;
}

Source File: FilterBamByTag.java From Drop-seq with MIT License

5 votes

/**
 *
 * @param in
 * @param out
 * @param values
 */
void processPairedMode (final SamReader in, final SAMFileWriter out, final Set<String> values, final File summaryFile, Integer mapQuality, boolean allowPartial) {
	ProgressLogger progLog = new ProgressLogger(log);
	FilteredReadsMetric m = new FilteredReadsMetric();
	
	PeekableIterator<SAMRecord> iter = new PeekableIterator<>(CustomBAMIterators.getQuerynameSortedRecords(in));
	while (iter.hasNext()) {
		SAMRecord r1 = iter.next();
		progLog.record(r1);
		boolean filterFlag1 = filterRead(r1, this.TAG, values, this.ACCEPT_TAG, mapQuality, allowPartial);
		
		SAMRecord r2 = null;
		if (iter.hasNext()) r2 = iter.peek();
		// check for r2 being null in case the last read is unpaired.
		if (r2!=null && r1.getReadName().equals(r2.getReadName())) {
			// paired read found.
			progLog.record(r2);
			r2=iter.next();
			boolean filterFlag2 = filterRead(r2, this.TAG, values, this.ACCEPT_TAG, mapQuality, allowPartial);
			// if in accept tag mode, if either read shouldn't be filterd accept the pair
			// if in reject mode, if both reads shouldn't be filtered to accept the pair.
			if ((!filterFlag1 || !filterFlag2 & this.ACCEPT_TAG) || (!filterFlag1 && !filterFlag2 & !this.ACCEPT_TAG)) {
				out.addAlignment(r1);
				out.addAlignment(r2);
				m.READS_ACCEPTED+=2;
			} else 
				m.READS_REJECTED+=2;
		} else if (!filterFlag1) {
			out.addAlignment(r1);
			m.READS_ACCEPTED++;
		} else {
			m.READS_REJECTED++;
		}
	}
	CloserUtil.close(in);
	writeSummary(summaryFile, m);
	out.close();
	reportAndCheckFilterResults(m);
}

Source File: CreateSnpIntervalFromVcf.java From Drop-seq with MIT License

5 votes

private void validateRequestedSamples (final PeekableIterator<VariantContext> iterator, final Set<String> sample) {
	if (iterator.hasNext()) {
		final VariantContext site = iterator.peek();
		Set<String> vCFSamples = site.getSampleNames();
		for (String s: sample)
			if (!vCFSamples.contains(s))
				throw new IllegalArgumentException("VCF doesn't have the requested sample " + s);
	}
}

Source File: DetectBeadSynthesisErrors.java From Drop-seq with MIT License

5 votes

private void writeFile (final PeekableIterator <BeadSynthesisErrorData> iter, final PrintStream out) {
	if (!iter.hasNext()) {
		out.close();
		return;
	}

	BeadSynthesisErrorData first = iter.peek();
	int umiLength = first.getBaseLength();
	writeBadBarcodeStatisticsFileHeader(umiLength, out);
	while (iter.hasNext()) {
		BeadSynthesisErrorData bsed = iter.next();
		writeBadBarcodeStatisticsFileEntry(bsed, out);
	}
	out.close();
}

Source File: RevertSam.java From picard with MIT License

5 votes

/**
 * Generates a list by consuming from the iterator in order starting with the first available
 * read and continuing while subsequent reads share the same read name. If there are no reads
 * remaining returns an empty list.
 */
private List<SAMRecord> fetchByReadName(final PeekableIterator<SAMRecord> iterator) {
    final List<SAMRecord> out = new ArrayList<>();

    if (iterator.hasNext()) {
        final SAMRecord first = iterator.next();
        out.add(first);

        while (iterator.hasNext() && iterator.peek().getReadName().equals(first.getReadName())) {
            out.add(iterator.next());
        }
    }

    return out;
}

Source File: AnnotatedIntervalUtils.java From gatk with BSD 3-Clause "New" or "Revised" License

5 votes

/**
 * Merge AnnotatedIntervals by whether the given annotation values match (all annotations must match).
 *
 * Sorting is performed as well, so input ordering is lost.  Note that the input itself is not changed.
 *
 * When two overlapping regions are merged, annotations are merged as follows:
 *  - The annotation appears in one region:  Resulting region will have the annotation with the value of that one region
 *  - The annotation appears in both regions:  Resulting region will have both values separated by the {@code annotationSeparator}
 *  parameter.
 *
 * @param initialRegions Regions to merge.  Never {@code null}
 * @param dictionary Sequence dictionary to use for sorting.  Never {@code null}
 * @param annotationNames Names of the annotations to use for matching the initialRegions.  Never {@code null}
 * @param progressUpdater Consuming function that can callback with the latest region that has been processed.
 *                        Never {@code null}  Use a no-op function if you do not wish to provide a progress update.
 * @param annotationSeparator In the case of conflicting annotation values (of annotations other than those specified
 *                            in annotationNames), use this string to separate the resulting (merged) value.
 *                            Never {@code null}
 * @param maxDistanceInBp if two segments are farther than this distance, do not merge, even if all annotation values
 *                        match.  Must be >= 0.
 * @return merged annotated intervals.  Empty list if initialRegions was empty.  Never {@code null}
 */
public static List<AnnotatedInterval> mergeRegionsByAnnotation(final List<AnnotatedInterval> initialRegions,
                                                   final SAMSequenceDictionary dictionary, final List<String> annotationNames,
                                                   final Consumer<Locatable> progressUpdater, final String annotationSeparator,
                                                               final int maxDistanceInBp) {

    Utils.nonNull(initialRegions);
    Utils.nonNull(dictionary);
    Utils.nonNull(annotationNames);
    Utils.nonNull(annotationSeparator);
    Utils.nonNull(progressUpdater);
    ParamUtils.isPositiveOrZero(maxDistanceInBp, "Cannot have a negative value for distance.");

    final List<AnnotatedInterval> segments = IntervalUtils.sortLocatablesBySequenceDictionary(initialRegions,
            dictionary);

    final List<AnnotatedInterval> finalSegments = new ArrayList<>();
    final PeekableIterator<AnnotatedInterval> segmentsIterator = new PeekableIterator<>(segments.iterator());
    while (segmentsIterator.hasNext()) {
        AnnotatedInterval currentRegion = segmentsIterator.next();

        while (segmentsIterator.peek() != null && isMergeByAnnotation(currentRegion, segmentsIterator.peek(), maxDistanceInBp, annotationNames)
                ) {
            final AnnotatedInterval toBeMerged = segmentsIterator.next();

            currentRegion = merge(currentRegion, toBeMerged, annotationSeparator);
        }
        progressUpdater.accept(currentRegion);
        finalSegments.add(currentRegion);
    }
    return finalSegments;
}

Source File: ComputeUMISharing.java From Drop-seq with MIT License

4 votes

@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    // Make sure this is modifiable
    EDIT_DISTANCE = new ArrayList<>(EDIT_DISTANCE);
    while (EDIT_DISTANCE.size() < COUNT_TAG.size()) {
        EDIT_DISTANCE.add(0);
    }
    parentEditDistanceMatcher = new ParentEditDistanceMatcher(this.COUNT_TAG, this.EDIT_DISTANCE, this.FIND_INDELS, this.NUM_THREADS);

    SamReader reader = SamReaderFactory.makeDefault().open(INPUT);
    final ProgressLogger progressLogger = new ProgressLogger(log, 1000000, "Sorting");
    Iterator<SAMRecord> iter = reader.iterator();
    if (LOCUS_FUNCTION_LIST.size() > 0) {
        iter = new GeneFunctionIteratorWrapper(iter, this.GENE_NAME_TAG,
                this.GENE_STRAND_TAG, this.GENE_FUNCTION_TAG, false, this.STRAND_STRATEGY,
                this.LOCUS_FUNCTION_LIST);
    }

    CloseableIterator<SAMRecord> sortedIter = SortingIteratorFactory.create(SAMRecord.class, iter,
            PARENT_CHILD_COMPARATOR, new BAMRecordCodec(reader.getFileHeader()), MAX_RECORDS_IN_RAM,
            (SortingIteratorFactory.ProgressCallback<SAMRecord>) progressLogger::record);

    PeekableIterator<List<SAMRecord>> subgroupIterator =
            new PeekableIterator<List<SAMRecord>>(new GroupingIterator<SAMRecord>(
                    new ProgressLoggingIterator(sortedIter, new ProgressLogger(log, 1000000, "Grouping")),
                    GROUPING_COMPARATOR));

    MetricsFile<UmiSharingMetrics, Integer> outFile = getMetricsFile();
    List<SAMRecord> parentSubgroup = null;
    Set<TagValues> parentTuples = new HashSet<>();

    while (subgroupIterator.hasNext()) {
        if (parentSubgroup == null ||
        !parentSubgroup.get(0).getAttribute(COLLAPSE_TAG).equals(subgroupIterator.peek().get(0).getAttribute(COLLAPSE_TAG))) {
            parentSubgroup = subgroupIterator.next();
            parentTuples = parentEditDistanceMatcher.getValues(parentSubgroup);                
        } else {                
            final List<SAMRecord> childSubgroup = subgroupIterator.next();
            final Set<TagValues> childTuples = parentEditDistanceMatcher.getValues(childSubgroup);                
            final UmiSharingMetrics metrics = new UmiSharingMetrics();
            metrics.PARENT = parentSubgroup.get(0).getAttribute(COLLAPSE_TAG).toString();
            metrics.CHILD = childSubgroup.get(0).getAttribute(UNCOLLAPSED_TAG).toString();
            metrics.NUM_PARENT = parentTuples.size();
            metrics.NUM_CHILD = childTuples.size();
            metrics.NUM_SHARED = parentEditDistanceMatcher.computeNumShared(parentTuples, childTuples);
            metrics.FRAC_SHARED = metrics.NUM_SHARED/(double)metrics.NUM_CHILD;
            outFile.addMetric(metrics);
        }
    }
    BufferedWriter w = IOUtil.openFileForBufferedWriting(OUTPUT);
    outFile.write(w);
    try {
        w.close();
    } catch (IOException e) {
        throw new RuntimeIOException("Problem writing " + OUTPUT.getAbsolutePath(), e);
    }
    CloserUtil.close(reader);
    return 0;
}

Source File: CreateSnpIntervalFromVcf.java From Drop-seq with MIT License

4 votes

public IntervalList processData(final File vcfFile, final File sdFile, final Set<String> sample, int GQThreshold, final boolean hetSNPsOnly) {

		final VCFFileReader reader = new VCFFileReader(vcfFile, false);
		if (!VCFUtils.GQInHeader(reader)) {
			GQThreshold=-1;
			log.info("Genotype Quality [GQ] not found in header.  Disabling GQ_THRESHOLD parameter");
		}
		
		final VCFHeader inputVcfHeader = new VCFHeader(reader.getFileHeader().getMetaDataInInputOrder());
		SAMSequenceDictionary sequenceDictionary = inputVcfHeader.getSequenceDictionary();
		Set<String> sampleListFinal = sample;
		if (sample==null || sample.isEmpty()) {
			ArrayList<String> s = reader.getFileHeader().getSampleNamesInOrder();
			sampleListFinal=new TreeSet<String>(s);
		}

		if (sdFile != null)
			sequenceDictionary = getSequenceDictionary(sdFile);

		final ProgressLogger progress = new ProgressLogger(this.log, 500000);

		final SAMFileHeader samHeader = new SAMFileHeader();
		samHeader.setSequenceDictionary(sequenceDictionary);
		IntervalList result = new IntervalList(samHeader);

		// Go through the input, find sites we want to keep.
		final PeekableIterator<VariantContext> iterator = new PeekableIterator<>(reader.iterator());

		validateRequestedSamples (iterator, sampleListFinal);

		while (iterator.hasNext()) {
			final VariantContext site = iterator.next();
			progress.record(site.getContig(), site.getStart());
			// for now drop any filtered site.
			if (site.isFiltered())
				continue;
			// move onto the next record if the site is not a SNP or the samples aren't all heterozygous.
			if (!site.isSNP())
				continue;
			if (!sitePassesFilters(site, sampleListFinal, GQThreshold, hetSNPsOnly))
				continue;
			Interval varInt = new Interval(site.getContig(), site.getStart(),
					site.getEnd(), true, site.getID());

			// final Interval site = findHeterozygousSites(full, SAMPLE);
			result.add(varInt);

		}

		CloserUtil.close(iterator);
		CloserUtil.close(reader);
		return (result);
	}

Source File: BamToBfqWriter.java From picard with MIT License

4 votes

/**
 * Count the number of records in the bamFile that could potentially be written
 *
 * @return  the number of records in the Bam file
 */
private int countWritableRecords() {
    int count = 0;

    final SamReader reader = SamReaderFactory.makeDefault().open(this.bamFile);
    if(!reader.getFileHeader().getSortOrder().equals(SAMFileHeader.SortOrder.queryname)) {
    	//this is a fix for issue PIC-274: It looks like BamToBfqWriter requires that the input BAM is queryname sorted, 
    	//but it doesn't check this early, nor produce an understandable error message."
    	throw new PicardException("Input file (" + this.bamFile.getAbsolutePath() +") needs to be sorted by queryname.");
    }
    final PeekableIterator<SAMRecord> it = new PeekableIterator<SAMRecord>(reader.iterator());
    if (!this.pairedReads) {
        // Filter out noise reads and reads that fail the quality filter
        final List<SamRecordFilter> filters = new ArrayList<SamRecordFilter>();
        filters.add(new TagFilter(ReservedTagConstants.XN, 1));
        if (!this.includeNonPfReads) {
            filters.add(new FailsVendorReadQualityFilter());
        }
        final FilteringSamIterator itr = new FilteringSamIterator(it, new AggregateFilter(filters));
        while (itr.hasNext()) {
            itr.next();
            count++;
        }
    }
    else {
        while (it.hasNext()) {
            final SAMRecord first = it.next();
            final SAMRecord second = it.next();
            // If both are noise reads, filter them out
            if (first.getAttribute(ReservedTagConstants.XN) != null &&
                second.getAttribute(ReservedTagConstants.XN) != null)  {
                // skip it
            }
            // If either fails to pass filter, then exclude them as well
            else if (!this.includeNonPfReads && (first.getReadFailsVendorQualityCheckFlag() || second.getReadFailsVendorQualityCheckFlag()) ) {
                // skip it
            }
            // Otherwise, write them out
            else {
                count++;
            }
        }
    }
    it.close();
    CloserUtil.close(reader);
    return count;
}

Source File: CollectSamErrorMetrics.java From picard with MIT License

3 votes

/**
 * Advance the iterator until at or ahead of locus. if there's a variant at the locus return true, otherwise false.
 * <p>
 * HAS SIDE EFFECTS!!! draws from vcfIterator!!!
 *
 * @param vcfIterator        a {@link VariantContext} iterator to advance (assumed sorted)
 * @param locus              a {@link Locus} at which to examine the variants
 * @param sequenceDictionary a dictionary with which to compare the Locatable to the Locus...
 * @return true if there's a variant over the locus, false otherwise.
 */
private static boolean advanceIteratorAndCheckLocus(final PeekableIterator<VariantContext> vcfIterator, final Locus locus, final SAMSequenceDictionary sequenceDictionary) {
    while (vcfIterator.hasNext() && (vcfIterator.peek().isFiltered() ||
            CompareVariantContextToLocus(sequenceDictionary, vcfIterator.peek(), locus) < 0)) {
        vcfIterator.next();
    }
    return vcfIterator.hasNext() && CompareVariantContextToLocus(sequenceDictionary, vcfIterator.peek(), locus) == 0;
}

Java Code Examples for htsjdk.samtools.util.PeekableIterator#hasNext()