Java Code Examples for htsjdk.samtools.SAMRecord#getAlignmentStart()

The following examples show how to use htsjdk.samtools.SAMRecord#getAlignmentStart() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: HeaderlessSAMRecordCoordinateComparator.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Compare the coordinates of two reads. If a read is paired and unmapped, use its mate mapping
 * as its position.
 *
 * @return negative if samRecord1 < samRecord2,  0 if equal, else positive
 */
private int compareCoordinates( final SAMRecord samRecord1, final SAMRecord samRecord2 ) {
    final int refIndex1 = header.getSequenceIndex(samRecord1.getReferenceName());
    final int refIndex2 = header.getSequenceIndex(samRecord2.getReferenceName());

    if ( refIndex1 == -1 ) {
        return refIndex2 == -1 ? 0: 1;
    } else if ( refIndex2 == -1 ) {
        return -1;
    }
    final int cmp = refIndex1 - refIndex2;
    if ( cmp != 0 ) {
        return cmp;
    }
    return samRecord1.getAlignmentStart() - samRecord2.getAlignmentStart();
}
 
Example 2
Source File: GcBiasMetricsCollector.java    From picard with MIT License 6 votes vote down vote up
private void addRead(final GcObject gcObj, final SAMRecord rec, final String group, final byte[] gc, final byte[] refBases) {
    if (!rec.getReadPairedFlag() || rec.getFirstOfPairFlag()) ++gcObj.totalClusters;
    final int pos = rec.getReadNegativeStrandFlag() ? rec.getAlignmentEnd() - scanWindowSize : rec.getAlignmentStart();
    ++gcObj.totalAlignedReads;
    if (pos > 0) {
        final int windowGc = gc[pos];
        if (windowGc >= 0) {
            ++gcObj.readsByGc[windowGc];
            gcObj.basesByGc[windowGc] += rec.getReadLength();
            gcObj.errorsByGc[windowGc] +=
                    SequenceUtil.countMismatches(rec, refBases, bisulfite) +
                            SequenceUtil.countInsertedBases(rec) + SequenceUtil.countDeletedBases(rec);
        }
    }
    if (gcObj.group == null) {
        gcObj.group = group;
    }
}
 
Example 3
Source File: SortedSAMWriter.java    From abra2 with MIT License 5 votes vote down vote up
private void setMateInfo(SAMRecord read, Map<MateKey, SAMRecord> mates) {
	
	if (read.getReadPairedFlag()) {
		SAMRecord mate = mates.get(getMateKey(read));
		if (mate != null) {
			// Only update mate info if a read has been modified
			if (read.getAttribute("YO") != null || mate.getAttribute("YO") != null) {
				read.setMateAlignmentStart(mate.getAlignmentStart());
				read.setMateUnmappedFlag(mate.getReadUnmappedFlag());
				read.setMateNegativeStrandFlag(mate.getReadNegativeStrandFlag());
				 
				int start = read.getAlignmentStart() < mate.getAlignmentStart() ? read.getAlignmentStart() : mate.getAlignmentStart();
				int stop  = read.getAlignmentEnd() > mate.getAlignmentEnd() ? read.getAlignmentEnd() : mate.getAlignmentEnd();
				
				int insert = stop-start+1;
				
				if (read.getAlignmentStart() > mate.getAlignmentStart()) {
					insert *= -1;
				} else if (read.getAlignmentStart() == mate.getAlignmentStart() && mate.getFirstOfPairFlag()) {
					insert *= -1;
				}
				
				read.setInferredInsertSize(insert);
				
				if (read.getStringAttribute("MC") != null) {
					read.setAttribute("MC", mate.getCigarString());
				}
				
				if (!mate.getReadUnmappedFlag()) {
					read.setMateReferenceName(mate.getReferenceName());
				}
			}
		}
	}
}
 
Example 4
Source File: SimpleAlleleCounter.java    From abra2 with MIT License 5 votes vote down vote up
private Pair<Character,Character> getBaseAtPosition(SAMRecord read, int refPos) {
	int readPos = 0;
	int refPosInRead = read.getAlignmentStart();
	int cigarElementIdx = 0;
	
	while (refPosInRead <= refPos && cigarElementIdx < read.getCigar().numCigarElements() && readPos < read.getReadLength()) {
		CigarElement elem = read.getCigar().getCigarElement(cigarElementIdx++);
		
		switch(elem.getOperator()) {
			case H: //NOOP
				break;
			case S:
			case I:
				readPos += elem.getLength();
				break;
			case D:
			case N:
				refPosInRead += elem.getLength();
				break;
			case M:
				if (refPos < (refPosInRead + elem.getLength())) {
					readPos += refPos - refPosInRead;
					if (readPos < read.getReadLength()) {
						// Found the base.  Return it
						return new Pair<Character, Character>(read.getReadString().charAt(readPos), read.getBaseQualityString().charAt(readPos));
					}
				} else {
					readPos += elem.getLength();
					refPosInRead += elem.getLength();
				}
				break;
			default:
				throw new IllegalArgumentException("Invalid Cigar Operator: " + elem.getOperator() + " for read: " + read.getSAMString());					
		}
	}
	
	return null;
}
 
Example 5
Source File: CramSerilization.java    From cramtools with Apache License 2.0 5 votes vote down vote up
private static void updateTracks(final List<SAMRecord> samRecords, final ReferenceTracks tracks) {
	for (final SAMRecord samRecord : samRecords) {
		if (samRecord.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START
				&& samRecord.getReferenceIndex() == tracks.getSequenceId()) {
			int refPos = samRecord.getAlignmentStart();
			int readPos = 0;
			for (final CigarElement cigarElement : samRecord.getCigar().getCigarElements()) {
				if (cigarElement.getOperator().consumesReferenceBases()) {
					for (int elementIndex = 0; elementIndex < cigarElement.getLength(); elementIndex++)
						tracks.addCoverage(refPos + elementIndex, 1);
				}
				switch (cigarElement.getOperator()) {
				case M:
				case X:
				case EQ:
					for (int pos = readPos; pos < cigarElement.getLength(); pos++) {
						final byte readBase = samRecord.getReadBases()[readPos + pos];
						final byte refBase = tracks.baseAt(refPos + pos);
						if (readBase != refBase)
							tracks.addMismatches(refPos + pos, 1);
					}
					break;

				default:
					break;
				}

				readPos += cigarElement.getOperator().consumesReadBases() ? cigarElement.getLength() : 0;
				refPos += cigarElement.getOperator().consumesReferenceBases() ? cigarElement.getLength() : 0;
			}
		}
	}
}
 
Example 6
Source File: SpliceJunctionCounter.java    From abra2 with MIT License 5 votes vote down vote up
private List<SpliceJunction> getJunctions(SAMRecord read) {
	List<SpliceJunction> junctions = new ArrayList<SpliceJunction>();
	
	int pos = read.getAlignmentStart();
	
	for (CigarElement elem : read.getCigar()) {
		switch (elem.getOperator()) {
			case D:
			case M:
				pos += elem.getLength();
				break;
			case N:
				junctions.add(new SpliceJunction(read.getReferenceName(), pos, pos+elem.getLength()-1));
				pos += elem.getLength();
				break;
			case S:
			case I:
			case H:
				// NOOP
				break;
			default:
				throw new UnsupportedOperationException("Unsupported Cigar Operator: " + elem.getOperator());
		}
	}
	
	return junctions;
}
 
Example 7
Source File: CramSerilization.java    From cramtools with Apache License 2.0 5 votes vote down vote up
public static Map<Integer, AlignmentSpan> getSpans(List<SAMRecord> samRecords) {
	Map<Integer, AlignmentSpan> spans = new HashMap<Integer, AlignmentSpan>();
	int unmapped = 0;
	for (final SAMRecord r : samRecords) {

		int refId = r.getReferenceIndex();
		if (refId == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
			unmapped++;
			continue;
		}

		int start = r.getAlignmentStart();
		int end = r.getAlignmentEnd();

		if (spans.containsKey(refId)) {
			spans.get(refId).add(start, end - start, 1);
		} else {
			spans.put(refId, new AlignmentSpan(start, end - start));
		}
	}
	if (unmapped > 0) {
		AlignmentSpan span = new AlignmentSpan(AlignmentSpan.UNMAPPED_SPAN.getStart(),
				AlignmentSpan.UNMAPPED_SPAN.getSpan(), unmapped);
		spans.put(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX, span);
	}
	return spans;
}
 
Example 8
Source File: AnnotationUtils.java    From Drop-seq with MIT License 5 votes vote down vote up
public Map<Gene, LocusFunction> getLocusFunctionForReadByGene (final SAMRecord rec, final OverlapDetector<Gene> geneOverlapDetector) {
	Map<Gene, LocusFunction> result = new HashMap<>();
	final Interval readInterval = new Interval(rec.getReferenceName(), rec.getAlignmentStart(), rec.getAlignmentEnd(), rec.getReadNegativeStrandFlag(), rec.getReadName());
	final Collection<Gene> overlappingGenes = geneOverlapDetector.getOverlaps(readInterval);

       for (Gene g: overlappingGenes) {
       	LocusFunction f = getLocusFunctionForRead(rec, g);
           result.put(g, f);
       }
       return result;
}
 
Example 9
Source File: SortedSAMWriter.java    From abra2 with MIT License 5 votes vote down vote up
public void addAlignment(int sampleIdx, SAMRecordWrapper samRecord, int chromosomeChunkIdx) {
	Feature chunk = this.chromosomeChunker.getChunks().get(chromosomeChunkIdx);
	
	SAMRecord read = samRecord.getSamRecord();
	
	// Only output reads with original start pos within specified chromosomeChunk
	// Avoids reads being written in 2 different chunks
	
	int origAlignmentStart = read.getAlignmentStart();
	String yo = read.getStringAttribute("YO");
	if (yo != null) {
		String[] fields = yo.split(":");
		origAlignmentStart = Integer.parseInt(fields[1]);
	}
	
	if (origAlignmentStart >= chunk.getStart() && origAlignmentStart <= chunk.getEnd()) {
		
		if (samRecord.isUnalignedRc() && read.getReadUnmappedFlag()) {
			// This read was reverse complemented, but not updated.
			// Change it back to its original state.
			read.setReadString(rc.reverseComplement(read.getReadString()));
			read.setBaseQualityString(rc.reverse(read.getBaseQualityString()));
			read.setReadNegativeStrandFlag(!read.getReadNegativeStrandFlag());
		}
		
		writers[sampleIdx][chromosomeChunkIdx].addAlignment(read);
	}
}
 
Example 10
Source File: RawContextCigarHandler.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
@Override
public void handleLeftSoftClip(@NotNull final SAMRecord record, @NotNull final CigarElement element) {
    if (variant.position() < record.getAlignmentStart()) {
        int readIndex = record.getReadPositionAtReferencePosition(record.getAlignmentStart()) - 1 - record.getAlignmentStart()
                + (int) variant.position() - variant.alt().length() + variant.ref().length();
        result = RawContext.inSoftClip(readIndex);
    }
}
 
Example 11
Source File: RefContextConsumer.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
private boolean reachedDepthLimit(@NotNull final SAMRecord record) {
    int alignmentStart = record.getAlignmentStart();
    int alignmentEnd = record.getAlignmentEnd();

    RefContext startRefContext = candidates.refContext(bounds.chromosome(), alignmentStart);
    RefContext endRefContext = candidates.refContext(bounds.chromosome(), alignmentEnd);

    return startRefContext.reachedLimit() && endRefContext.reachedLimit();
}
 
Example 12
Source File: Utils.java    From cramtools with Apache License 2.0 4 votes vote down vote up
/**
 * Copied from net.sf.picard.sam.SamPairUtil. This is a more permissive
 * version of the method, which does not reset alignment start and reference
 * for unmapped reads.
 * 
 * @param rec1
 * @param rec2
 * @param header
 */
public static void setLooseMateInfo(final SAMRecord rec1, final SAMRecord rec2, final SAMFileHeader header) {
	if (rec1.getReferenceName() != SAMRecord.NO_ALIGNMENT_REFERENCE_NAME
			&& rec1.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX)
		rec1.setReferenceIndex(header.getSequenceIndex(rec1.getReferenceName()));
	if (rec2.getReferenceName() != SAMRecord.NO_ALIGNMENT_REFERENCE_NAME
			&& rec2.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX)
		rec2.setReferenceIndex(header.getSequenceIndex(rec2.getReferenceName()));

	// If neither read is unmapped just set their mate info
	if (!rec1.getReadUnmappedFlag() && !rec2.getReadUnmappedFlag()) {

		rec1.setMateReferenceIndex(rec2.getReferenceIndex());
		rec1.setMateAlignmentStart(rec2.getAlignmentStart());
		rec1.setMateNegativeStrandFlag(rec2.getReadNegativeStrandFlag());
		rec1.setMateUnmappedFlag(false);
		rec1.setAttribute(SAMTag.MQ.name(), rec2.getMappingQuality());

		rec2.setMateReferenceIndex(rec1.getReferenceIndex());
		rec2.setMateAlignmentStart(rec1.getAlignmentStart());
		rec2.setMateNegativeStrandFlag(rec1.getReadNegativeStrandFlag());
		rec2.setMateUnmappedFlag(false);
		rec2.setAttribute(SAMTag.MQ.name(), rec1.getMappingQuality());
	}
	// Else if they're both unmapped set that straight
	else if (rec1.getReadUnmappedFlag() && rec2.getReadUnmappedFlag()) {
		rec1.setMateNegativeStrandFlag(rec2.getReadNegativeStrandFlag());
		rec1.setMateUnmappedFlag(true);
		rec1.setAttribute(SAMTag.MQ.name(), null);
		rec1.setInferredInsertSize(0);

		rec2.setMateNegativeStrandFlag(rec1.getReadNegativeStrandFlag());
		rec2.setMateUnmappedFlag(true);
		rec2.setAttribute(SAMTag.MQ.name(), null);
		rec2.setInferredInsertSize(0);
	}
	// And if only one is mapped copy it's coordinate information to the
	// mate
	else {
		final SAMRecord mapped = rec1.getReadUnmappedFlag() ? rec2 : rec1;
		final SAMRecord unmapped = rec1.getReadUnmappedFlag() ? rec1 : rec2;

		mapped.setMateReferenceIndex(unmapped.getReferenceIndex());
		mapped.setMateAlignmentStart(unmapped.getAlignmentStart());
		mapped.setMateNegativeStrandFlag(unmapped.getReadNegativeStrandFlag());
		mapped.setMateUnmappedFlag(true);
		mapped.setInferredInsertSize(0);

		unmapped.setMateReferenceIndex(mapped.getReferenceIndex());
		unmapped.setMateAlignmentStart(mapped.getAlignmentStart());
		unmapped.setMateNegativeStrandFlag(mapped.getReadNegativeStrandFlag());
		unmapped.setMateUnmappedFlag(false);
		unmapped.setInferredInsertSize(0);
	}

	boolean firstIsFirst = rec1.getAlignmentStart() < rec2.getAlignmentStart();
	int insertSize = firstIsFirst ? SamPairUtil.computeInsertSize(rec1, rec2) : SamPairUtil.computeInsertSize(rec2,
			rec1);

	rec1.setInferredInsertSize(firstIsFirst ? insertSize : -insertSize);
	rec2.setInferredInsertSize(firstIsFirst ? -insertSize : insertSize);

}
 
Example 13
Source File: BamToDetailsConversion.java    From chipster with MIT License 4 votes vote down vote up
/**
 * Find reads in a given range.
 * 
 * <p>
 * TODO add cigar to the list of returned values
 * <p>
 * TODO add pair information to the list of returned values
 * 
 * @param request
 * @return
 * @throws InterruptedException 
 */
@Override
protected void processDataRequest(DataRequest request) throws InterruptedException {
	
	// Read the given region
	CloseableIterator<SAMRecord> iterator = dataSource.query(request.start.chr, request.start.bp.intValue(), request.end.bp.intValue());
	
	// Produce results
	while (iterator.hasNext()) {

		List<Feature> responseList = new LinkedList<Feature>();

		// Split results into chunks
		for (int c = 0; c < RESULT_CHUNK_SIZE && iterator.hasNext(); c++) {
			SAMRecord record = iterator.next();

			// Region for this read
			Region recordRegion = new Region((long) record.getAlignmentStart(), (long) record.getAlignmentEnd(), request.start.chr);

			// Values for this read
			LinkedHashMap<DataType, Object> values = new LinkedHashMap<DataType, Object>();

			Feature read = new Feature(recordRegion, values);

			if (request.getRequestedContents().contains(DataType.ID)) {
				values.put(DataType.ID, record.getReadName());
			}

			if (request.getRequestedContents().contains(DataType.STRAND)) {
				values.put(DataType.STRAND, BamUtils.getStrand(record, coverageType));					
			}

			if (request.getRequestedContents().contains(DataType.QUALITY)) {
				values.put(DataType.QUALITY, record.getBaseQualityString());
			}

			if (request.getRequestedContents().contains(DataType.CIGAR)) {
				Cigar cigar = new Cigar(read, record.getCigar());
				values.put(DataType.CIGAR, cigar);
			}

			// TODO Deal with "=" and "N" in read string
			if (request.getRequestedContents().contains(DataType.SEQUENCE)) {
				String seq = record.getReadString();
				values.put(DataType.SEQUENCE, seq);
			}

			if (request.getRequestedContents().contains(DataType.MATE_POSITION)) {
				
				BpCoord mate = new BpCoord((Long)(long)record.getMateAlignmentStart(),
						new Chromosome(record.getMateReferenceName()));
				
				values.put(DataType.MATE_POSITION, mate);
			}
			
			if (request.getRequestedContents().contains(DataType.BAM_TAG_NH)) {
				Object ng = record.getAttribute("NH");
				if (ng != null) {
					values.put(DataType.BAM_TAG_NH, (Integer)record.getAttribute("NH"));
				}
			}
			
			/*
			 * NOTE! RegionContents created from the same read area has to be equal in methods equals, hash and compareTo. Primary types
			 * should be ok, but objects (including tables) has to be handled in those methods separately. Otherwise tracks keep adding
			 * the same reads to their read sets again and again.
			 */
			responseList.add(read);
		}

		// Send result			
		super.createDataResult(new DataResult(request.getStatus(), responseList));			
	}

	// We are done
	iterator.close();
}
 
Example 14
Source File: SortedSAMWriter.java    From abra2 with MIT License 4 votes vote down vote up
@Override
public int compare(SAMRecord o1, SAMRecord o2) {
	return o1.getAlignmentStart()-o2.getAlignmentStart();
}
 
Example 15
Source File: SamMultiRestrictingIterator.java    From rtg-tools with BSD 2-Clause "Simplified" License 4 votes vote down vote up
private void populateNext(boolean force) throws IOException {
  final SAMRecord previousRecord = mNextRecord;
  mNextRecord = null;
  if (force) {
    advanceSubIterator();
  }
  while (mCurrentIt != null) {
    if (!mCurrentIt.hasNext()) {  // Only happens when stream is exhausted, so effectively just closes things out.
      advanceSubIterator();
    } else {
      final SAMRecord rec = nextOrBuffered();
      final int refId = rec.getReferenceIndex();

      if (refId > mCurrentTemplate) { // current offset has exceeded region and block overlapped next template
        recordPreviousAndAdvance(previousRecord, rec);
      } else {

        if (refId < mCurrentTemplate) { // Current block may occasionally return records from the previous template if the block overlaps
          continue;
        }

        final int alignmentStart = rec.getAlignmentStart() - 1; // to 0-based
        int alignmentEnd = rec.getAlignmentEnd(); // to 0-based exclusive = noop
        if (alignmentEnd == 0) {
          // Use the read length to get an end point for unmapped reads
          alignmentEnd = rec.getAlignmentStart() + rec.getReadLength();
        }

        if (alignmentEnd <= mCurrentRegion.getStart()) {  // before region
          continue;
        }

        if (alignmentStart <= mPreviousAlignmentStart) { // this record would have been already returned by an earlier region
          //Diagnostic.developerLog("Ignoring record from earlier block at " + rec.getReferenceName() + ":" + rec.getAlignmentStart());
          ++mDoubleFetched;
          if (mDoubleFetched % 100000 == 0) {
            Diagnostic.developerLog("Many double-fetched records for source " + mLabel + " noticed at " + rec.getReferenceName() + ":" + rec.getAlignmentStart() + " in region " + mCurrentRegion + " (skipping through to " + mPreviousAlignmentStart + ")");
          }
          continue;
        }

        if (alignmentStart >= mCurrentRegion.getEnd()) {  // past current region, advance the iterator and record the furtherest we got
          recordPreviousAndAdvance(previousRecord, rec);
          continue;
        }

        mNextRecord = rec;
        break;
      }
    }
  }
}
 
Example 16
Source File: BAMQueryFilteringIterator.java    From cramtools with Apache License 2.0 4 votes vote down vote up
SAMRecord advance() {
	while (true) {
		// Pull next record from stream
		if (!wrappedIterator.hasNext())
			return null;

		final SAMRecord record = wrappedIterator.next();
		// If beyond the end of this reference sequence, end iteration
		final int referenceIndex = record.getReferenceIndex();
		if (referenceIndex != mReferenceIndex) {
			if (referenceIndex < 0 || referenceIndex > mReferenceIndex) {
				return null;
			}
			// If before this reference sequence, continue
			continue;
		}
		if (mRegionStart == 0 && mRegionEnd == Integer.MAX_VALUE) {
			// Quick exit to avoid expensive alignment end calculation
			return record;
		}
		final int alignmentStart = record.getAlignmentStart();
		// If read is unmapped but has a coordinate, return it if the
		// coordinate is within
		// the query region, regardless of whether the mapped mate will
		// be returned.
		final int alignmentEnd;
		if (mQueryType == QueryType.STARTING_AT) {
			alignmentEnd = -1;
		} else {
			alignmentEnd = (record.getAlignmentEnd() != SAMRecord.NO_ALIGNMENT_START ? record.getAlignmentEnd()
					: alignmentStart);
		}

		if (alignmentStart > mRegionEnd) {
			// If scanned beyond target region, end iteration
			return null;
		}
		// Filter for overlap with region
		if (mQueryType == QueryType.CONTAINED) {
			if (alignmentStart >= mRegionStart && alignmentEnd <= mRegionEnd) {
				return record;
			}
		} else if (mQueryType == QueryType.OVERLAPPING) {
			if (alignmentEnd >= mRegionStart && alignmentStart <= mRegionEnd) {
				return record;
			}
		} else {
			if (alignmentStart == mRegionStart) {
				return record;
			}
		}
	}
}
 
Example 17
Source File: VariantHotspotEvidenceFactory.java    From hmftools with GNU General Public License v3.0 4 votes vote down vote up
private boolean samRecordOverlapsVariant(int start, int end, @NotNull final SAMRecord record) {
    return record.getAlignmentStart() <= start && record.getAlignmentEnd() >= end;
}
 
Example 18
Source File: SAMRecordUtils.java    From abra2 with MIT License 4 votes vote down vote up
public static boolean hasPossibleAdapterReadThrough(SAMRecord read, Map<String, SAMRecordWrapper> firstReads, Map<String, SAMRecordWrapper> secondReads) {
	
	boolean hasReadThrough = false;
	
	// Check for fragment read through in paired end
	if (read.getReadPairedFlag() && !read.getReadUnmappedFlag() && !read.getMateUnmappedFlag() &&
			read.getAlignmentStart() == read.getMateAlignmentStart() &&
			read.getReadNegativeStrandFlag() != read.getMateNegativeStrandFlag()) {
		
		SAMRecordWrapper pair = null;
		
		if (read.getFirstOfPairFlag()) {
			pair = secondReads.get(read.getReadName() + "_" + read.getMateAlignmentStart());
		} else {
			pair = firstReads.get(read.getReadName() + "_" + read.getMateAlignmentStart());
		}
		
		if (pair != null && read.getCigar().getCigarElements().size() > 0 && pair.getSamRecord().getCigar().getCigarElements().size() > 0) {
			
			// Looking for something like:
			//     -------->
			//  <--------
			SAMRecord first = null;
			SAMRecord second = null;
			if (read.getReadNegativeStrandFlag()) {
				first = read;
				second = pair.getSamRecord();
			} else {
				first = pair.getSamRecord();
				second = read;
			}
			
			CigarElement firstElement = first.getCigar().getFirstCigarElement();
			CigarElement lastElement = second.getCigar().getLastCigarElement();
			
			if (firstElement.getOperator() == CigarOperator.S && lastElement.getOperator() == CigarOperator.S) {
				// We likely have read through into adapter here.
				hasReadThrough = true;
			}
		}
	}

	return hasReadThrough;
}
 
Example 19
Source File: Reader.java    From dataflow-java with Apache License 2.0 4 votes vote down vote up
/**
 * To compare how sharded reading works vs. plain HTSJDK sequential iteration,
 * this method implements such iteration.
 * This makes it easier to discover errors such as reads that are somehow
 * skipped by a sharded approach.
 */
public static Iterable<Read> readSequentiallyForTesting(Objects storageClient,
    String storagePath, Contig contig,
    ReaderOptions options) throws IOException {
  Stopwatch timer = Stopwatch.createStarted();
  SamReader samReader = BAMIO.openBAM(storageClient, storagePath, options.getStringency());
  SAMRecordIterator iterator =  samReader.queryOverlapping(contig.referenceName,
      (int) contig.start + 1,
      (int) contig.end);
  List<Read> reads = new ArrayList<Read>();

  int recordsBeforeStart = 0;
  int recordsAfterEnd = 0;
  int mismatchedSequence = 0;
  int recordsProcessed = 0;
  Filter filter = setupFilter(options, contig.referenceName);
  while (iterator.hasNext()) {
    SAMRecord record = iterator.next();
    final boolean passesFilter = passesFilter(record, filter, contig.referenceName);

    if (!passesFilter) {
      mismatchedSequence++;
      continue;
    }
    if (record.getAlignmentStart() < contig.start) {
      recordsBeforeStart++;
      continue;
    }
    if (record.getAlignmentStart() > contig.end) {
      recordsAfterEnd++;
      continue;
    }
    reads.add(ReadUtils.makeReadGrpc(record));
    recordsProcessed++;
  }
  timer.stop();
  LOG.info("NON SHARDED: Processed " + recordsProcessed +
      " in " + timer +
      ". Speed: " + (recordsProcessed*1000)/timer.elapsed(TimeUnit.MILLISECONDS) + " reads/sec"
      + ", skipped other sequences " + mismatchedSequence
      + ", skippedBefore " + recordsBeforeStart
      + ", skipped after " + recordsAfterEnd);
  return reads;
}