Java Code Examples for htsjdk.samtools.CigarOperator#EQ

The following examples show how to use htsjdk.samtools.CigarOperator#EQ . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SamUtils.java    From rtg-tools with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Converts a CIGAR that may contain new style operators for match and
 * mismatch into the legacy style CIGAR.
 * @param cigar the CIGAR to convert
 * @return the legacy CIGAR
 */
public static Cigar convertToLegacyCigar(Cigar cigar) {
  final Cigar cg = new Cigar();
  int count = 0;
  for (int i = 0; i < cigar.numCigarElements(); ++i) {
    final CigarElement ce = cigar.getCigarElement(i);
    if (ce.getOperator() == CigarOperator.EQ || ce.getOperator() == CigarOperator.X) {
      count += ce.getLength();
    } else {
      if (count > 0) {
        cg.add(new CigarElement(count, CigarOperator.M));
      }
      cg.add(ce);
      count = 0;
    }
  }
  if (count > 0) {
    cg.add(new CigarElement(count, CigarOperator.M));
  }
  return cg;
}
 
Example 2
Source File: ReAligner.java    From abra2 with MIT License 4 votes vote down vote up
private List<Feature> getExtraJunctions(ContigAlignerResult result, List<Feature> junctions, List<Feature> junctions2) {
	// Look for junctions with adjacent deletions.
	// Treat these as putative novel junctions.
	Set<Feature> junctionSet = new HashSet<Feature>(junctions);
	junctionSet.addAll(junctions2);
	List<Feature> extraJunctions = new ArrayList<Feature>();
	
	Cigar cigar = TextCigarCodec.decode(result.getCigar());
	
	boolean isInGap = false;
	int gapStart = -1;
	int gapLength = -1;
	int numElems = -1;
	int refOffset = 0;
	
	for (CigarElement elem : cigar.getCigarElements()) {
		if (elem.getOperator() == CigarOperator.D || elem.getOperator() == CigarOperator.N) {
			if (!isInGap) {
				isInGap = true;
				gapStart = refOffset;
				gapLength = elem.getLength();
				numElems = 1;
			} else {
				gapLength += elem.getLength();
				numElems += 1;
			}
		} else {
			if (isInGap) {
				
				if (numElems > 1) {
					long start = result.getGenomicPos() + gapStart;
					long end = start + gapLength;
					Feature junc = new Feature(result.getChromosome(), start, end-1);
					if (!junctionSet.contains(junc)) {
						Logger.info("Extra junction idenfified: %s", junc);
						extraJunctions.add(junc);
					}
				}
				
				isInGap = false;
				gapStart = -1;
				gapLength = -1;
				numElems = -1;
			}
		}
		
		if (elem.getOperator() == CigarOperator.M || 
			elem.getOperator() == CigarOperator.D || 
			elem.getOperator() == CigarOperator.N ||
			elem.getOperator() == CigarOperator.X ||
			elem.getOperator() == CigarOperator.EQ) {
			
			refOffset += elem.getLength();
		}
	}
	
	return extraJunctions;
}
 
Example 3
Source File: ReAligner.java    From abra2 with MIT License 4 votes vote down vote up
private List<Feature> getExonSkippingJunctions(ContigAlignerResult result, List<Feature> junctions) {

		// Handles special case where an exon skipping junction causes large gaps with a tiny (~1)
		// number of bases mapped somewhere in the gap
		Set<Feature> junctionSet = new HashSet<Feature>(junctions);
		List<Feature> extraJunctions = new ArrayList<Feature>();
		
		Cigar cigar = TextCigarCodec.decode(result.getCigar());
		
		boolean isInGap = false;
		int gapStart = -1;
		int gapLength = -1;
		int numElems = -1;
		int refOffset = 0;
		
		int maxBasesInMiddle = 5;
		int middleBases = -1;
		
		CigarOperator prev = CigarOperator.M;
		
		for (CigarElement elem : cigar.getCigarElements()) {
			if (elem.getOperator() == CigarOperator.D || elem.getOperator() == CigarOperator.N) {
				if (!isInGap) {
					isInGap = true;
					gapStart = refOffset;
					gapLength = elem.getLength();
					numElems = 1;
					middleBases = 0;
				} else {
					gapLength += elem.getLength();
					numElems += 1;
				}
			} else {
				if (isInGap) {
					
					if (elem.getLength() + middleBases < maxBasesInMiddle) {
						middleBases += elem.getLength();
					} else {
						
						if (numElems > 1 && middleBases > 0 && (prev == CigarOperator.D || prev == CigarOperator.N)) {
							long start = result.getGenomicPos() + gapStart;
							long end = start + gapLength;
							
							// Find junction start / end points closest to gap start / end point
							long closestStart = junctions.get(0).getStart();
							long closestEnd = junctions.get(0).getEnd();
							for (Feature junction : junctions) {
								if (Math.abs(junction.getStart()-start) < Math.abs(closestStart-start)) {
									closestStart = junction.getStart();
								}

								if (Math.abs(junction.getEnd()-end) < Math.abs(closestEnd-end)) {
									closestEnd = junction.getEnd();
								}
							}
							
							if (closestEnd > closestStart+1) {
								Feature junc = new Feature(result.getChromosome(), closestStart, closestEnd);
								if (!junctionSet.contains(junc)) {
									Logger.info("Potential exon skipping junction idenfified: %s", junc);
									extraJunctions.add(junc);
								}
							}
						}
						
						isInGap = false;
						gapStart = -1;
						gapLength = -1;
						numElems = -1;
						middleBases = -1;
					}
				}
			}
			
			if (elem.getOperator() == CigarOperator.M || 
				elem.getOperator() == CigarOperator.D || 
				elem.getOperator() == CigarOperator.N ||
				elem.getOperator() == CigarOperator.X ||
				elem.getOperator() == CigarOperator.EQ) {
				
				refOffset += elem.getLength();
			}
			
			prev = elem.getOperator();
		}
		
		return extraJunctions;
	}
 
Example 4
Source File: CigarItem.java    From chipster with MIT License 4 votes vote down vote up
public boolean isVisible() {
	return cigarElement.getOperator() == CigarOperator.M || cigarElement.getOperator() == CigarOperator.X || cigarElement.getOperator() == CigarOperator.EQ;
}
 
Example 5
Source File: Utils.java    From cramtools with Apache License 2.0 4 votes vote down vote up
/**
 * A rip off samtools bam_md.c
 * 
 * @param record
 * @param ref
 * @param calcMD
 * @param calcNM
 */
public static void calculateMdAndNmTags(SAMRecord record, byte[] ref, boolean calcMD, boolean calcNM) {
	if (!calcMD && !calcNM)
		return;

	Cigar cigar = record.getCigar();
	List<CigarElement> cigarElements = cigar.getCigarElements();
	byte[] seq = record.getReadBases();
	int start = record.getAlignmentStart() - 1;
	int i, x, y, u = 0;
	int nm = 0;
	StringBuffer str = new StringBuffer();

	int size = cigarElements.size();
	for (i = y = 0, x = start; i < size; ++i) {
		CigarElement ce = cigarElements.get(i);
		int j, l = ce.getLength();
		CigarOperator op = ce.getOperator();
		if (op == CigarOperator.MATCH_OR_MISMATCH || op == CigarOperator.EQ || op == CigarOperator.X) {
			for (j = 0; j < l; ++j) {
				int z = y + j;

				if (ref.length <= x + j)
					break; // out of boundary

				int c1 = 0;
				int c2 = 0;
				// try {
				c1 = seq[z];
				c2 = ref[x + j];

				if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) {
					// a match
					++u;
				} else {
					str.append(u);
					str.appendCodePoint(ref[x + j]);
					u = 0;
					++nm;
				}
			}
			if (j < l)
				break;
			x += l;
			y += l;
		} else if (op == CigarOperator.DELETION) {
			str.append(u);
			str.append('^');
			for (j = 0; j < l; ++j) {
				if (ref[x + j] == 0)
					break;
				str.appendCodePoint(ref[x + j]);
			}
			u = 0;
			if (j < l)
				break;
			x += l;
			nm += l;
		} else if (op == CigarOperator.INSERTION || op == CigarOperator.SOFT_CLIP) {
			y += l;
			if (op == CigarOperator.INSERTION)
				nm += l;
		} else if (op == CigarOperator.SKIPPED_REGION) {
			x += l;
		}
	}
	str.append(u);

	if (calcMD)
		record.setAttribute(SAMTag.MD.name(), str.toString());
	if (calcNM)
		record.setAttribute(SAMTag.NM.name(), nm);
}
 
Example 6
Source File: AlignmentsTags.java    From cramtools with Apache License 2.0 4 votes vote down vote up
/**
 * Same as {@link htsjdk.samtools.util.SequenceUtil#calculateMdAndNmTags}
 * but allows for reference excerpt instead of a full reference sequence.
 * 
 * @param record
 *            SAMRecord to inject NM and MD tags into
 * @param ref
 *            reference sequence bases, may be a subsequence starting at
 *            refOffest
 * @param refOffset
 *            array offset of the reference bases, 0 is the same as the
 *            whole sequence. This value will be subtracted from the
 *            reference array index in calculations.
 * @param calcMD
 *            calculate MD tag if true
 * @param calcNM
 *            calculate NM tag if true
 */
public static void calculateMdAndNmTags(final SAMRecord record, final byte[] ref, final int refOffset,
		final boolean calcMD, final boolean calcNM) {
	if (!calcMD && !calcNM)
		return;

	final Cigar cigar = record.getCigar();
	final List<CigarElement> cigarElements = cigar.getCigarElements();
	final byte[] seq = record.getReadBases();
	final int start = record.getAlignmentStart() - 1;
	int i, x, y, u = 0;
	int nm = 0;
	final StringBuilder str = new StringBuilder();

	final int size = cigarElements.size();
	for (i = y = 0, x = start; i < size; ++i) {
		final CigarElement ce = cigarElements.get(i);
		int j;
		final int length = ce.getLength();
		final CigarOperator op = ce.getOperator();
		if (op == CigarOperator.MATCH_OR_MISMATCH || op == CigarOperator.EQ || op == CigarOperator.X) {
			for (j = 0; j < length; ++j) {
				final int z = y + j;

				if (refOffset + ref.length <= x + j)
					break; // out of boundary

				int c1 = 0;
				int c2 = 0;
				// try {
				c1 = seq[z];
				c2 = ref[x + j - refOffset];

				if ((c1 == c2) || c1 == 0) {
					// a match
					++u;
				} else {
					str.append(u);
					str.appendCodePoint(ref[x + j - refOffset]);
					u = 0;
					++nm;
				}
			}
			if (j < length)
				break;
			x += length;
			y += length;
		} else if (op == CigarOperator.DELETION) {
			str.append(u);
			str.append('^');
			for (j = 0; j < length; ++j) {
				if (ref[x + j - refOffset] == 0)
					break;
				str.appendCodePoint(ref[x + j - refOffset]);
			}
			u = 0;
			if (j < length)
				break;
			x += length;
			nm += length;
		} else if (op == CigarOperator.INSERTION || op == CigarOperator.SOFT_CLIP) {
			y += length;
			if (op == CigarOperator.INSERTION)
				nm += length;
		} else if (op == CigarOperator.SKIPPED_REGION) {
			x += length;
		}
	}
	str.append(u);

	if (calcMD)
		record.setAttribute(SAMTag.MD.name(), str.toString());
	if (calcNM)
		record.setAttribute(SAMTag.NM.name(), nm);
}