Java Code Examples for htsjdk.samtools.Cigar#getCigarElements()
The following examples show how to use
htsjdk.samtools.Cigar#getCigarElements() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CollectSamErrorMetricsTest.java From picard with MIT License | 6 votes |
@Test(dataProvider = "provideForTestHasDeletionBeenProcessed") public void testHasDeletionBeenProcessed(final String cigarString, final List<Boolean> expected) { final Cigar cigar = TextCigarCodec.decode(cigarString); final SAMRecord deletionRecord = createRecordFromCigar(cigar.toString()); final CollectSamErrorMetrics collectSamErrorMetrics = new CollectSamErrorMetrics(); final SAMSequenceRecord samSequenceRecord = new SAMSequenceRecord("chr1", 2000); int position = 100; int iExpected = 0; for(CigarElement cigarElement : cigar.getCigarElements()) { for(int iCigarElementPosition = 0; iCigarElementPosition < cigarElement.getLength(); iCigarElementPosition++) { final SamLocusIterator.LocusInfo locusInfo = new SamLocusIterator.LocusInfo(samSequenceRecord, ++position); if(cigarElement.getOperator() == CigarOperator.D) { Assert.assertEquals(collectSamErrorMetrics.processDeletionLocus(new SamLocusIterator.RecordAndOffset(deletionRecord, 0), locusInfo), (boolean) expected.get(iExpected++)); } } } }
Example 2
Source File: AbstractReadThreadingGraph.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Determine whether the provided cigar is okay to merge into the reference path * * @param cigar the cigar to analyze * @param requireFirstElementM if true, require that the first cigar element be an M operator in order for it to be okay * @param requireLastElementM if true, require that the last cigar element be an M operator in order for it to be okay * @return true if it's okay to merge, false otherwise */ @VisibleForTesting static boolean cigarIsOkayToMerge(final Cigar cigar, final boolean requireFirstElementM, final boolean requireLastElementM) { final List<CigarElement> elements = cigar.getCigarElements(); final int numElements = elements.size(); // don't allow more than a couple of different ops if (numElements == 0 || numElements > MAX_CIGAR_COMPLEXITY) { return false; } // the first element must be an M if (requireFirstElementM && elements.get(0).getOperator() != CigarOperator.M) { return false; } // the last element must be an M if (requireLastElementM && elements.get(numElements - 1).getOperator() != CigarOperator.M) { return false; } // note that there are checks for too many mismatches in the dangling branch later in the process return true; }
Example 3
Source File: CigarUtils.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * How many bases to the right does a read's alignment start shift given its cigar and the number of left soft clips */ public static int alignmentStartShift(final Cigar cigar, final int numClipped) { int refBasesClipped = 0; int elementStart = 0; // this and elementEnd are indices in the read's bases for (final CigarElement element : cigar.getCigarElements()) { final CigarOperator operator = element.getOperator(); // hard clips consume neither read bases nor reference bases and are irrelevant if (operator == CigarOperator.HARD_CLIP) { continue; } final int elementEnd = elementStart + (operator.consumesReadBases() ? element.getLength() : 0); if (elementEnd <= numClipped) { // totally within clipped span -- this includes deletions immediately following clipping refBasesClipped += operator.consumesReferenceBases() ? element.getLength() : 0; } else if (elementStart < numClipped) { // clip in middle of element, which means the element necessarily consumes read bases final int clippedLength = numClipped - elementStart; refBasesClipped += operator.consumesReferenceBases() ? clippedLength : 0; break; } elementStart = elementEnd; } return refBasesClipped; }
Example 4
Source File: AlignmentUtils.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Workhorse for trimCigarByBases and trimCigarByReference * * @param cigar a non-null Cigar to trim down * @param start Where should we start keeping bases in the cigar (inclusive)? The first position is 0 * @param end Where should we stop keeping bases in the cigar (inclusive)? The maximum value is cigar.getLength() - 1 * @param byReference should start and end be interpreted as position in the reference or the read to trim to/from? * @return a non-null cigar */ @SuppressWarnings("fallthrough") private static CigarBuilder.Result trimCigar(final Cigar cigar, final int start, final int end, final boolean byReference) { ParamUtils.isPositiveOrZero(start, "start position can't be negative"); Utils.validateArg(end >= start, () -> "end " + end + " is before start " + start); final CigarBuilder newElements = new CigarBuilder(); // these variables track the inclusive start and exclusive end of the current cigar element in reference (if byReference) or read (otherwise) coordinates int elementStart; // inclusive int elementEnd = 0; // exclusive -- start of next element for ( final CigarElement elt : cigar.getCigarElements() ) { elementStart = elementEnd; elementEnd = elementStart + (byReference ? lengthOnReference(elt) : lengthOnRead(elt)); // we are careful to include zero-length elements at both ends, that is, elements with elementStart == elementEnd == start and elementStart == elementEnd == end + 1 if (elementEnd < start || (elementEnd == start && elementStart < start)) { continue; } else if (elementStart > end && elementEnd > end + 1) { break; } final int overlapLength = elementEnd == elementStart ? elt.getLength() : Math.min(end + 1, elementEnd) - Math.max(start, elementStart); newElements.add(new CigarElement(overlapLength, elt.getOperator())); } Utils.validateArg(elementEnd > end, () -> "cigar elements don't reach end position (inclusive) " + end); return newElements.makeAndRecordDeletionsRemovedResult(); }
Example 5
Source File: CigarUtils.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Inverts the order of the operators in the cigar. * Eg 10M1D20M -> 20M1D10M */ public static Cigar invertCigar (final Cigar cigar) { Utils.nonNull(cigar); final List<CigarElement> els = new ArrayList<>(cigar.getCigarElements()); Collections.reverse(els); return new Cigar(els); }
Example 6
Source File: ReadClipperUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private int leadingCigarElementLength(final Cigar cigar, final CigarOperator operator) { for (final CigarElement cigarElement : cigar.getCigarElements()) { if (cigarElement.getOperator() == operator) return cigarElement.getLength(); if (cigarElement.getOperator() != CigarOperator.HARD_CLIP) break; } return 0; }
Example 7
Source File: AlignmentUtils.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Is the offset inside a deletion? * * @param cigar the read's CIGAR -- cannot be null * @param offset the offset into the CIGAR * @return true if the offset is inside a deletion, false otherwise */ public static boolean isInsideDeletion(final Cigar cigar, final int offset) { Utils.nonNull(cigar); if ( offset < 0 ) return false; // pos counts read bases int pos = 0; int prevPos = 0; for (final CigarElement ce : cigar.getCigarElements()) { switch (ce.getOperator()) { case I: case S: case D: case M: case EQ: case X: prevPos = pos; pos += ce.getLength(); break; case H: case P: case N: break; default: throw new GATKException("Unsupported cigar operator: " + ce.getOperator()); } // Is the offset inside a deletion? if ( prevPos < offset && pos >= offset && ce.getOperator() == CigarOperator.D ) { return true; } } return false; }
Example 8
Source File: AlignmentUtils.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Removing a trailing deletion from the incoming cigar if present * * @param c the cigar we want to update * @return a non-null Cigar */ public static Cigar removeTrailingDeletions(final Cigar c) { final List<CigarElement> elements = c.getCigarElements(); if ( elements.get(elements.size() - 1).getOperator() != CigarOperator.D ) return c; return new Cigar(elements.subList(0, elements.size() - 1)); }
Example 9
Source File: CigarUtils.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * replace soft clips (S) with match (M) operators, normalizing the result by all the transformations of the {@link CigarBuilder} class: * merging consecutive identical operators and removing zero-length elements. For example 10S10M -> 20M and 10S10M10I10I -> 20M20I. */ public static Cigar revertSoftClips(final Cigar originalCigar) { final CigarBuilder builder = new CigarBuilder(); for (final CigarElement element : originalCigar.getCigarElements()) { if (element.getOperator() == CigarOperator.SOFT_CLIP) { builder.add(new CigarElement(element.getLength(), CigarOperator.MATCH_OR_MISMATCH)); } else { builder.add(element); } } return builder.make(); }
Example 10
Source File: CigarUtils.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Removes all clipping and padding operators from the cigar. */ public static Cigar removeClipsAndPadding(final Cigar cigar) { Utils.nonNull(cigar, "cigar is null"); final List<CigarElement> elements = new ArrayList<>(cigar.numCigarElements()); for ( final CigarElement ce : cigar.getCigarElements() ) { if ( !isClipOrPaddingOperator(ce.getOperator()) ) { elements.add(ce); } } return new Cigar(elements); }
Example 11
Source File: SimpleAlleleCounter.java From abra2 with MIT License | 5 votes |
private IndelInfo checkForIndelAtLocus(int alignmentStart, Cigar cigar, int refPos) { IndelInfo ret = null; int readIdx = 0; int currRefPos = alignmentStart; for (CigarElement element : cigar.getCigarElements()) { if (element.getOperator() == CigarOperator.M) { readIdx += element.getLength(); currRefPos += element.getLength(); } else if (element.getOperator() == CigarOperator.I) { if (currRefPos == refPos+1) { ret = new IndelInfo(element, readIdx); break; } readIdx += element.getLength(); } else if (element.getOperator() == CigarOperator.D) { if (currRefPos == refPos+1) { ret = new IndelInfo(element, readIdx); break; } currRefPos += element.getLength(); } else if (element.getOperator() == CigarOperator.S) { readIdx += element.getLength(); } else if (element.getOperator() == CigarOperator.N) { currRefPos += element.getLength(); } if (currRefPos > refPos+1) { break; } } return ret; }
Example 12
Source File: CadabraProcessor.java From abra2 with MIT License | 5 votes |
private IndelInfo checkForIndelAtLocus(int alignmentStart, Cigar cigar, int refPos) { IndelInfo ret = null; int readIdx = 0; int currRefPos = alignmentStart; for (CigarElement element : cigar.getCigarElements()) { if (element.getOperator() == CigarOperator.M) { readIdx += element.getLength(); currRefPos += element.getLength(); } else if (element.getOperator() == CigarOperator.I) { if (currRefPos == refPos+1) { ret = new IndelInfo(element, readIdx); break; } readIdx += element.getLength(); } else if (element.getOperator() == CigarOperator.D) { if (currRefPos == refPos+1) { ret = new IndelInfo(element, readIdx); break; } currRefPos += element.getLength(); } else if (element.getOperator() == CigarOperator.S) { readIdx += element.getLength(); } else if (element.getOperator() == CigarOperator.N) { currRefPos += element.getLength(); } if (currRefPos > refPos+1) { break; } } return ret; }
Example 13
Source File: FilterBam.java From Drop-seq with MIT License | 5 votes |
/** * Rejects reads if the sum of the cigar string bases is less than M_BASES_IN_CIGAR, reject the read. * Don't process if M_BASES_IN_CIGAR is -1. * @return return false if the sum of the matching bases in the cigar is greater than the threshold. */ boolean rejectOnCigar(final SAMRecord r) { if (this.SUM_MATCHING_BASES==null) return (false); Cigar c = r.getCigar(); int count=0; for (CigarElement ce: c.getCigarElements()) if (ce.getOperator()==CigarOperator.M) count+=ce.getLength(); if (count>=this.SUM_MATCHING_BASES) return false; return true; }
Example 14
Source File: AlignmentsTags.java From cramtools with Apache License 2.0 | 4 votes |
/** * Same as {@link htsjdk.samtools.util.SequenceUtil#calculateMdAndNmTags} * but allows for reference excerpt instead of a full reference sequence. * * @param record * SAMRecord to inject NM and MD tags into * @param ref * reference sequence bases, may be a subsequence starting at * refOffest * @param refOffset * array offset of the reference bases, 0 is the same as the * whole sequence. This value will be subtracted from the * reference array index in calculations. * @param calcMD * calculate MD tag if true * @param calcNM * calculate NM tag if true */ public static void calculateMdAndNmTags(final SAMRecord record, final byte[] ref, final int refOffset, final boolean calcMD, final boolean calcNM) { if (!calcMD && !calcNM) return; final Cigar cigar = record.getCigar(); final List<CigarElement> cigarElements = cigar.getCigarElements(); final byte[] seq = record.getReadBases(); final int start = record.getAlignmentStart() - 1; int i, x, y, u = 0; int nm = 0; final StringBuilder str = new StringBuilder(); final int size = cigarElements.size(); for (i = y = 0, x = start; i < size; ++i) { final CigarElement ce = cigarElements.get(i); int j; final int length = ce.getLength(); final CigarOperator op = ce.getOperator(); if (op == CigarOperator.MATCH_OR_MISMATCH || op == CigarOperator.EQ || op == CigarOperator.X) { for (j = 0; j < length; ++j) { final int z = y + j; if (refOffset + ref.length <= x + j) break; // out of boundary int c1 = 0; int c2 = 0; // try { c1 = seq[z]; c2 = ref[x + j - refOffset]; if ((c1 == c2) || c1 == 0) { // a match ++u; } else { str.append(u); str.appendCodePoint(ref[x + j - refOffset]); u = 0; ++nm; } } if (j < length) break; x += length; y += length; } else if (op == CigarOperator.DELETION) { str.append(u); str.append('^'); for (j = 0; j < length; ++j) { if (ref[x + j - refOffset] == 0) break; str.appendCodePoint(ref[x + j - refOffset]); } u = 0; if (j < length) break; x += length; nm += length; } else if (op == CigarOperator.INSERTION || op == CigarOperator.SOFT_CLIP) { y += length; if (op == CigarOperator.INSERTION) nm += length; } else if (op == CigarOperator.SKIPPED_REGION) { x += length; } } str.append(u); if (calcMD) record.setAttribute(SAMTag.MD.name(), str.toString()); if (calcNM) record.setAttribute(SAMTag.NM.name(), nm); }
Example 15
Source File: AlignmentUtils.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
/** * Count how many bases mismatch the reference. Indels are not considered mismatching. * * @param r the sam record to check against * @param refSeq the byte array representing the reference sequence * @param refIndex the index in the reference byte array of the read's first base (the reference index * is matching the alignment start, there may be tons of soft-clipped bases before/after * that so it's wrong to compare with getLength() here.). Note that refIndex is * zero based, not 1 based * @param startOnRead the index in the read's bases from which we start counting * @param nReadBases the number of bases after (but including) startOnRead that we check * @return non-null object representing the mismatch count */ @SuppressWarnings("fallthrough") public static MismatchCount getMismatchCount(GATKRead r, byte[] refSeq, int refIndex, int startOnRead, int nReadBases) { Utils.nonNull( r ); Utils.nonNull( refSeq ); if ( refIndex < 0 ) throw new IllegalArgumentException("attempting to calculate the mismatch count with a reference index that is negative"); if ( startOnRead < 0 ) throw new IllegalArgumentException("attempting to calculate the mismatch count with a read start that is negative"); if ( nReadBases < 0 ) throw new IllegalArgumentException("attempting to calculate the mismatch count for a negative number of read bases"); if ( refSeq.length - refIndex < (r.getEnd() - r.getStart()) ) throw new IllegalArgumentException("attempting to calculate the mismatch count against a reference string that is smaller than the read"); MismatchCount mc = new MismatchCount(); int readIdx = 0; final int endOnRead = startOnRead + nReadBases - 1; // index of the last base on read we want to count (note we are including soft-clipped bases with this math) final byte[] readSeq = r.getBases(); final Cigar c = r.getCigar(); final byte[] readQuals = r.getBaseQualities(); for (final CigarElement ce : c.getCigarElements()) { if (readIdx > endOnRead) break; final int elementLength = ce.getLength(); switch (ce.getOperator()) { case X: mc.numMismatches += elementLength; for (int j = 0; j < elementLength; j++) mc.mismatchQualities += readQuals[readIdx+j]; case EQ: refIndex += elementLength; readIdx += elementLength; break; case M: for (int j = 0; j < elementLength; j++, refIndex++, readIdx++) { if (refIndex >= refSeq.length) continue; // TODO : It should never happen, we should throw exception here if (readIdx < startOnRead) continue; if (readIdx > endOnRead) break; byte refChr = refSeq[refIndex]; byte readChr = readSeq[readIdx]; // Note: we need to count X/N's as mismatches because that's what SAM requires //if ( BaseUtils.simpleBaseToBaseIndex(readChr) == -1 || // BaseUtils.simpleBaseToBaseIndex(refChr) == -1 ) // continue; // do not count Ns/Xs/etc ? if (readChr != refChr) { mc.numMismatches++; mc.mismatchQualities += readQuals[readIdx]; } } break; case I: case S: readIdx += elementLength; break; case D: case N: refIndex += elementLength; break; case H: case P: break; default: throw new GATKException("The " + ce.getOperator() + " cigar element is not currently supported"); } } return mc; }
Example 16
Source File: ReadClipperUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
private boolean cigarHasElementsDifferentThanInsertionsAndHardClips(final Cigar cigar) { for (final CigarElement cigarElement : cigar.getCigarElements()) if (cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.HARD_CLIP) return true; return false; }
Example 17
Source File: CompareToReference2.java From abra2 with MIT License | 4 votes |
public String getAlternateReference(int refStart, int refEnd, String chromosome, String seq, Cigar cigar) { String alt = null; if (refEnd < getRefLength(chromosome)) { StringBuffer altBuf = new StringBuffer(seq.length()); int readIdx = 0; int refIdx = refStart-1; for (CigarElement element : cigar.getCigarElements()) { if (element.getOperator() == CigarOperator.M) { for (int i=0; i<element.getLength(); i++) { if (refIdx >= getRefLength(chromosome)) { // You're off the edge of the map matey. Monsters be here! // This read has aligned across chromosomes. Do not proceed. return null; } char refBase = getRefBase(refIdx, chromosome); altBuf.append(refBase); readIdx++; refIdx++; } } else if (element.getOperator() == CigarOperator.I) { altBuf.append(seq.substring(readIdx, readIdx + element.getLength())); readIdx += element.getLength(); } else if (element.getOperator() == CigarOperator.D) { refIdx += element.getLength(); } else if (element.getOperator() == CigarOperator.S) { readIdx += element.getLength(); } } alt = altBuf.toString(); } return alt; }
Example 18
Source File: ReAligner.java From abra2 with MIT License | 4 votes |
private List<Feature> getExonSkippingJunctions(ContigAlignerResult result, List<Feature> junctions) { // Handles special case where an exon skipping junction causes large gaps with a tiny (~1) // number of bases mapped somewhere in the gap Set<Feature> junctionSet = new HashSet<Feature>(junctions); List<Feature> extraJunctions = new ArrayList<Feature>(); Cigar cigar = TextCigarCodec.decode(result.getCigar()); boolean isInGap = false; int gapStart = -1; int gapLength = -1; int numElems = -1; int refOffset = 0; int maxBasesInMiddle = 5; int middleBases = -1; CigarOperator prev = CigarOperator.M; for (CigarElement elem : cigar.getCigarElements()) { if (elem.getOperator() == CigarOperator.D || elem.getOperator() == CigarOperator.N) { if (!isInGap) { isInGap = true; gapStart = refOffset; gapLength = elem.getLength(); numElems = 1; middleBases = 0; } else { gapLength += elem.getLength(); numElems += 1; } } else { if (isInGap) { if (elem.getLength() + middleBases < maxBasesInMiddle) { middleBases += elem.getLength(); } else { if (numElems > 1 && middleBases > 0 && (prev == CigarOperator.D || prev == CigarOperator.N)) { long start = result.getGenomicPos() + gapStart; long end = start + gapLength; // Find junction start / end points closest to gap start / end point long closestStart = junctions.get(0).getStart(); long closestEnd = junctions.get(0).getEnd(); for (Feature junction : junctions) { if (Math.abs(junction.getStart()-start) < Math.abs(closestStart-start)) { closestStart = junction.getStart(); } if (Math.abs(junction.getEnd()-end) < Math.abs(closestEnd-end)) { closestEnd = junction.getEnd(); } } if (closestEnd > closestStart+1) { Feature junc = new Feature(result.getChromosome(), closestStart, closestEnd); if (!junctionSet.contains(junc)) { Logger.info("Potential exon skipping junction idenfified: %s", junc); extraJunctions.add(junc); } } } isInGap = false; gapStart = -1; gapLength = -1; numElems = -1; middleBases = -1; } } } if (elem.getOperator() == CigarOperator.M || elem.getOperator() == CigarOperator.D || elem.getOperator() == CigarOperator.N || elem.getOperator() == CigarOperator.X || elem.getOperator() == CigarOperator.EQ) { refOffset += elem.getLength(); } prev = elem.getOperator(); } return extraJunctions; }
Example 19
Source File: IndelShifter.java From abra2 with MIT License | 4 votes |
public Cigar shiftAllIndelsLeft(int refStart, int refEnd, String chromosome, Cigar cigar, String seq, CompareToReference2 c2r) { try { List<CigarElement> elems = new ArrayList<CigarElement>(cigar.getCigarElements()); int elemSize = elems.size(); for (int i=1; i<elemSize-1; i++) { int refOffset = 0; int readOffset = 0; for (int j=0; j<i-1; j++) { refOffset += getRefOffset(elems.get(j)); readOffset += getReadOffset(elems.get(j)); } CigarElement prev = elems.get(i-1); CigarElement elem = elems.get(i); CigarElement next = elems.get(i+1); if ((elem.getOperator() == CigarOperator.DELETION || elem.getOperator() == CigarOperator.INSERTION) && prev.getOperator() == CigarOperator.MATCH_OR_MISMATCH && next.getOperator() == CigarOperator.MATCH_OR_MISMATCH) { // subset cigar here and attempt to shift Cigar subCigar = new Cigar(Arrays.asList(prev, elem, next)); String subSeq = seq.substring(readOffset, readOffset+subCigar.getReadLength()); Cigar newCigar = shiftIndelsLeft(refStart + refOffset, refStart + refOffset + subCigar.getReferenceLength(), chromosome, subCigar, subSeq, c2r); //TODO: Merge abutting indels if applicable elems.set(i-1, newCigar.getCigarElement(0)); elems.set(i, newCigar.getCigarElement(1)); if (newCigar.getCigarElements().size() == 3) { elems.set(i+1, newCigar.getCigarElement(2)); } else { elems.remove(i+1); elemSize -= 1; } } } mergeAbuttingElements(elems); return new Cigar(elems); } catch (RuntimeException e) { e.printStackTrace(); Logger.error("Error on: " + refStart + ", " + refEnd + "," + chromosome + ", " + cigar + ", " + seq); throw e; } }
Example 20
Source File: ReadRecord.java From hmftools with GNU General Public License v3.0 | 4 votes |
public static final List<int[]> generateMappedCoords(final Cigar cigar, int posStart) { final List<int[]> mappedCoords = Lists.newArrayList(); // first establish whether the read is split across 2 distant regions, and if so which it maps to int posOffset = 0; boolean continueRegion = false; for(CigarElement element : cigar.getCigarElements()) { if(element.getOperator() == CigarOperator.S) { // nothing to skip } else if(element.getOperator() == D) { posOffset += element.getLength(); continueRegion = true; } else if(element.getOperator() == CigarOperator.I) { // nothing to skip continueRegion = true; } else if(element.getOperator() == CigarOperator.N) { posOffset += element.getLength(); continueRegion = false; } else if(element.getOperator() == CigarOperator.M) { int readStartPos = posStart + posOffset; int readEndPos = readStartPos + element.getLength() - 1; if(continueRegion && !mappedCoords.isEmpty()) { int[] lastRegion = mappedCoords.get(mappedCoords.size() - 1); lastRegion[SE_END] = readEndPos; } else { mappedCoords.add(new int[] { readStartPos, readEndPos }); } posOffset += element.getLength(); continueRegion = false; } } return mappedCoords; }