Java Code Examples for htsjdk.samtools.CigarOperator#DELETION
The following examples show how to use
htsjdk.samtools.CigarOperator#DELETION .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CigarUtils.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Checks if cigar starts with a deletion (ignoring any clips at the beginning). */ private static boolean startsOrEndsWithDeletionIgnoringClips(final List<CigarElement> elems) { for (final boolean leftSide : new boolean[] {true, false}) { for (final CigarElement elem : leftSide ? elems : Lists.reverse(elems)) { final CigarOperator op = elem.getOperator(); if (op == CigarOperator.DELETION) { //first non-clipping is deletion return true; } else if (!op.isClipping()) { // first non-clipping is non deletion break; } } } return false; }
Example 2
Source File: ReadClassifier.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
private void checkBigIndel( final List<CigarElement> cigarElements, final GATKRead read, final List<BreakpointEvidence> evidenceList ) { int locus = read.getStart(); for ( final CigarElement ele : cigarElements ) { final CigarOperator op = ele.getOperator(); if ( ele.getLength() >= MIN_INDEL_LEN ) { if ( op == CigarOperator.INSERTION ) { evidenceList.add(new BreakpointEvidence.LargeIndel(read, readMetadata, locus)); } else if ( op == CigarOperator.DELETION ) { evidenceList.add(new BreakpointEvidence.LargeIndel(read, readMetadata, locus+ele.getLength()/2)); } } if ( op.consumesReferenceBases() ) { locus += ele.getLength(); } } }
Example 3
Source File: BaseRecalibrationEngine.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
protected static boolean[] calculateKnownSites( final GATKRead read, final Iterable<? extends Locatable> knownSites ) { final int readLength = read.getLength(); final boolean[] knownSitesArray = new boolean[readLength];//initializes to all false final Cigar cigar = read.getCigar(); final int softStart = read.getSoftStart(); final int softEnd = read.getSoftEnd(); for ( final Locatable knownSite : knownSites ) { if (knownSite.getEnd() < softStart || knownSite.getStart() > softEnd) { // knownSite is outside clipping window for the read, ignore continue; } final Pair<Integer, CigarOperator> featureStartAndOperatorOnRead = ReadUtils.getReadIndexForReferenceCoordinate(read, knownSite.getStart()); int featureStartOnRead = featureStartAndOperatorOnRead.getLeft() == ReadUtils.READ_INDEX_NOT_FOUND ? 0 : featureStartAndOperatorOnRead.getLeft(); if (featureStartAndOperatorOnRead.getRight() == CigarOperator.DELETION) { featureStartOnRead--; } final Pair<Integer, CigarOperator> featureEndAndOperatorOnRead = ReadUtils.getReadIndexForReferenceCoordinate(read, knownSite.getEnd()); int featureEndOnRead = featureEndAndOperatorOnRead.getLeft() == ReadUtils.READ_INDEX_NOT_FOUND ? readLength : featureEndAndOperatorOnRead.getLeft(); if( featureStartOnRead > readLength ) { featureStartOnRead = featureEndOnRead = readLength; } Arrays.fill(knownSitesArray, Math.max(0, featureStartOnRead), Math.min(readLength, featureEndOnRead + 1), true); } return knownSitesArray; }
Example 4
Source File: CigarBuilder.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
public Cigar make(final boolean allowEmpty) { Utils.validate(!(section == Section.LEFT_SOFT_CLIP && cigarElements.get(0).getOperator() == CigarOperator.SOFT_CLIP), "cigar is completely soft-clipped"); trailingDeletionBasesRemovedInMake = 0; if (removeDeletionsAtEnds && lastOperator == CigarOperator.DELETION) { trailingDeletionBasesRemovedInMake = cigarElements.get(cigarElements.size() - 1).getLength(); cigarElements.remove(cigarElements.size() - 1); } else if (removeDeletionsAtEnds && lastTwoElementsWereDeletionAndInsertion()) { trailingDeletionBasesRemovedInMake = cigarElements.get(cigarElements.size() - 2).getLength(); cigarElements.remove(cigarElements.size() - 2); } Utils.validate(allowEmpty || !cigarElements.isEmpty(), "No cigar elements left after removing leading and trailing deletions."); return new Cigar(cigarElements); // removing flanking deletions may cause an empty cigar to be output. We do not throw an error or return null. }
Example 5
Source File: CigarUtils.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Checks if cigar has consecutive I/D elements. */ private static boolean hasConsecutiveIndels(final List<CigarElement> elems) { boolean prevIndel = false; for (final CigarElement elem : elems) { final CigarOperator op = elem.getOperator(); final boolean isIndel = (op == CigarOperator.INSERTION || op == CigarOperator.DELETION); if (prevIndel && isIndel) { return true; } prevIndel = isIndel; } return false; }
Example 6
Source File: RealignmentEngine.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
public static boolean supportsVariant(final GATKRead read, final VariantContext vc, int indelStartTolerance) { final byte[] readBases = read.getBasesNoCopy(); final Pair<Integer, CigarOperator> offsetAndOperatorInRead = ReadUtils.getReadIndexForReferenceCoordinate(read, vc.getStart()); if ( offsetAndOperatorInRead.getLeft() == ReadUtils.READ_INDEX_NOT_FOUND) { return false; } else if (vc.isSNP() && offsetAndOperatorInRead.getRight() == CigarOperator.DELETION) { return false; } final int variantPositionInRead = offsetAndOperatorInRead.getLeft(); for (final Allele allele : vc.getAlternateAlleles()) { final int referenceLength = vc.getReference().length(); if (allele.length() == referenceLength) { // SNP or MNP -- check whether read bases match variant allele bases if (allele.basesMatch(ArrayUtils.subarray(readBases, variantPositionInRead, Math.min(variantPositionInRead + allele.length(), readBases.length)))) { return true; } } else { // indel -- check if the read has the right CIGAR operator near position -- we don't require exact an exact match because indel representation is non-unique final boolean isDeletion = allele.length() < referenceLength; int readPosition = 0; // offset by 1 because the variant position is one base before the first indel base for (final CigarElement cigarElement : read.getCigarElements()) { if (Math.abs(readPosition - variantPositionInRead) <= indelStartTolerance) { if (isDeletion && mightSupportDeletion(cigarElement) || !isDeletion && mightSupportInsertion(cigarElement)) { return true; } } else { readPosition += cigarElement.getLength(); } } } } return false; }
Example 7
Source File: IndelShifter.java From abra2 with MIT License | 4 votes |
public Cigar shiftAllIndelsLeft(int refStart, int refEnd, String chromosome, Cigar cigar, String seq, CompareToReference2 c2r) { try { List<CigarElement> elems = new ArrayList<CigarElement>(cigar.getCigarElements()); int elemSize = elems.size(); for (int i=1; i<elemSize-1; i++) { int refOffset = 0; int readOffset = 0; for (int j=0; j<i-1; j++) { refOffset += getRefOffset(elems.get(j)); readOffset += getReadOffset(elems.get(j)); } CigarElement prev = elems.get(i-1); CigarElement elem = elems.get(i); CigarElement next = elems.get(i+1); if ((elem.getOperator() == CigarOperator.DELETION || elem.getOperator() == CigarOperator.INSERTION) && prev.getOperator() == CigarOperator.MATCH_OR_MISMATCH && next.getOperator() == CigarOperator.MATCH_OR_MISMATCH) { // subset cigar here and attempt to shift Cigar subCigar = new Cigar(Arrays.asList(prev, elem, next)); String subSeq = seq.substring(readOffset, readOffset+subCigar.getReadLength()); Cigar newCigar = shiftIndelsLeft(refStart + refOffset, refStart + refOffset + subCigar.getReferenceLength(), chromosome, subCigar, subSeq, c2r); //TODO: Merge abutting indels if applicable elems.set(i-1, newCigar.getCigarElement(0)); elems.set(i, newCigar.getCigarElement(1)); if (newCigar.getCigarElements().size() == 3) { elems.set(i+1, newCigar.getCigarElement(2)); } else { elems.remove(i+1); elemSize -= 1; } } } mergeAbuttingElements(elems); return new Cigar(elems); } catch (RuntimeException e) { e.printStackTrace(); Logger.error("Error on: " + refStart + ", " + refEnd + "," + chromosome + ", " + cigar + ", " + seq); throw e; } }
Example 8
Source File: CigarBuilder.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
private boolean lastTwoElementsWereDeletionAndInsertion() { return lastOperator == CigarOperator.INSERTION && cigarElements.size() > 1 && cigarElements.get(cigarElements.size() - 2).getOperator() == CigarOperator.DELETION; }
Example 9
Source File: AlignmentUtils.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
private static Pair<byte[], byte[]> getBasesAndBaseQualitiesAlignedOneToOne(final GATKRead read, final byte basePadCharacter, final byte qualityPadCharacter) { Utils.nonNull(read); // As this code is performance sensitive in the HaplotypeCaller, we elect to use the noCopy versions of these getters. // We can do this because we don't mutate base or quality arrays in this method or in its accessors final byte[] bases = read.getBasesNoCopy(); final byte[] baseQualities = read.getBaseQualitiesNoCopy(); final int numCigarElements = read.numCigarElements(); boolean sawIndel = false; // Check if the cigar contains indels // Note that we don't call ContainsOperator() here twice to avoid the performance hit of building stream iterators twice for (int i = 0; i < numCigarElements; i++) { final CigarOperator e = read.getCigarElement(i).getOperator(); if (e == CigarOperator.INSERTION || e == CigarOperator.DELETION) { sawIndel = true; break; } } if (!sawIndel) { return new ImmutablePair<>(bases, baseQualities); } else { final int numberRefBasesIncludingSoftclips = CigarUtils.countRefBasesAndSoftClips(read.getCigarElements(), 0, numCigarElements); final byte[] paddedBases = new byte[numberRefBasesIncludingSoftclips]; final byte[] paddedBaseQualities = new byte[numberRefBasesIncludingSoftclips]; int literalPos = 0; int paddedPos = 0; for ( int i = 0; i < numCigarElements; i++ ) { final CigarElement ce = read.getCigarElement(i); final CigarOperator co = ce.getOperator(); if (co.consumesReadBases()) { if (!co.consumesReferenceBases()) { literalPos += ce.getLength(); //skip inserted bases } else { System.arraycopy(bases, literalPos, paddedBases, paddedPos, ce.getLength()); System.arraycopy(baseQualities, literalPos, paddedBaseQualities, paddedPos, ce.getLength()); literalPos += ce.getLength(); paddedPos += ce.getLength(); } } else if (co.consumesReferenceBases()) { for ( int j = 0; j < ce.getLength(); j++ ) { //pad deleted bases paddedBases[paddedPos] = basePadCharacter; paddedBaseQualities[paddedPos] = qualityPadCharacter; paddedPos++; } } } return new ImmutablePair<>(paddedBases, paddedBaseQualities); } }
Example 10
Source File: Utils.java From cramtools with Apache License 2.0 | 4 votes |
/** * A rip off samtools bam_md.c * * @param record * @param ref * @param calcMD * @param calcNM */ public static void calculateMdAndNmTags(SAMRecord record, byte[] ref, boolean calcMD, boolean calcNM) { if (!calcMD && !calcNM) return; Cigar cigar = record.getCigar(); List<CigarElement> cigarElements = cigar.getCigarElements(); byte[] seq = record.getReadBases(); int start = record.getAlignmentStart() - 1; int i, x, y, u = 0; int nm = 0; StringBuffer str = new StringBuffer(); int size = cigarElements.size(); for (i = y = 0, x = start; i < size; ++i) { CigarElement ce = cigarElements.get(i); int j, l = ce.getLength(); CigarOperator op = ce.getOperator(); if (op == CigarOperator.MATCH_OR_MISMATCH || op == CigarOperator.EQ || op == CigarOperator.X) { for (j = 0; j < l; ++j) { int z = y + j; if (ref.length <= x + j) break; // out of boundary int c1 = 0; int c2 = 0; // try { c1 = seq[z]; c2 = ref[x + j]; if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) { // a match ++u; } else { str.append(u); str.appendCodePoint(ref[x + j]); u = 0; ++nm; } } if (j < l) break; x += l; y += l; } else if (op == CigarOperator.DELETION) { str.append(u); str.append('^'); for (j = 0; j < l; ++j) { if (ref[x + j] == 0) break; str.appendCodePoint(ref[x + j]); } u = 0; if (j < l) break; x += l; nm += l; } else if (op == CigarOperator.INSERTION || op == CigarOperator.SOFT_CLIP) { y += l; if (op == CigarOperator.INSERTION) nm += l; } else if (op == CigarOperator.SKIPPED_REGION) { x += l; } } str.append(u); if (calcMD) record.setAttribute(SAMTag.MD.name(), str.toString()); if (calcNM) record.setAttribute(SAMTag.NM.name(), nm); }
Example 11
Source File: AlignmentsTags.java From cramtools with Apache License 2.0 | 4 votes |
/** * Same as {@link htsjdk.samtools.util.SequenceUtil#calculateMdAndNmTags} * but allows for reference excerpt instead of a full reference sequence. * * @param record * SAMRecord to inject NM and MD tags into * @param ref * reference sequence bases, may be a subsequence starting at * refOffest * @param refOffset * array offset of the reference bases, 0 is the same as the * whole sequence. This value will be subtracted from the * reference array index in calculations. * @param calcMD * calculate MD tag if true * @param calcNM * calculate NM tag if true */ public static void calculateMdAndNmTags(final SAMRecord record, final byte[] ref, final int refOffset, final boolean calcMD, final boolean calcNM) { if (!calcMD && !calcNM) return; final Cigar cigar = record.getCigar(); final List<CigarElement> cigarElements = cigar.getCigarElements(); final byte[] seq = record.getReadBases(); final int start = record.getAlignmentStart() - 1; int i, x, y, u = 0; int nm = 0; final StringBuilder str = new StringBuilder(); final int size = cigarElements.size(); for (i = y = 0, x = start; i < size; ++i) { final CigarElement ce = cigarElements.get(i); int j; final int length = ce.getLength(); final CigarOperator op = ce.getOperator(); if (op == CigarOperator.MATCH_OR_MISMATCH || op == CigarOperator.EQ || op == CigarOperator.X) { for (j = 0; j < length; ++j) { final int z = y + j; if (refOffset + ref.length <= x + j) break; // out of boundary int c1 = 0; int c2 = 0; // try { c1 = seq[z]; c2 = ref[x + j - refOffset]; if ((c1 == c2) || c1 == 0) { // a match ++u; } else { str.append(u); str.appendCodePoint(ref[x + j - refOffset]); u = 0; ++nm; } } if (j < length) break; x += length; y += length; } else if (op == CigarOperator.DELETION) { str.append(u); str.append('^'); for (j = 0; j < length; ++j) { if (ref[x + j - refOffset] == 0) break; str.appendCodePoint(ref[x + j - refOffset]); } u = 0; if (j < length) break; x += length; nm += length; } else if (op == CigarOperator.INSERTION || op == CigarOperator.SOFT_CLIP) { y += length; if (op == CigarOperator.INSERTION) nm += length; } else if (op == CigarOperator.SKIPPED_REGION) { x += length; } } str.append(u); if (calcMD) record.setAttribute(SAMTag.MD.name(), str.toString()); if (calcNM) record.setAttribute(SAMTag.NM.name(), nm); }