Java Code Examples for htsjdk.samtools.SAMRecord#getCigar()
The following examples show how to use
htsjdk.samtools.SAMRecord#getCigar() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: NumberEvents.java From hmftools with GNU General Public License v3.0 | 6 votes |
public static int numberOfEvents(@NotNull final SAMRecord record) { Object nm = record.getAttribute("NM"); if (!(nm instanceof Integer)) { return 0; } int additionalIndels = 0; int softClips = 0; for (CigarElement cigarElement : record.getCigar()) { switch (cigarElement.getOperator()) { case D: case I: additionalIndels += (cigarElement.getLength() - 1); break; case S: softClips++; break; } } return (Integer) nm - additionalIndels + softClips; }
Example 2
Source File: HLA.java From kourami with BSD 3-Clause "New" or "Revised" License | 5 votes |
public boolean startWIns(SAMRecord sr){ Cigar cigar = sr.getCigar(); if(cigar == null){ return true; }else{ CigarOperator op = cigar.getCigarElements().get(0).getOperator(); if(op == CigarOperator.I){ if(HLA.DEBUG) HLA.log.appendln("SKIPPING(Start with Insertion):\t" + sr.getReadName()); return true; } } return false; }
Example 3
Source File: SAMRecordUtils.java From abra2 with MIT License | 5 votes |
/** * Remove leading or trailing soft clips from the input read. * Does not modify a read entirely comprised of soft clips. */ public static void removeSoftClips(SAMRecord read) { Cigar cigar = read.getCigar(); CigarElement firstElement = cigar.getCigarElement(0); CigarElement lastElement = cigar.getCigarElement(cigar.numCigarElements()-1); if ((firstElement.getOperator() == CigarOperator.S) || (lastElement.getOperator() == CigarOperator.S)) { Cigar newCigar = new Cigar(); String bases = read.getReadString(); //String qualities = read.getBaseQualityString(); if (firstElement.getOperator() == CigarOperator.S) { bases = bases.substring(firstElement.getLength(), bases.length()); //qualities = qualities.substring(firstElement.getLength(), qualities.length()-1); } else { newCigar.add(firstElement); } for (int i=1; i<cigar.numCigarElements()-1; i++) { newCigar.add(cigar.getCigarElement(i)); } if (lastElement.getOperator() == CigarOperator.S) { bases = bases.substring(0, bases.length() - lastElement.getLength()); //qualities = qualities.substring(0, qualities.length() - lastElement.getLength() - 1); } else { newCigar.add(lastElement); } read.setCigar(newCigar); read.setReadString(bases); //read.setBaseQualityString(qualities); } }
Example 4
Source File: SAMRecordUtils.java From abra2 with MIT License | 5 votes |
/** * Replace hard clips with soft clips. */ public static void replaceHardClips(SAMRecord read) { Cigar cigar = read.getCigar(); if (cigar.getCigarElements().size() > 0) { CigarElement firstElement = cigar.getCigarElement(0); CigarElement lastElement = cigar.getCigarElement(cigar.numCigarElements()-1); if ((firstElement.getOperator() == CigarOperator.H) || (lastElement.getOperator() == CigarOperator.H)) { Cigar newCigar = new Cigar(); boolean isFirst = true; for (CigarElement element : cigar.getCigarElements()) { if (element.getOperator() != CigarOperator.H) { newCigar.add(element); } else { CigarElement softClip = new CigarElement(element.getLength(), CigarOperator.S); newCigar.add(softClip); if (isFirst) { read.setReadString(padBases(element.getLength()) + read.getReadString()); } else { read.setReadString(read.getReadString() + padBases(element.getLength())); } } isFirst = false; } read.setCigar(newCigar); } } }
Example 5
Source File: SpliceJunctionCounter.java From abra2 with MIT License | 5 votes |
private List<SpliceJunction> getJunctions(SAMRecord read) { List<SpliceJunction> junctions = new ArrayList<SpliceJunction>(); int pos = read.getAlignmentStart(); for (CigarElement elem : read.getCigar()) { switch (elem.getOperator()) { case D: case M: pos += elem.getLength(); break; case N: junctions.add(new SpliceJunction(read.getReferenceName(), pos, pos+elem.getLength()-1)); pos += elem.getLength(); break; case S: case I: case H: // NOOP break; default: throw new UnsupportedOperationException("Unsupported Cigar Operator: " + elem.getOperator()); } } return junctions; }
Example 6
Source File: ReadContextCounter.java From hmftools with GNU General Public License v3.0 | 5 votes |
private int indelLength(@NotNull final SAMRecord record) { int result = 0; for (CigarElement cigarElement : record.getCigar()) { switch (cigarElement.getOperator()) { case I: case D: result += cigarElement.getLength(); } } return result; }
Example 7
Source File: SAMRecords.java From hmftools with GNU General Public License v3.0 | 5 votes |
public static int rightSoftClip(@NotNull final SAMRecord record) { Cigar cigar = record.getCigar(); if (cigar.numCigarElements() > 0) { CigarElement lastLement = cigar.getCigarElement(cigar.numCigarElements() - 1); if (lastLement.getOperator() == CigarOperator.S) { return lastLement.getLength(); } } return 0; }
Example 8
Source File: SAMRecords.java From hmftools with GNU General Public License v3.0 | 5 votes |
public static int leftSoftClip(@NotNull final SAMRecord record) { Cigar cigar = record.getCigar(); if (cigar.numCigarElements() > 0) { CigarElement firstElement = cigar.getCigarElement(0); if (firstElement.getOperator() == CigarOperator.S) { return firstElement.getLength(); } } return 0; }
Example 9
Source File: ReadRecord.java From hmftools with GNU General Public License v3.0 | 5 votes |
public static ReadRecord from(final SAMRecord record) { ReadRecord read = new ReadRecord( record.getReadName(), record.getReferenceName(), record.getStart(), record.getEnd(), record.getReadString(), record.getCigar(), record.getInferredInsertSize(), record.getFlags(), record.getMateReferenceName(), record.getMateAlignmentStart()); read.setSuppAlignment(record.getStringAttribute(SUPPLEMENTARY_ATTRIBUTE)); return read; }
Example 10
Source File: FilterBam.java From Drop-seq with MIT License | 5 votes |
/** * Rejects reads if the sum of the cigar string bases is less than M_BASES_IN_CIGAR, reject the read. * Don't process if M_BASES_IN_CIGAR is -1. * @return return false if the sum of the matching bases in the cigar is greater than the threshold. */ boolean rejectOnCigar(final SAMRecord r) { if (this.SUM_MATCHING_BASES==null) return (false); Cigar c = r.getCigar(); int count=0; for (CigarElement ce: c.getCigarElements()) if (ce.getOperator()==CigarOperator.M) count+=ce.getLength(); if (count>=this.SUM_MATCHING_BASES) return false; return true; }
Example 11
Source File: CigarTraversal.java From hmftools with GNU General Public License v3.0 | 4 votes |
public static void traverseCigar(@NotNull final SAMRecord record, @NotNull final CigarHandler handler) { final Cigar cigar = record.getCigar(); int readIndex = 0; int refBase = record.getAlignmentStart(); for (int i = 0; i < cigar.numCigarElements(); i++) { final CigarElement e = cigar.getCigarElement(i); switch (e.getOperator()) { case H: break; // ignore hard clips case P: break; // ignore pads case S: if (i == 0) { handler.handleLeftSoftClip(record, e); } else if (i == cigar.numCigarElements() - 1) { handler.handleRightSoftClip(record, e, readIndex, refBase); } readIndex += e.getLength(); break; // soft clip read bases case N: handler.handleSkippedReference(record, e, readIndex - 1, refBase - 1); refBase += e.getLength(); break; // reference skip case D: handler.handleDelete(record, e, readIndex - 1, refBase - 1); refBase += e.getLength(); break; case I: // TODO: Handle 1I150M int refIndex = refBase - 1 - record.getAlignmentStart(); if (refIndex >= 0) { handler.handleInsert(record, e, readIndex - 1, refBase - 1); } readIndex += e.getLength(); break; case M: case EQ: case X: boolean isFollowedByIndel = i < cigar.numCigarElements() - 1 && cigar.getCigarElement(i + 1).getOperator().isIndel(); final CigarElement element = isFollowedByIndel ? new CigarElement(e.getLength() - 1, e.getOperator()) : e; handler.handleAlignment(record, element, readIndex, refBase); readIndex += e.getLength(); refBase += e.getLength(); break; default: throw new IllegalStateException("Case statement didn't deal with op: " + e.getOperator() + "in CIGAR: " + cigar); } } }
Example 12
Source File: SimpleCaller.java From abra2 with MIT License | 4 votes |
private BaseInfo getBaseAtReferencePosition(SAMRecord read, int refPos) { boolean isNearEdge = false; boolean isIndel = false; int alignmentStart = read.getAlignmentStart(); Cigar cigar = read.getCigar(); char base = 'N'; int readIdx = 0; int currRefPos = alignmentStart; for (CigarElement element : cigar.getCigarElements()) { if (element.getOperator() == CigarOperator.M) { readIdx += element.getLength(); currRefPos += element.getLength(); if (currRefPos > refPos) { // TODO: double check end of read base here... int offset = currRefPos - refPos; if ((offset < 3) || (offset+3 >= element.getLength())) { // This position is within 3 bases of start/end of alignment or clipping or indel. // Tag this base for evaluation downstream. isNearEdge = true; } readIdx -= offset; if ((readIdx < 0) || (readIdx >= read.getReadBases().length)) { System.err.println("Read index out of bounds for read: " + read.getSAMString()); break; } if (read.getBaseQualities()[readIdx] >= MIN_BASE_QUALITY) { base = (char) read.getReadBases()[readIdx]; } break; } } else if (element.getOperator() == CigarOperator.I) { if (currRefPos == refPos) { //TODO: Handle insertions isIndel = true; break; } readIdx += element.getLength(); } else if (element.getOperator() == CigarOperator.D) { if (refPos >= currRefPos && refPos <= currRefPos+element.getLength()) { //TODO: Handle deletions isIndel = true; break; } currRefPos += element.getLength(); } else if (element.getOperator() == CigarOperator.S) { readIdx += element.getLength(); } } return new BaseInfo(Character.toUpperCase(base), isNearEdge, isIndel); }
Example 13
Source File: HLA.java From kourami with BSD 3-Clause "New" or "Revised" License | 4 votes |
public boolean qcCheck(SAMRecord sr){ Cigar cigar = sr.getCigar(); int rLen = sr.getReadLength(); int effectiveLen = 0; if(cigar==null) return false; else{ for(final CigarElement ce : cigar.getCigarElements()){ CigarOperator op = ce.getOperator(); int cigarLen = ce.getLength(); switch(op) { case M: { effectiveLen += cigarLen; break; } case I: { effectiveLen += cigarLen; break; } default: break; } } } boolean readdebug = false; if(readdebug){ HLA.log.appendln(sr.getSAMString()); HLA.log.appendln("EffectiveLen:\t" + effectiveLen); HLA.log.appendln("ReadLen:\t" + rLen); } Integer i = sr.getIntegerAttribute("NM"); int nm = 0; if(i!=null) nm = i.intValue(); if(readdebug) HLA.log.appendln("NM=\t" + nm); if(nm < 16){ if(readdebug) HLA.log.appendln("PASSWED QC"); return true; } if(readdebug){ HLA.log.appendln("FAILED QC"); HLA.log.appendln(sr.getSAMString()); } return false; }
Example 14
Source File: SAMRecordUtilsTest.java From abra2 with MIT License | 4 votes |
private Cigar getCigar(String str) { SAMRecord read = new SAMRecord(null); read.setCigarString(str); return read.getCigar(); }
Example 15
Source File: BamToDetailsConversion.java From chipster with MIT License | 4 votes |
/** * Find reads in a given range. * * <p> * TODO add cigar to the list of returned values * <p> * TODO add pair information to the list of returned values * * @param request * @return * @throws InterruptedException */ @Override protected void processDataRequest(DataRequest request) throws InterruptedException { // Read the given region CloseableIterator<SAMRecord> iterator = dataSource.query(request.start.chr, request.start.bp.intValue(), request.end.bp.intValue()); // Produce results while (iterator.hasNext()) { List<Feature> responseList = new LinkedList<Feature>(); // Split results into chunks for (int c = 0; c < RESULT_CHUNK_SIZE && iterator.hasNext(); c++) { SAMRecord record = iterator.next(); // Region for this read Region recordRegion = new Region((long) record.getAlignmentStart(), (long) record.getAlignmentEnd(), request.start.chr); // Values for this read LinkedHashMap<DataType, Object> values = new LinkedHashMap<DataType, Object>(); Feature read = new Feature(recordRegion, values); if (request.getRequestedContents().contains(DataType.ID)) { values.put(DataType.ID, record.getReadName()); } if (request.getRequestedContents().contains(DataType.STRAND)) { values.put(DataType.STRAND, BamUtils.getStrand(record, coverageType)); } if (request.getRequestedContents().contains(DataType.QUALITY)) { values.put(DataType.QUALITY, record.getBaseQualityString()); } if (request.getRequestedContents().contains(DataType.CIGAR)) { Cigar cigar = new Cigar(read, record.getCigar()); values.put(DataType.CIGAR, cigar); } // TODO Deal with "=" and "N" in read string if (request.getRequestedContents().contains(DataType.SEQUENCE)) { String seq = record.getReadString(); values.put(DataType.SEQUENCE, seq); } if (request.getRequestedContents().contains(DataType.MATE_POSITION)) { BpCoord mate = new BpCoord((Long)(long)record.getMateAlignmentStart(), new Chromosome(record.getMateReferenceName())); values.put(DataType.MATE_POSITION, mate); } if (request.getRequestedContents().contains(DataType.BAM_TAG_NH)) { Object ng = record.getAttribute("NH"); if (ng != null) { values.put(DataType.BAM_TAG_NH, (Integer)record.getAttribute("NH")); } } /* * NOTE! RegionContents created from the same read area has to be equal in methods equals, hash and compareTo. Primary types * should be ok, but objects (including tables) has to be handled in those methods separately. Otherwise tracks keep adding * the same reads to their read sets again and again. */ responseList.add(read); } // Send result super.createDataResult(new DataResult(request.getStatus(), responseList)); } // We are done iterator.close(); }
Example 16
Source File: BamToCoverageConversion.java From chipster with MIT License | 4 votes |
private void calculateCoverage(DataRequest request, BpCoord from, BpCoord to) throws InterruptedException { //query data for full average bins, because merging them later would be difficult long start = CoverageTool.getBin(from.bp); long end = CoverageTool.getBin(to.bp) + CoverageTool.BIN_SIZE - 1; //if end coordinate is smaller than 1 Picard returns a full chromosome and we'll run out of memory if (end < 1) { end = 1; } CloseableIterator<SAMRecord> iterator = dataSource.query(from.chr, (int)start, (int)end); BaseStorage forwardBaseStorage = new BaseStorage(); BaseStorage reverseBaseStorage = new BaseStorage(); while (iterator.hasNext()) { SAMRecord record = iterator.next(); LinkedHashMap<DataType, Object> values = new LinkedHashMap<DataType, Object>(); Region recordRegion = new Region((long) record.getAlignmentStart(), (long) record.getAlignmentEnd(), request.start.chr); Feature read = new Feature(recordRegion, values); values.put(DataType.ID, record.getReadName()); values.put(DataType.STRAND, BamUtils.getStrand(record, coverageType)); Cigar cigar = new Cigar(read, record.getCigar()); values.put(DataType.CIGAR, cigar); String seq = record.getReadString(); values.put(DataType.SEQUENCE, seq); // Split read into continuous blocks (elements) by using the cigar List<ReadPart> parts = Cigar.splitElements(read); for (ReadPart part : parts) { if (read.values.get(DataType.STRAND) == Strand.FORWARD) { forwardBaseStorage.addNucleotideCounts(part); } else if (read.values.get(DataType.STRAND) == Strand.REVERSE) { reverseBaseStorage.addNucleotideCounts(part); } } } // We are done iterator.close(); /* Reads that overlap query regions create nucleotide counts outside the query region. * Remove those extra nucleotide counts, because they don't contain all reads of those regions and would show * wrong information. */ Region filterRegion = new Region(start, end, from.chr); forwardBaseStorage.filter(filterRegion); reverseBaseStorage.filter(filterRegion); // Send result LinkedList<Feature> resultList = new LinkedList<Feature>(); createResultList(from, forwardBaseStorage, resultList, Strand.FORWARD); createResultList(from, reverseBaseStorage, resultList, Strand.REVERSE); LinkedList<Feature> averageCoverage = CoverageTool.average(resultList, from.chr); if (request.getRequestedContents().contains(DataType.COVERAGE)) { super.createDataResult(new DataResult(request, resultList)); } if (request.getRequestedContents().contains(DataType.COVERAGE_AVERAGE)) { super.createDataResult(new DataResult(request, averageCoverage)); } }
Example 17
Source File: Utils.java From cramtools with Apache License 2.0 | 4 votes |
/** * A rip off samtools bam_md.c * * @param record * @param ref * @param calcMD * @param calcNM */ public static void calculateMdAndNmTags(SAMRecord record, byte[] ref, boolean calcMD, boolean calcNM) { if (!calcMD && !calcNM) return; Cigar cigar = record.getCigar(); List<CigarElement> cigarElements = cigar.getCigarElements(); byte[] seq = record.getReadBases(); int start = record.getAlignmentStart() - 1; int i, x, y, u = 0; int nm = 0; StringBuffer str = new StringBuffer(); int size = cigarElements.size(); for (i = y = 0, x = start; i < size; ++i) { CigarElement ce = cigarElements.get(i); int j, l = ce.getLength(); CigarOperator op = ce.getOperator(); if (op == CigarOperator.MATCH_OR_MISMATCH || op == CigarOperator.EQ || op == CigarOperator.X) { for (j = 0; j < l; ++j) { int z = y + j; if (ref.length <= x + j) break; // out of boundary int c1 = 0; int c2 = 0; // try { c1 = seq[z]; c2 = ref[x + j]; if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) { // a match ++u; } else { str.append(u); str.appendCodePoint(ref[x + j]); u = 0; ++nm; } } if (j < l) break; x += l; y += l; } else if (op == CigarOperator.DELETION) { str.append(u); str.append('^'); for (j = 0; j < l; ++j) { if (ref[x + j] == 0) break; str.appendCodePoint(ref[x + j]); } u = 0; if (j < l) break; x += l; nm += l; } else if (op == CigarOperator.INSERTION || op == CigarOperator.SOFT_CLIP) { y += l; if (op == CigarOperator.INSERTION) nm += l; } else if (op == CigarOperator.SKIPPED_REGION) { x += l; } } str.append(u); if (calcMD) record.setAttribute(SAMTag.MD.name(), str.toString()); if (calcNM) record.setAttribute(SAMTag.NM.name(), nm); }
Example 18
Source File: AlignmentsTags.java From cramtools with Apache License 2.0 | 4 votes |
/** * Same as {@link htsjdk.samtools.util.SequenceUtil#calculateMdAndNmTags} * but allows for reference excerpt instead of a full reference sequence. * * @param record * SAMRecord to inject NM and MD tags into * @param ref * reference sequence bases, may be a subsequence starting at * refOffest * @param refOffset * array offset of the reference bases, 0 is the same as the * whole sequence. This value will be subtracted from the * reference array index in calculations. * @param calcMD * calculate MD tag if true * @param calcNM * calculate NM tag if true */ public static void calculateMdAndNmTags(final SAMRecord record, final byte[] ref, final int refOffset, final boolean calcMD, final boolean calcNM) { if (!calcMD && !calcNM) return; final Cigar cigar = record.getCigar(); final List<CigarElement> cigarElements = cigar.getCigarElements(); final byte[] seq = record.getReadBases(); final int start = record.getAlignmentStart() - 1; int i, x, y, u = 0; int nm = 0; final StringBuilder str = new StringBuilder(); final int size = cigarElements.size(); for (i = y = 0, x = start; i < size; ++i) { final CigarElement ce = cigarElements.get(i); int j; final int length = ce.getLength(); final CigarOperator op = ce.getOperator(); if (op == CigarOperator.MATCH_OR_MISMATCH || op == CigarOperator.EQ || op == CigarOperator.X) { for (j = 0; j < length; ++j) { final int z = y + j; if (refOffset + ref.length <= x + j) break; // out of boundary int c1 = 0; int c2 = 0; // try { c1 = seq[z]; c2 = ref[x + j - refOffset]; if ((c1 == c2) || c1 == 0) { // a match ++u; } else { str.append(u); str.appendCodePoint(ref[x + j - refOffset]); u = 0; ++nm; } } if (j < length) break; x += length; y += length; } else if (op == CigarOperator.DELETION) { str.append(u); str.append('^'); for (j = 0; j < length; ++j) { if (ref[x + j - refOffset] == 0) break; str.appendCodePoint(ref[x + j - refOffset]); } u = 0; if (j < length) break; x += length; nm += length; } else if (op == CigarOperator.INSERTION || op == CigarOperator.SOFT_CLIP) { y += length; if (op == CigarOperator.INSERTION) nm += length; } else if (op == CigarOperator.SKIPPED_REGION) { x += length; } } str.append(u); if (calcMD) record.setAttribute(SAMTag.MD.name(), str.toString()); if (calcNM) record.setAttribute(SAMTag.NM.name(), nm); }