Java Code Examples for htsjdk.samtools.SAMRecord#getCigar()

The following examples show how to use htsjdk.samtools.SAMRecord#getCigar() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: NumberEvents.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
public static int numberOfEvents(@NotNull final SAMRecord record) {
    Object nm = record.getAttribute("NM");
    if (!(nm instanceof Integer)) {
        return 0;
    }

    int additionalIndels = 0;
    int softClips = 0;
    for (CigarElement cigarElement : record.getCigar()) {
        switch (cigarElement.getOperator()) {
            case D:
            case I:
                additionalIndels += (cigarElement.getLength() - 1);
                break;
            case S:
                softClips++;
                break;
        }
    }

    return (Integer) nm - additionalIndels + softClips;
}
 
Example 2
Source File: HLA.java    From kourami with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public boolean startWIns(SAMRecord sr){
Cigar cigar = sr.getCigar();
if(cigar == null){
    return true;
}else{
    CigarOperator op = cigar.getCigarElements().get(0).getOperator();
    if(op == CigarOperator.I){
	if(HLA.DEBUG)
	    HLA.log.appendln("SKIPPING(Start with Insertion):\t" + sr.getReadName());
	return true;
	
    }
}
return false;
   }
 
Example 3
Source File: SAMRecordUtils.java    From abra2 with MIT License 5 votes vote down vote up
/**
 * Remove leading or trailing soft clips from the input read.
 * Does not modify a read entirely comprised of soft clips.
 */
public static void removeSoftClips(SAMRecord read) {
	
	Cigar cigar = read.getCigar();
	
	CigarElement firstElement = cigar.getCigarElement(0);
	CigarElement lastElement  = cigar.getCigarElement(cigar.numCigarElements()-1);
	
	if ((firstElement.getOperator() == CigarOperator.S) ||
		(lastElement.getOperator() == CigarOperator.S)) {
	
		Cigar newCigar = new Cigar();
		
		String bases = read.getReadString();
		//String qualities = read.getBaseQualityString();
				
		if (firstElement.getOperator() == CigarOperator.S) {
			bases = bases.substring(firstElement.getLength(), bases.length());
			//qualities = qualities.substring(firstElement.getLength(), qualities.length()-1);
		} else {
			newCigar.add(firstElement);
		}
		
		for (int i=1; i<cigar.numCigarElements()-1; i++) {
			newCigar.add(cigar.getCigarElement(i));
		}
		
		if (lastElement.getOperator() == CigarOperator.S) {
			bases = bases.substring(0, bases.length() - lastElement.getLength());
			//qualities = qualities.substring(0, qualities.length() - lastElement.getLength() - 1);
		} else {
			newCigar.add(lastElement);
		}
		
		read.setCigar(newCigar);
		read.setReadString(bases);
		//read.setBaseQualityString(qualities);
	}
}
 
Example 4
Source File: SAMRecordUtils.java    From abra2 with MIT License 5 votes vote down vote up
/**
 * Replace hard clips with soft clips.
 */
public static void replaceHardClips(SAMRecord read) {
	Cigar cigar = read.getCigar();
	
	if (cigar.getCigarElements().size() > 0) {
		CigarElement firstElement = cigar.getCigarElement(0);
		CigarElement lastElement  = cigar.getCigarElement(cigar.numCigarElements()-1);
		
		if ((firstElement.getOperator() == CigarOperator.H) ||
			(lastElement.getOperator() == CigarOperator.H)) {
			
			Cigar newCigar = new Cigar();
			
			boolean isFirst = true;
			
			for (CigarElement element : cigar.getCigarElements()) {
				if (element.getOperator() != CigarOperator.H) {
					newCigar.add(element);
				} else {
					CigarElement softClip = new CigarElement(element.getLength(), CigarOperator.S);
					newCigar.add(softClip);
					
					if (isFirst) {
						read.setReadString(padBases(element.getLength()) + read.getReadString());
					} else {
						read.setReadString(read.getReadString() + padBases(element.getLength()));							
					}
				}
				
				isFirst = false;
			}
			
			read.setCigar(newCigar);
		}
	}
}
 
Example 5
Source File: SpliceJunctionCounter.java    From abra2 with MIT License 5 votes vote down vote up
private List<SpliceJunction> getJunctions(SAMRecord read) {
	List<SpliceJunction> junctions = new ArrayList<SpliceJunction>();
	
	int pos = read.getAlignmentStart();
	
	for (CigarElement elem : read.getCigar()) {
		switch (elem.getOperator()) {
			case D:
			case M:
				pos += elem.getLength();
				break;
			case N:
				junctions.add(new SpliceJunction(read.getReferenceName(), pos, pos+elem.getLength()-1));
				pos += elem.getLength();
				break;
			case S:
			case I:
			case H:
				// NOOP
				break;
			default:
				throw new UnsupportedOperationException("Unsupported Cigar Operator: " + elem.getOperator());
		}
	}
	
	return junctions;
}
 
Example 6
Source File: ReadContextCounter.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
private int indelLength(@NotNull final SAMRecord record) {
    int result = 0;
    for (CigarElement cigarElement : record.getCigar()) {
        switch (cigarElement.getOperator()) {
            case I:
            case D:
                result += cigarElement.getLength();
        }

    }

    return result;
}
 
Example 7
Source File: SAMRecords.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
public static int rightSoftClip(@NotNull final SAMRecord record) {
    Cigar cigar = record.getCigar();
    if (cigar.numCigarElements() > 0) {
        CigarElement lastLement = cigar.getCigarElement(cigar.numCigarElements() - 1);
        if (lastLement.getOperator() == CigarOperator.S) {
            return lastLement.getLength();
        }
    }

    return 0;
}
 
Example 8
Source File: SAMRecords.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
public static int leftSoftClip(@NotNull final SAMRecord record) {
    Cigar cigar = record.getCigar();
    if (cigar.numCigarElements() > 0) {
        CigarElement firstElement = cigar.getCigarElement(0);
        if (firstElement.getOperator() == CigarOperator.S) {
            return firstElement.getLength();
        }
    }

    return 0;
}
 
Example 9
Source File: ReadRecord.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
public static ReadRecord from(final SAMRecord record)
{
    ReadRecord read = new ReadRecord(
            record.getReadName(), record.getReferenceName(), record.getStart(), record.getEnd(),
            record.getReadString(), record.getCigar(), record.getInferredInsertSize(), record.getFlags(),
            record.getMateReferenceName(), record.getMateAlignmentStart());

    read.setSuppAlignment(record.getStringAttribute(SUPPLEMENTARY_ATTRIBUTE));
    return read;
}
 
Example 10
Source File: FilterBam.java    From Drop-seq with MIT License 5 votes vote down vote up
/**
 * Rejects reads if the sum of the cigar string bases is less than M_BASES_IN_CIGAR, reject the read.
 * Don't process if M_BASES_IN_CIGAR is -1.
 * @return return false if the sum of the matching bases in the cigar is greater than the threshold.
 */
boolean rejectOnCigar(final SAMRecord r) {
	if (this.SUM_MATCHING_BASES==null) return (false);
	Cigar c = r.getCigar();
	int count=0;
	for (CigarElement ce: c.getCigarElements())
		if (ce.getOperator()==CigarOperator.M)
			count+=ce.getLength();
	if (count>=this.SUM_MATCHING_BASES)
		return false;
	return true;
}
 
Example 11
Source File: CigarTraversal.java    From hmftools with GNU General Public License v3.0 4 votes vote down vote up
public static void traverseCigar(@NotNull final SAMRecord record, @NotNull final CigarHandler handler) {
    final Cigar cigar = record.getCigar();

    int readIndex = 0;
    int refBase = record.getAlignmentStart();

    for (int i = 0; i < cigar.numCigarElements(); i++) {
        final CigarElement e = cigar.getCigarElement(i);
        switch (e.getOperator()) {
            case H:
                break; // ignore hard clips
            case P:
                break; // ignore pads
            case S:
                if (i == 0) {
                    handler.handleLeftSoftClip(record, e);
                } else if (i == cigar.numCigarElements() - 1) {
                    handler.handleRightSoftClip(record, e, readIndex, refBase);
                }
                readIndex += e.getLength();
                break; // soft clip read bases
            case N:
                handler.handleSkippedReference(record, e, readIndex - 1, refBase - 1);
                refBase += e.getLength();
                break;  // reference skip
            case D:
                handler.handleDelete(record, e, readIndex - 1, refBase - 1);
                refBase += e.getLength();
                break;
            case I:
                // TODO: Handle 1I150M
                int refIndex = refBase - 1 - record.getAlignmentStart();
                if (refIndex >= 0) {
                    handler.handleInsert(record, e, readIndex - 1, refBase - 1);
                }
                readIndex += e.getLength();
                break;
            case M:
            case EQ:
            case X:
                boolean isFollowedByIndel = i < cigar.numCigarElements() - 1 && cigar.getCigarElement(i + 1).getOperator().isIndel();
                final CigarElement element = isFollowedByIndel ? new CigarElement(e.getLength() - 1, e.getOperator()) : e;
                handler.handleAlignment(record, element, readIndex, refBase);
                readIndex += e.getLength();
                refBase += e.getLength();
                break;
            default:
                throw new IllegalStateException("Case statement didn't deal with op: " + e.getOperator() + "in CIGAR: " + cigar);
        }
    }
}
 
Example 12
Source File: SimpleCaller.java    From abra2 with MIT License 4 votes vote down vote up
private BaseInfo getBaseAtReferencePosition(SAMRecord read, int refPos) {
	boolean isNearEdge = false;
	boolean isIndel = false;
	int alignmentStart = read.getAlignmentStart();
	Cigar cigar = read.getCigar();
	
	char base = 'N';
	
	int readIdx = 0;
	int currRefPos = alignmentStart;
	
	for (CigarElement element : cigar.getCigarElements()) {
					
		if (element.getOperator() == CigarOperator.M) {
			readIdx += element.getLength();
			currRefPos += element.getLength();
			
			if (currRefPos > refPos) {  // TODO: double check end of read base here...
				
				int offset = currRefPos - refPos;
				
				if ((offset < 3) || (offset+3 >= element.getLength())) {
					// This position is within 3 bases of start/end of alignment or clipping or indel.
					// Tag this base for evaluation downstream.
					isNearEdge = true;
				}
				
				readIdx -= offset;
				if ((readIdx < 0) || (readIdx >= read.getReadBases().length)) {
					System.err.println("Read index out of bounds for read: " + read.getSAMString());
					break;
				}
				
				if (read.getBaseQualities()[readIdx] >= MIN_BASE_QUALITY) {
					base = (char) read.getReadBases()[readIdx];
				}
				break;
			}
		} else if (element.getOperator() == CigarOperator.I) {
			if (currRefPos == refPos) {
				//TODO: Handle insertions
				isIndel = true;
				break;
			}
			readIdx += element.getLength();
		} else if (element.getOperator() == CigarOperator.D) {
			if (refPos >= currRefPos && refPos <= currRefPos+element.getLength()) {
				//TODO: Handle deletions
				isIndel = true;
				break;
			}				
			currRefPos += element.getLength();
		} else if (element.getOperator() == CigarOperator.S) {
			readIdx += element.getLength();
		}			
	}
	
	return new BaseInfo(Character.toUpperCase(base), isNearEdge, isIndel);
}
 
Example 13
Source File: HLA.java    From kourami with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
public boolean qcCheck(SAMRecord sr){
Cigar cigar = sr.getCigar();
int rLen = sr.getReadLength();
int effectiveLen = 0;
if(cigar==null) 
    return false;
else{
    for(final CigarElement ce : cigar.getCigarElements()){
	CigarOperator op = ce.getOperator();
	int cigarLen = ce.getLength();
	switch(op)
	    {
	    case M:
		{
		    effectiveLen += cigarLen;
		    break;
		}
	    case I:
		{
		    effectiveLen += cigarLen;
		    break;
		}
	    default: 
		break;
	    }
    }
}
boolean readdebug = false;
if(readdebug){
    HLA.log.appendln(sr.getSAMString());
    HLA.log.appendln("EffectiveLen:\t" + effectiveLen);
    HLA.log.appendln("ReadLen:\t" + rLen);
}
Integer i = sr.getIntegerAttribute("NM");
int nm = 0;
if(i!=null)
    nm = i.intValue();
if(readdebug)
    HLA.log.appendln("NM=\t" + nm);
if(nm < 16){
    if(readdebug)
	HLA.log.appendln("PASSWED QC");
    return true;
}
if(readdebug){
    HLA.log.appendln("FAILED QC");
    HLA.log.appendln(sr.getSAMString());
}
return false;
   }
 
Example 14
Source File: SAMRecordUtilsTest.java    From abra2 with MIT License 4 votes vote down vote up
private Cigar getCigar(String str) {
	SAMRecord read = new SAMRecord(null);
	read.setCigarString(str);
	return read.getCigar();
}
 
Example 15
Source File: BamToDetailsConversion.java    From chipster with MIT License 4 votes vote down vote up
/**
 * Find reads in a given range.
 * 
 * <p>
 * TODO add cigar to the list of returned values
 * <p>
 * TODO add pair information to the list of returned values
 * 
 * @param request
 * @return
 * @throws InterruptedException 
 */
@Override
protected void processDataRequest(DataRequest request) throws InterruptedException {
	
	// Read the given region
	CloseableIterator<SAMRecord> iterator = dataSource.query(request.start.chr, request.start.bp.intValue(), request.end.bp.intValue());
	
	// Produce results
	while (iterator.hasNext()) {

		List<Feature> responseList = new LinkedList<Feature>();

		// Split results into chunks
		for (int c = 0; c < RESULT_CHUNK_SIZE && iterator.hasNext(); c++) {
			SAMRecord record = iterator.next();

			// Region for this read
			Region recordRegion = new Region((long) record.getAlignmentStart(), (long) record.getAlignmentEnd(), request.start.chr);

			// Values for this read
			LinkedHashMap<DataType, Object> values = new LinkedHashMap<DataType, Object>();

			Feature read = new Feature(recordRegion, values);

			if (request.getRequestedContents().contains(DataType.ID)) {
				values.put(DataType.ID, record.getReadName());
			}

			if (request.getRequestedContents().contains(DataType.STRAND)) {
				values.put(DataType.STRAND, BamUtils.getStrand(record, coverageType));					
			}

			if (request.getRequestedContents().contains(DataType.QUALITY)) {
				values.put(DataType.QUALITY, record.getBaseQualityString());
			}

			if (request.getRequestedContents().contains(DataType.CIGAR)) {
				Cigar cigar = new Cigar(read, record.getCigar());
				values.put(DataType.CIGAR, cigar);
			}

			// TODO Deal with "=" and "N" in read string
			if (request.getRequestedContents().contains(DataType.SEQUENCE)) {
				String seq = record.getReadString();
				values.put(DataType.SEQUENCE, seq);
			}

			if (request.getRequestedContents().contains(DataType.MATE_POSITION)) {
				
				BpCoord mate = new BpCoord((Long)(long)record.getMateAlignmentStart(),
						new Chromosome(record.getMateReferenceName()));
				
				values.put(DataType.MATE_POSITION, mate);
			}
			
			if (request.getRequestedContents().contains(DataType.BAM_TAG_NH)) {
				Object ng = record.getAttribute("NH");
				if (ng != null) {
					values.put(DataType.BAM_TAG_NH, (Integer)record.getAttribute("NH"));
				}
			}
			
			/*
			 * NOTE! RegionContents created from the same read area has to be equal in methods equals, hash and compareTo. Primary types
			 * should be ok, but objects (including tables) has to be handled in those methods separately. Otherwise tracks keep adding
			 * the same reads to their read sets again and again.
			 */
			responseList.add(read);
		}

		// Send result			
		super.createDataResult(new DataResult(request.getStatus(), responseList));			
	}

	// We are done
	iterator.close();
}
 
Example 16
Source File: BamToCoverageConversion.java    From chipster with MIT License 4 votes vote down vote up
private void calculateCoverage(DataRequest request, BpCoord from, BpCoord to) throws InterruptedException {	
			
	//query data for full average bins, because merging them later would be difficult
	long start = CoverageTool.getBin(from.bp);		
	long end = CoverageTool.getBin(to.bp) + CoverageTool.BIN_SIZE - 1;

	//if end coordinate is smaller than 1 Picard returns a full chromosome and we'll run out of memory
	if (end < 1) {
		end = 1;
	}
			
	CloseableIterator<SAMRecord> iterator = dataSource.query(from.chr, (int)start, (int)end);		
	
	BaseStorage forwardBaseStorage = new BaseStorage();
	BaseStorage reverseBaseStorage = new BaseStorage();
	
	while (iterator.hasNext()) {
		
		SAMRecord record = iterator.next();
		
		LinkedHashMap<DataType, Object> values = new LinkedHashMap<DataType, Object>();

		Region recordRegion = new Region((long) record.getAlignmentStart(), (long) record.getAlignmentEnd(), request.start.chr);
		
		Feature read = new Feature(recordRegion, values);

		values.put(DataType.ID, record.getReadName());
		
		values.put(DataType.STRAND, BamUtils.getStrand(record, coverageType));
		
		Cigar cigar = new Cigar(read, record.getCigar());
		values.put(DataType.CIGAR, cigar);
		
		String seq = record.getReadString();
		values.put(DataType.SEQUENCE, seq);
		
		
		// Split read into continuous blocks (elements) by using the cigar
		List<ReadPart> parts = Cigar.splitElements(read);
		
		for (ReadPart part : parts) {				 
			
			if (read.values.get(DataType.STRAND) == Strand.FORWARD) {
				forwardBaseStorage.addNucleotideCounts(part);
			} else if (read.values.get(DataType.STRAND) == Strand.REVERSE) {
				reverseBaseStorage.addNucleotideCounts(part);			
			}
		}						
	}
			
	// We are done
	iterator.close();
			
	/* Reads that overlap query regions create nucleotide counts outside the query region.
	 * Remove those extra nucleotide counts, because they don't contain all reads of those regions and would show
	 * wrong information. 
	 */
	Region filterRegion = new Region(start, end, from.chr);
	forwardBaseStorage.filter(filterRegion);
	reverseBaseStorage.filter(filterRegion);

	// Send result		
	LinkedList<Feature> resultList = new LinkedList<Feature>();					
	
	createResultList(from, forwardBaseStorage, resultList, Strand.FORWARD);
	createResultList(from, reverseBaseStorage, resultList, Strand.REVERSE);		
	
	LinkedList<Feature> averageCoverage = CoverageTool.average(resultList, from.chr);
	
	if (request.getRequestedContents().contains(DataType.COVERAGE)) {		
		super.createDataResult(new DataResult(request, resultList));
	}
	
	if (request.getRequestedContents().contains(DataType.COVERAGE_AVERAGE)) {
		super.createDataResult(new DataResult(request, averageCoverage));
	}
}
 
Example 17
Source File: Utils.java    From cramtools with Apache License 2.0 4 votes vote down vote up
/**
 * A rip off samtools bam_md.c
 * 
 * @param record
 * @param ref
 * @param calcMD
 * @param calcNM
 */
public static void calculateMdAndNmTags(SAMRecord record, byte[] ref, boolean calcMD, boolean calcNM) {
	if (!calcMD && !calcNM)
		return;

	Cigar cigar = record.getCigar();
	List<CigarElement> cigarElements = cigar.getCigarElements();
	byte[] seq = record.getReadBases();
	int start = record.getAlignmentStart() - 1;
	int i, x, y, u = 0;
	int nm = 0;
	StringBuffer str = new StringBuffer();

	int size = cigarElements.size();
	for (i = y = 0, x = start; i < size; ++i) {
		CigarElement ce = cigarElements.get(i);
		int j, l = ce.getLength();
		CigarOperator op = ce.getOperator();
		if (op == CigarOperator.MATCH_OR_MISMATCH || op == CigarOperator.EQ || op == CigarOperator.X) {
			for (j = 0; j < l; ++j) {
				int z = y + j;

				if (ref.length <= x + j)
					break; // out of boundary

				int c1 = 0;
				int c2 = 0;
				// try {
				c1 = seq[z];
				c2 = ref[x + j];

				if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) {
					// a match
					++u;
				} else {
					str.append(u);
					str.appendCodePoint(ref[x + j]);
					u = 0;
					++nm;
				}
			}
			if (j < l)
				break;
			x += l;
			y += l;
		} else if (op == CigarOperator.DELETION) {
			str.append(u);
			str.append('^');
			for (j = 0; j < l; ++j) {
				if (ref[x + j] == 0)
					break;
				str.appendCodePoint(ref[x + j]);
			}
			u = 0;
			if (j < l)
				break;
			x += l;
			nm += l;
		} else if (op == CigarOperator.INSERTION || op == CigarOperator.SOFT_CLIP) {
			y += l;
			if (op == CigarOperator.INSERTION)
				nm += l;
		} else if (op == CigarOperator.SKIPPED_REGION) {
			x += l;
		}
	}
	str.append(u);

	if (calcMD)
		record.setAttribute(SAMTag.MD.name(), str.toString());
	if (calcNM)
		record.setAttribute(SAMTag.NM.name(), nm);
}
 
Example 18
Source File: AlignmentsTags.java    From cramtools with Apache License 2.0 4 votes vote down vote up
/**
 * Same as {@link htsjdk.samtools.util.SequenceUtil#calculateMdAndNmTags}
 * but allows for reference excerpt instead of a full reference sequence.
 * 
 * @param record
 *            SAMRecord to inject NM and MD tags into
 * @param ref
 *            reference sequence bases, may be a subsequence starting at
 *            refOffest
 * @param refOffset
 *            array offset of the reference bases, 0 is the same as the
 *            whole sequence. This value will be subtracted from the
 *            reference array index in calculations.
 * @param calcMD
 *            calculate MD tag if true
 * @param calcNM
 *            calculate NM tag if true
 */
public static void calculateMdAndNmTags(final SAMRecord record, final byte[] ref, final int refOffset,
		final boolean calcMD, final boolean calcNM) {
	if (!calcMD && !calcNM)
		return;

	final Cigar cigar = record.getCigar();
	final List<CigarElement> cigarElements = cigar.getCigarElements();
	final byte[] seq = record.getReadBases();
	final int start = record.getAlignmentStart() - 1;
	int i, x, y, u = 0;
	int nm = 0;
	final StringBuilder str = new StringBuilder();

	final int size = cigarElements.size();
	for (i = y = 0, x = start; i < size; ++i) {
		final CigarElement ce = cigarElements.get(i);
		int j;
		final int length = ce.getLength();
		final CigarOperator op = ce.getOperator();
		if (op == CigarOperator.MATCH_OR_MISMATCH || op == CigarOperator.EQ || op == CigarOperator.X) {
			for (j = 0; j < length; ++j) {
				final int z = y + j;

				if (refOffset + ref.length <= x + j)
					break; // out of boundary

				int c1 = 0;
				int c2 = 0;
				// try {
				c1 = seq[z];
				c2 = ref[x + j - refOffset];

				if ((c1 == c2) || c1 == 0) {
					// a match
					++u;
				} else {
					str.append(u);
					str.appendCodePoint(ref[x + j - refOffset]);
					u = 0;
					++nm;
				}
			}
			if (j < length)
				break;
			x += length;
			y += length;
		} else if (op == CigarOperator.DELETION) {
			str.append(u);
			str.append('^');
			for (j = 0; j < length; ++j) {
				if (ref[x + j - refOffset] == 0)
					break;
				str.appendCodePoint(ref[x + j - refOffset]);
			}
			u = 0;
			if (j < length)
				break;
			x += length;
			nm += length;
		} else if (op == CigarOperator.INSERTION || op == CigarOperator.SOFT_CLIP) {
			y += length;
			if (op == CigarOperator.INSERTION)
				nm += length;
		} else if (op == CigarOperator.SKIPPED_REGION) {
			x += length;
		}
	}
	str.append(u);

	if (calcMD)
		record.setAttribute(SAMTag.MD.name(), str.toString());
	if (calcNM)
		record.setAttribute(SAMTag.NM.name(), nm);
}