Java Code Examples for htsjdk.samtools.SAMRecord#getReadName()
The following examples show how to use
htsjdk.samtools.SAMRecord#getReadName() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FingerprintChecker.java From picard with MIT License | 6 votes |
private FingerprintIdDetails createUnknownFP(final Path samFile, final SAMRecord rec) { final PicardException e = new PicardException("Found read with no readgroup: " + rec.getReadName() + " in file: " + samFile); if (validationStringency != ValidationStringency.STRICT) { final SAMReadGroupRecord readGroupRecord = new SAMReadGroupRecord("<UNKNOWN>:::" + samFile.toUri().toString()); readGroupRecord.setLibrary("<UNKNOWN>"); readGroupRecord.setSample(defaultSampleID); readGroupRecord.setPlatformUnit("<UNKNOWN>.0.ZZZ"); if (validationStringency != ValidationStringency.SILENT && missingRGFiles.add(samFile)) { log.warn(e.getMessage()); log.warn("further messages from this file will be suppressed"); } return new FingerprintIdDetails(readGroupRecord, samFile.toUri().toString()); } else { log.error(e.getMessage()); throw e; } }
Example 2
Source File: TagOrderIteratorTest.java From Drop-seq with MIT License | 6 votes |
@Test(enabled=true) public void testCellSorting() { final Iterator<SAMRecord> toi = getTagOrderIterator(IN_FILE, "ZC"); int counter=0; String [] cellOrder={"ATCAGGGACAGA","ATCAGGGACAGA","ATCAGGGACAGA","ATCAGGGACAGA","ATCAGGGACAGA","ATCAGGGACAGA","TGGCGAAGAGAT", "TGGCGAAGAGAT","TGGCGAAGAGAT","TGGCGAAGAGAT","TGGCGAAGAGAT","TGGCGAAGAGAT"}; while (toi.hasNext()) { SAMRecord r = toi.next(); r.getReadName(); String cellName = r.getStringAttribute("ZC"); String expectedName = cellOrder[counter]; Assert.assertEquals(cellName, expectedName); counter++; } }
Example 3
Source File: ReadRecord.java From hmftools with GNU General Public License v3.0 | 5 votes |
public static ReadRecord from(final SAMRecord record) { ReadRecord read = new ReadRecord( record.getReadName(), record.getReferenceName(), record.getStart(), record.getEnd(), record.getReadString(), record.getCigar(), record.getInferredInsertSize(), record.getFlags(), record.getMateReferenceName(), record.getMateAlignmentStart()); read.setSuppAlignment(record.getStringAttribute(SUPPLEMENTARY_ATTRIBUTE)); return read; }
Example 4
Source File: PrimaryAlignmentKey.java From picard with MIT License | 5 votes |
public PrimaryAlignmentKey(final SAMRecord rec) { if (rec.isSecondaryOrSupplementary()) { throw new IllegalArgumentException("PrimaryAligmentKey cannot be a secondary or suplementary alignment"); } this.pairStatus = rec.getReadPairedFlag() ? (rec.getSecondOfPairFlag() ? PairStatus.SECOND : PairStatus.FIRST) : PairStatus.UNPAIRED; this.readName = rec.getReadName(); }
Example 5
Source File: SamToFastqWithTags.java From picard with MIT License | 5 votes |
private String assertTagExists(final SAMRecord record, final String tag) { String value = record.getStringAttribute(tag); if (value == null) { throw new PicardException("Record: " + record.getReadName() + " does have a value for tag: " + tag); } return value; }
Example 6
Source File: BamToBfqWriter.java From picard with MIT License | 5 votes |
/** * Writes out a SAMRecord in Maq fastq format * * @param codec the code to write to * @param rec the SAMRecord to write */ private void writeFastqRecord(final BinaryCodec codec, final SAMRecord rec) { // Trim the run barcode off the read name String readName = rec.getReadName(); if (namePrefix != null && readName.startsWith(namePrefix)) { readName = readName.substring(nameTrim); } // Writes the length of the read name and then the name (null-terminated) codec.writeString(readName, true, true); final char[] seqs = rec.getReadString().toCharArray(); final char[] quals = rec.getBaseQualityString().toCharArray(); int retainedLength = seqs.length; if (clipAdapters){ // adjust to a shorter length iff clipping tag exists Integer trimPoint = rec.getIntegerAttribute(ReservedTagConstants.XT); if (trimPoint != null) { ValidationUtils.validateArg(rec.getReadLength() == seqs.length, () -> "length of read and seqs differ. Found " + rec.getReadLength() + " and '" + seqs.length + "."); retainedLength = Math.min(seqs.length, Math.max(SEED_REGION_LENGTH, trimPoint -1)); } } // Write the length of the sequence codec.writeInt(basesToWrite != null ? basesToWrite : seqs.length); // Calculate and write the sequence and qualities final byte[] seqsAndQuals = encodeSeqsAndQuals(seqs, quals, retainedLength); codec.writeBytes(seqsAndQuals); }
Example 7
Source File: ReadsAtLocus.java From abra2 with MIT License | 5 votes |
public String toString() { String s = chr + ":" + position; for (SAMRecord read : reads) { s += "," + read.getReadName(); } return s; }
Example 8
Source File: SortedSAMWriter.java From abra2 with MIT License | 5 votes |
public MateKey getOriginalReadInfo(SAMRecord read) { int pos = read.getAlignmentStart(); boolean isUnmapped = read.getReadUnmappedFlag(); boolean isRc = read.getReadNegativeStrandFlag(); String yo = read.getStringAttribute("YO"); if (yo != null) { if (yo.startsWith("N/A")) { // Original alignment was unmapped isUnmapped = true; // isRc = false; // Orientation is forced to be opposite of mate during realignment // regardless of the original alignment. isRc = !read.getMateNegativeStrandFlag(); } else { String[] fields = yo.split(":"); pos = Integer.parseInt(fields[1]); isUnmapped = false; isRc = fields[2].equals("-") ? true : false; } } int readNum = read.getFirstOfPairFlag() ? 1 : 2; return new MateKey(read.getReadName(), pos, isUnmapped, isRc, readNum, read.getAlignmentStart()); }
Example 9
Source File: SortedSAMWriter.java From abra2 with MIT License | 5 votes |
public MateKey getMateKey(SAMRecord read) { // If mate is mapped, use read flag. // If mate is not mapped, use opposite of this read's RC flag boolean isMateRevOrientation = read.getMateUnmappedFlag() ? !read.getReadNegativeStrandFlag() : read.getMateNegativeStrandFlag(); int matePos = read.getMateUnmappedFlag() ? -1 : read.getMateAlignmentStart(); int mateNum = read.getFirstOfPairFlag() ? 2 : 1; return new MateKey(read.getReadName(), matePos, read.getMateUnmappedFlag(), isMateRevOrientation, mateNum, read.getAlignmentStart()); }
Example 10
Source File: AnnotationUtils.java From Drop-seq with MIT License | 5 votes |
public Map<Gene, LocusFunction> getLocusFunctionForReadByGene (final SAMRecord rec, final OverlapDetector<Gene> geneOverlapDetector) { Map<Gene, LocusFunction> result = new HashMap<>(); final Interval readInterval = new Interval(rec.getReferenceName(), rec.getAlignmentStart(), rec.getAlignmentEnd(), rec.getReadNegativeStrandFlag(), rec.getReadName()); final Collection<Gene> overlappingGenes = geneOverlapDetector.getOverlaps(readInterval); for (Gene g: overlappingGenes) { LocusFunction f = getLocusFunctionForRead(rec, g); result.put(g, f); } return result; }
Example 11
Source File: UmiGraph.java From picard with MIT License | 5 votes |
/** * Creates a UmiGraph object * @param set Set of reads that have the same start-stop positions, these will be broken up by UMI * @param umiTag UMI tag used in the SAM/BAM/CRAM file ie. RX * @param molecularIdentifierTag Molecular identifier tag used in the SAM/BAM/CRAM file ie. MI * @param allowMissingUmis Allow for missing UMIs * @param duplexUmis Whether or not to use duplex of single strand UMIs */ UmiGraph(final DuplicateSet set, final String umiTag, final String molecularIdentifierTag, final boolean allowMissingUmis, final boolean duplexUmis) { this.umiTag = umiTag; this.molecularIdentifierTag = molecularIdentifierTag; this.allowMissingUmis = allowMissingUmis; this.duplexUmis = duplexUmis; records = set.getRecords(); // First ensure that all the reads have a UMI, if any reads are missing a UMI throw an exception unless allowMissingUmis is true for (final SAMRecord rec : records) { if (rec.getStringAttribute(umiTag) == null) { if (!allowMissingUmis) { throw new PicardException("Read " + rec.getReadName() + " does not contain a UMI with the " + umiTag + " attribute."); } rec.setAttribute(umiTag, ""); } } // Count the number of times each UMI occurs umiCounts = records.stream().collect(Collectors.groupingBy(p -> UmiUtil.getTopStrandNormalizedUmi(p, umiTag, duplexUmis), counting())); // At first we consider every UMI as if it were its own duplicate set numUmis = umiCounts.size(); umi = new String[numUmis]; duplicateSetID = IntStream.rangeClosed(0, numUmis-1).toArray(); int i = 0; for (final String key : umiCounts.keySet()) { umi[i] = key; i++; } }
Example 12
Source File: SAMRecordField.java From cramtools with Apache License 2.0 | 5 votes |
public Object getValue(SAMRecord record) { switch (type) { case QNAME: return record.getReadName(); case FLAG: return Integer.toString(record.getFlags()); case RNAME: return record.getReferenceName(); case POS: return Integer.toString(record.getAlignmentStart()); case MAPQ: return Integer.toString(record.getMappingQuality()); case CIGAR: return record.getCigarString(); case RNEXT: return record.getMateReferenceName(); case PNEXT: return Integer.toString(record.getMateAlignmentStart()); case TLEN: return Integer.toString(record.getInferredInsertSize()); case SEQ: return record.getReadString(); case QUAL: return record.getBaseQualityString(); case TAG: if (tagId == null) throw new IllegalArgumentException("Tag mismatch reqiues tag id. "); return record.getAttribute(tagId); default: throw new IllegalArgumentException("Unknown record field: " + type.name()); } }
Example 13
Source File: AggregatedTagOrderIteratorTest.java From Drop-seq with MIT License | 5 votes |
@Test(enabled=true) public void testGeneCellSorting() { final Iterator<List<SAMRecord>> groupingIterator = filterSortAndGroupByTags(IN_FILE, "GE", "ZC"); int counter=0; String [] geneOrder={"CHUK", "CHUK", "NKTR", "NKTR", "SNRPA1", "SNRPA1"}; String [] cellOrder={"ATCAGGGACAGA", "TGGCGAAGAGAT", "ATCAGGGACAGA", "TGGCGAAGAGAT", "ATCAGGGACAGA", "TGGCGAAGAGAT"}; int [] expectedSize = {2,2,2,2,2,2}; while (groupingIterator.hasNext()) { Collection<SAMRecord> r = groupingIterator.next(); int size = r.size(); SAMRecord rec = r.iterator().next(); String readName = rec.getReadName(); String cellName = rec.getStringAttribute("ZC"); String geneName = rec.getStringAttribute("GE"); System.out.println("Cell [" + cellName +"] geneName [" + geneName +"] size [" + size +"]"); Assert.assertEquals(cellName, cellOrder[counter]); Assert.assertEquals(geneName, geneOrder[counter]); Assert.assertEquals(size, expectedSize[counter]); counter++; } }
Example 14
Source File: TagValueProcessorTest.java From Drop-seq with MIT License | 5 votes |
@Test(enabled=true) public void testFilterRejectValues() { final Set<String> values = new HashSet<>(); values.add("ATCAGGGACAGA"); final Iterator<SAMRecord> it = TagOrderIteratorTest.getTagOrderIterator(IN_FILE, "ZC"); final Iterator<SAMRecord> toi = new FilteredIterator<SAMRecord>(it) { @Override public boolean filterOut(final SAMRecord rec) { return values.contains(rec.getAttribute("ZC")); } }; int counter=0; String [] cellOrder={"TGGCGAAGAGAT", "TGGCGAAGAGAT","TGGCGAAGAGAT","TGGCGAAGAGAT","TGGCGAAGAGAT","TGGCGAAGAGAT"}; while (toi.hasNext()) { SAMRecord r = toi.next(); r.getReadName(); String cellName = r.getStringAttribute("ZC"); String expectedName = cellOrder[counter]; Assert.assertEquals(cellName, expectedName); counter++; } }
Example 15
Source File: SamToFastqTest.java From picard with MIT License | 5 votes |
void add(final SAMRecord record) { if (!record.getReadPairedFlag()) throw new PicardException("Record "+record.getReadName()+" is not paired"); if (record.getFirstOfPairFlag()) { if (mate1 != null) throw new PicardException("Mate 1 already set for record: "+record.getReadName()); mate1 = record ; } else if (record.getSecondOfPairFlag()) { if (mate2 != null) throw new PicardException("Mate 2 already set for record: "+record.getReadName()); mate2 = record ; } else throw new PicardException("Neither FirstOfPairFlag or SecondOfPairFlag is set for a paired record"); }
Example 16
Source File: SpermSeqMarkDuplicatesTest.java From Drop-seq with MIT License | 5 votes |
@Test // tests which reads are marked as duplicates by the read position strategy. public void testDetectDuplicatesByReadPositionStrategy() throws IOException { String [] duplicateReadNames={"READ1:2", "READ2:3"}; Set<String> dupes = new HashSet<String>(Arrays.asList(duplicateReadNames)); SpermSeqMarkDuplicates d = new SpermSeqMarkDuplicates(); d.INPUT=Arrays.asList(INPUT); d.OUTPUT=File.createTempFile("testDetectDuplicatesByReadPositionStrategy.", ".bam"); d.OUTPUT.deleteOnExit(); d.OUTPUT_STATS=File.createTempFile("testDetectDuplicatesByReadPositionStrategy.", ".pcr_duplicate_metrics"); d.OUTPUT_STATS.deleteOnExit(); Assert.assertEquals(0, d.doWork()); SamReader inputSam = SamReaderFactory.makeDefault().open(d.OUTPUT); for (SAMRecord r: inputSam) { boolean duplicateReadFlag = r.getDuplicateReadFlag(); String readName = r.getReadName(); if (dupes.contains(readName)) Assert.assertTrue(duplicateReadFlag); else Assert.assertFalse(duplicateReadFlag); } final List<SpermSeqMarkDuplicates.PCRDuplicateMetrics> beans = MetricsFile.readBeans(d.OUTPUT_STATS); Assert.assertEquals(1, beans.size()); Assert.assertEquals(dupes.size(), beans.get(0).NUM_DUPLICATES); }
Example 17
Source File: SamRecordComparision.java From cramtools with Apache License 2.0 | 5 votes |
public static Object getValue(SAMRecord record, FIELD_TYPE field, String tagId) { if (field == null) throw new IllegalArgumentException("Record field is null."); switch (field) { case QNAME: return record.getReadName(); case FLAG: return Integer.toString(record.getFlags()); case RNAME: return record.getReferenceName(); case POS: return Integer.toString(record.getAlignmentStart()); case MAPQ: return Integer.toString(record.getMappingQuality()); case CIGAR: return record.getCigarString(); case RNEXT: return record.getMateReferenceName(); case PNEXT: return Integer.toString(record.getMateAlignmentStart()); case TLEN: return Integer.toString(record.getInferredInsertSize()); case SEQ: return record.getReadString(); case QUAL: return record.getBaseQualityString(); case TAG: if (tagId == null) throw new IllegalArgumentException("Tag mismatch reqiues tag id. "); return record.getAttribute(tagId); default: throw new IllegalArgumentException("Unknown record field: " + field.name()); } }
Example 18
Source File: SamToFastq.java From picard with MIT License | 4 votes |
private void writeRecord(final SAMRecord read, final Integer mateNumber, final FastqWriter writer, final int basesToTrim, final Integer maxBasesToWrite) { final String seqHeader = mateNumber == null ? read.getReadName() : read.getReadName() + "/" + mateNumber; String readString = read.getReadString(); String baseQualities = read.getBaseQualityString(); // If we're clipping, do the right thing to the bases or qualities if (CLIPPING_ATTRIBUTE != null) { Integer clipPoint = (Integer) read.getAttribute(CLIPPING_ATTRIBUTE); if (clipPoint != null && clipPoint < CLIPPING_MIN_LENGTH) { clipPoint = Math.min(readString.length(), CLIPPING_MIN_LENGTH); } if (clipPoint != null) { if (CLIPPING_ACTION.equalsIgnoreCase(CLIP_TRIM)) { readString = clip(readString, clipPoint, null, !read.getReadNegativeStrandFlag()); baseQualities = clip(baseQualities, clipPoint, null, !read.getReadNegativeStrandFlag()); } else if (CLIPPING_ACTION.equalsIgnoreCase(CLIP_TO_N)) { readString = clip(readString, clipPoint, CLIP_TO_N.charAt(0), !read.getReadNegativeStrandFlag()); } else { final char newQual = SAMUtils.phredToFastq(new byte[]{(byte) Integer.parseInt(CLIPPING_ACTION)}).charAt(0); baseQualities = clip(baseQualities, clipPoint, newQual, !read.getReadNegativeStrandFlag()); } } } if (RE_REVERSE && read.getReadNegativeStrandFlag()) { readString = SequenceUtil.reverseComplement(readString); baseQualities = StringUtil.reverseString(baseQualities); } if (basesToTrim > 0) { readString = readString.substring(basesToTrim); baseQualities = baseQualities.substring(basesToTrim); } // Perform quality trimming if desired, making sure to leave at least one base! if (QUALITY != null) { final byte[] quals = SAMUtils.fastqToPhred(baseQualities); final int qualityTrimIndex = Math.max(1, TrimmingUtil.findQualityTrimPoint(quals, QUALITY)); if (qualityTrimIndex < quals.length) { readString = readString.substring(0, qualityTrimIndex); baseQualities = baseQualities.substring(0, qualityTrimIndex); } } if (maxBasesToWrite != null && maxBasesToWrite < readString.length()) { readString = readString.substring(0, maxBasesToWrite); baseQualities = baseQualities.substring(0, maxBasesToWrite); } writer.write(new FastqRecord(seqHeader, readString, "", baseQualities)); }
Example 19
Source File: CgSamBamSequenceDataSource.java From rtg-tools with BSD 2-Clause "Simplified" License | 4 votes |
/** * Unroll both the read bases and quality data for a CG alignment * @param rec the alignment * @return the unrolled read */ public static SamSequence unrollCgRead(SAMRecord rec) { final int projectedSplit = rec.getAlignmentStart() * ((rec.getFlags() & SamBamConstants.SAM_MATE_IS_REVERSE) != 0 ? 1 : -1); final byte flags = SamSequence.getFlags(rec); final byte[] expandedRead; final byte[] expandedQual; if (rec.getReadUnmappedFlag()) { expandedRead = rec.getReadBases(); expandedQual = rec.getBaseQualities(); } else { final String gc = rec.getStringAttribute(SamUtils.ATTRIBUTE_CG_RAW_READ_INSTRUCTIONS); if (gc == null) { throw new NoTalkbackSlimException("SAM Record does not contain CGI read reconstruction attribute: " + rec.getSAMString()); } final byte[] gq = FastaUtils.asciiToRawQuality(SamUtils.allowEmpty(rec.getStringAttribute(SamUtils.ATTRIBUTE_CG_OVERLAP_QUALITY))); final byte[] gs = SamUtils.allowEmpty(rec.getStringAttribute(SamUtils.ATTRIBUTE_CG_OVERLAP_BASES)).getBytes(); final boolean legacyLegacy = gq.length == gs.length / 2; expandedRead = unrollLegacyRead(rec.getReadBases(), gs, gc); if (expandedRead == null) { throw new NoTalkbackSlimException("Could not reconstruct read bases for record: " + rec.getSAMString()); } if (rec.getBaseQualities().length == 0) { expandedQual = rec.getBaseQualities(); } else { if (!legacyLegacy && gq.length != gs.length) { throw new NoTalkbackSlimException("Unexpected length of CG quality information: " + rec.getSAMString()); } final byte[] samQualities = rec.getBaseQualities(); if (legacyLegacy) { expandedQual = unrollLegacyLegacyQualities(samQualities, gq, gc); } else { final byte[] bytes = unrollLegacyRead(samQualities, gq, gc); expandedQual = bytes; } } } final byte[] readBytes = CgUtils.unPad(expandedRead, !rec.getReadNegativeStrandFlag()); final byte[] readQual = CgUtils.unPad(expandedQual, !rec.getReadNegativeStrandFlag()); return new SamSequence(rec.getReadName(), readBytes, readQual, flags, projectedSplit); }
Example 20
Source File: BamToBfqWriter.java From picard with MIT License | 4 votes |
/** * Path for writing bfqs for paired-end reads * * @param iterator the iterator witht he SAM Records to write * @param tagFilter the filter for noise reads * @param qualityFilter the filter for PF reads */ private void writePairedEndBfqs(final Iterator<SAMRecord> iterator, final TagFilter tagFilter, final FailsVendorReadQualityFilter qualityFilter, SamRecordFilter ... otherFilters) { // Open the codecs for writing int fileIndex = 0; initializeNextBfqFiles(fileIndex++); int records = 0; RECORD_LOOP: while (iterator.hasNext()) { final SAMRecord first = iterator.next(); if (!iterator.hasNext()) { throw new PicardException("Mismatched number of records in " + this.bamFile.getAbsolutePath()); } final SAMRecord second = iterator.next(); if (!second.getReadName().equals(first.getReadName()) || first.getFirstOfPairFlag() == second.getFirstOfPairFlag()) { throw new PicardException("Unmatched read pairs in " + this.bamFile.getAbsolutePath() + ": " + first.getReadName() + ", " + second.getReadName() + "."); } // If *both* are noise reads, filter them out if (tagFilter.filterOut(first) && tagFilter.filterOut(second)) { continue; } // If either fails to pass filter, then exclude them as well if (!includeNonPfReads && (qualityFilter.filterOut(first) || qualityFilter.filterOut(second))) { continue; } // If either fails any of the other filters, exclude them both for (SamRecordFilter filter : otherFilters) { if (filter.filterOut(first) || filter.filterOut(second)) { continue RECORD_LOOP; } } // Otherwise, write them out records++; if (records % increment == 0) { first.setReadName(first.getReadName() + "/1"); writeFastqRecord(first.getFirstOfPairFlag() ? codec1 : codec2, first); second.setReadName(second.getReadName() + "/2"); writeFastqRecord(second.getFirstOfPairFlag() ? codec1 : codec2, second); wrote++; if (wrote % 1000000 == 0) { log.info(wrote + " records written."); } if (chunk > 0 && wrote % chunk == 0) { initializeNextBfqFiles(fileIndex++); } } } }