Java Code Examples for htsjdk.samtools.SAMRecord#getDuplicateReadFlag()
The following examples show how to use
htsjdk.samtools.SAMRecord#getDuplicateReadFlag() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BamTagHistogram.java From Drop-seq with MIT License | 6 votes |
public ObjectCounter<String> getBamTagCounts (final Iterator<SAMRecord> iterator, final String tag, final int readQuality, final boolean filterPCRDuplicates) { ProgressLogger pl = new ProgressLogger(log, 10000000); ObjectCounter<String> counter = new ObjectCounter<>(); for (final SAMRecord r : new IterableAdapter<>(iterator)) { pl.record(r); if (filterPCRDuplicates && r.getDuplicateReadFlag()) continue; if (r.getMappingQuality()<readQuality) continue; if (r.isSecondaryOrSupplementary()) continue; //String s1 = r.getStringAttribute(tag); String s1 = getAnyTagAsString(r, tag); if (s1!=null && s1!="") counter.increment(s1); // if the tag doesn't have a value, don't increment it. } return (counter); }
Example 2
Source File: UnmarkDuplicatesIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
private static int getDuplicateCountForBam(final File bam, final File referenceFile) throws IOException { int duplicateCount = 0; final SamReaderFactory factory = SamReaderFactory.makeDefault(); if(referenceFile != null) { factory.referenceSequence(referenceFile); } try ( final SamReader reader = factory.open(bam) ) { for ( SAMRecord read : reader ) { if ( read.getDuplicateReadFlag() ) { ++duplicateCount; } } } return duplicateCount; }
Example 3
Source File: ReadQualityMetrics.java From Drop-seq with MIT License | 6 votes |
public void addRead (SAMRecord r) { // skip secondary of supplemental reads. if (r.isSecondaryOrSupplementary()) { return; } boolean isDupe = r.getDuplicateReadFlag(); int mapQuality = r.getMappingQuality(); boolean unmapped = r.getReadUnmappedFlag(); if (histogram!=null) { histogram.increment(mapQuality); } totalReads++; if (!unmapped) { mappedReads++; if (mapQuality >= this.mapQuality) { hqMappedReads++; if (!isDupe) { hqMappedReadsNoPCRDupes++; } } } }
Example 4
Source File: SpermSeqMarkDuplicatesTest.java From Drop-seq with MIT License | 5 votes |
@Test // tests which reads are marked as duplicates by the read position strategy. public void testDetectDuplicatesByReadPositionStrategy() throws IOException { String [] duplicateReadNames={"READ1:2", "READ2:3"}; Set<String> dupes = new HashSet<String>(Arrays.asList(duplicateReadNames)); SpermSeqMarkDuplicates d = new SpermSeqMarkDuplicates(); d.INPUT=Arrays.asList(INPUT); d.OUTPUT=File.createTempFile("testDetectDuplicatesByReadPositionStrategy.", ".bam"); d.OUTPUT.deleteOnExit(); d.OUTPUT_STATS=File.createTempFile("testDetectDuplicatesByReadPositionStrategy.", ".pcr_duplicate_metrics"); d.OUTPUT_STATS.deleteOnExit(); Assert.assertEquals(0, d.doWork()); SamReader inputSam = SamReaderFactory.makeDefault().open(d.OUTPUT); for (SAMRecord r: inputSam) { boolean duplicateReadFlag = r.getDuplicateReadFlag(); String readName = r.getReadName(); if (dupes.contains(readName)) Assert.assertTrue(duplicateReadFlag); else Assert.assertFalse(duplicateReadFlag); } final List<SpermSeqMarkDuplicates.PCRDuplicateMetrics> beans = MetricsFile.readBeans(d.OUTPUT_STATS); Assert.assertEquals(1, beans.size()); Assert.assertEquals(dupes.size(), beans.get(0).NUM_DUPLICATES); }
Example 5
Source File: BamSlicer.java From hmftools with GNU General Public License v3.0 | 5 votes |
private boolean passesFilters(@NotNull final SAMRecord record) { if(record.getMappingQuality() < mMinMappingQuality || record.getReadUnmappedFlag()) return false; if(mDropSupplementaries && (record.getSupplementaryAlignmentFlag() || record.isSecondaryAlignment())) return false; if(mDropDuplicates && record.getDuplicateReadFlag()) return false; return true; }
Example 6
Source File: GcBiasMetricsCollector.java From picard with MIT License | 5 votes |
@Override public void acceptRecord(final GcBiasCollectorArgs args) { final SAMRecord rec = args.getRec(); if (logCounter < 100 && rec.getReadBases().length == 0) { log.warn("Omitting read " + rec.getReadName() + " with '*' in SEQ field."); if (++logCounter == 100) { log.warn("There are more than 100 reads with '*' in SEQ field in file."); } return; } if (!rec.getReadUnmappedFlag()) { if (referenceIndex != rec.getReferenceIndex() || gc == null) { final ReferenceSequence ref = args.getRef(); refBases = ref.getBases(); StringUtil.toUpperCase(refBases); final int refLength = refBases.length; final int lastWindowStart = refLength - scanWindowSize; gc = GcBiasUtils.calculateAllGcs(refBases, lastWindowStart, scanWindowSize); referenceIndex = rec.getReferenceIndex(); } addReadToGcData(rec, this.gcData); if (ignoreDuplicates && !rec.getDuplicateReadFlag()) { addReadToGcData(rec, this.gcDataNonDups); } } else { updateTotalClusters(rec, this.gcData); if (ignoreDuplicates && !rec.getDuplicateReadFlag()) { updateTotalClusters(rec, this.gcDataNonDups); } } }
Example 7
Source File: InsertSizeMetricsCollector.java From picard with MIT License | 5 votes |
@Override public void acceptRecord(final SAMRecord record, final ReferenceSequence refSeq) { if (!record.getReadPairedFlag() || record.getReadUnmappedFlag() || record.getMateUnmappedFlag() || record.getFirstOfPairFlag() || record.isSecondaryOrSupplementary() || (record.getDuplicateReadFlag() && !this.includeDuplicates) || record.getInferredInsertSize() == 0) { return; } super.acceptRecord(record, refSeq); }
Example 8
Source File: CollectDuplicateMetrics.java From picard with MIT License | 5 votes |
@Override protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) { final DuplicationMetrics metrics = AbstractMarkDuplicatesCommandLineProgram.addReadToLibraryMetrics(rec, header, libraryIdGenerator); final boolean isDuplicate = rec.getDuplicateReadFlag(); if (isDuplicate) { AbstractMarkDuplicatesCommandLineProgram.addDuplicateReadToMetrics(rec, metrics); } }
Example 9
Source File: MultiSamReader.java From abra2 with MIT License | 5 votes |
private boolean shouldAssemble(SAMRecord read) { return ( (!read.getDuplicateReadFlag()) && (!read.getReadFailsVendorQualityCheckFlag()) && read.getReadLength() > 0 && (read.getMappingQuality() >= this.minMapqForAssembly || read.getReadUnmappedFlag()) && SAMRecordUtils.isPrimary(read)); // Was previously an id check, so supplemental / secondary alignments could be included }
Example 10
Source File: MNVRegionValidator.java From hmftools with GNU General Public License v3.0 | 4 votes |
private boolean isEligible(@NotNull final SAMRecord record) { return record.getMappingQuality() >= MIN_MAPPING_QUALITY && !(record.getReadUnmappedFlag() || record.getDuplicateReadFlag() || record.isSecondaryOrSupplementary()); }
Example 11
Source File: MarkDuplicatesTagRepresentativeReadIndexTester.java From picard with MIT License | 4 votes |
@Override public void test() { try { updateExpectedDuplicationMetrics(); // Read the output and check the duplicate flag int outputRecords = 0; int indexInFile = 0; final SamReader reader = SamReaderFactory.makeDefault().open(getOutput()); for (final SAMRecord record : reader) { outputRecords++; final String key = samRecordToDuplicatesFlagsKey(record); Assert.assertTrue(this.duplicateFlags.containsKey(key),"DOES NOT CONTAIN KEY: " + key); final boolean value = this.duplicateFlags.get(key); this.duplicateFlags.remove(key); if (value != record.getDuplicateReadFlag()) { System.err.println("Mismatching read:"); System.err.print(record.getSAMString()); } Assert.assertEquals(record.getDuplicateReadFlag(), value); if (testRepresentativeReads) { if (expectedRepresentativeIndexMap.containsKey(indexInFile) && expectedSetSizeMap.containsKey(record.getReadName())){ Assert.assertEquals(record.getAttribute("DI"), expectedRepresentativeIndexMap.get(indexInFile)); Assert.assertEquals(record.getAttribute("DS"), expectedSetSizeMap.get(record.getReadName())); } } indexInFile+=1; } CloserUtil.close(reader); // Ensure the program output the same number of records as were read in Assert.assertEquals(outputRecords, this.getNumberOfRecords(), ("saw " + outputRecords + " output records, vs. " + this.getNumberOfRecords() + " input records")); // Check the values written to metrics.txt against our input expectations final MetricsFile<DuplicationMetrics, Comparable<?>> metricsOutput = new MetricsFile<>(); try{ metricsOutput.read(new FileReader(metricsFile)); } catch (final FileNotFoundException ex) { throw new PicardException("Metrics file not found: " + ex); } final List<DuplicationMetrics> g = metricsOutput.getMetrics(); // expect getMetrics to return a collection with a single duplicateMetrics object Assert.assertEquals(metricsOutput.getMetrics().size(), 1); final DuplicationMetrics observedMetrics = metricsOutput.getMetrics().get(0); Assert.assertEquals(observedMetrics.UNPAIRED_READS_EXAMINED, expectedMetrics.UNPAIRED_READS_EXAMINED, "UNPAIRED_READS_EXAMINED does not match expected"); Assert.assertEquals(observedMetrics.READ_PAIRS_EXAMINED, expectedMetrics.READ_PAIRS_EXAMINED, "READ_PAIRS_EXAMINED does not match expected"); Assert.assertEquals(observedMetrics.UNMAPPED_READS, expectedMetrics.UNMAPPED_READS, "UNMAPPED_READS does not match expected"); Assert.assertEquals(observedMetrics.UNPAIRED_READ_DUPLICATES, expectedMetrics.UNPAIRED_READ_DUPLICATES, "UNPAIRED_READ_DUPLICATES does not match expected"); Assert.assertEquals(observedMetrics.READ_PAIR_DUPLICATES, expectedMetrics.READ_PAIR_DUPLICATES, "READ_PAIR_DUPLICATES does not match expected"); Assert.assertEquals(observedMetrics.READ_PAIR_OPTICAL_DUPLICATES, expectedMetrics.READ_PAIR_OPTICAL_DUPLICATES, "READ_PAIR_OPTICAL_DUPLICATES does not match expected"); Assert.assertEquals(observedMetrics.PERCENT_DUPLICATION, expectedMetrics.PERCENT_DUPLICATION, "PERCENT_DUPLICATION does not match expected"); Assert.assertEquals(observedMetrics.ESTIMATED_LIBRARY_SIZE, expectedMetrics.ESTIMATED_LIBRARY_SIZE, "ESTIMATED_LIBRARY_SIZE does not match expected"); Assert.assertEquals(observedMetrics.SECONDARY_OR_SUPPLEMENTARY_RDS, expectedMetrics.SECONDARY_OR_SUPPLEMENTARY_RDS, "SECONDARY_OR_SUPPLEMENTARY_RDS does not match expected"); } finally { IOUtil.recursiveDelete(getOutputDir().toPath()); } }
Example 12
Source File: CountingDuplicateFilter.java From picard with MIT License | 4 votes |
@Override public boolean reallyFilterOut(final SAMRecord record) { return record.getDuplicateReadFlag(); }
Example 13
Source File: SomaticLocusCaller.java From abra2 with MIT License | 4 votes |
private Counts getCounts(SamReader reader, LocusInfo locus) { int depth = 0; int altCount = 0; int refCount = 0; CloseableIterator<SAMRecord> iter = reader.queryOverlapping(locus.chromosome, locus.posStart, locus.posStop); while (iter.hasNext()) { SAMRecord read = iter.next(); if (!read.getDuplicateReadFlag()) { depth += 1; Object[] baseAndQual = getBaseAndQualAtPosition(read, locus.posStart); Character base = (Character) baseAndQual[0]; int baseQual = (Integer) baseAndQual[1]; Character refBase = c2r.getSequence(locus.chromosome, locus.posStart, 1).charAt(0); // Override input with actual reference // locus.ref = new String(new char[] { refBase }); locus.actualRef = new String(new char[] { refBase }); if (locus.isIndel()) { if (hasIndel(read, locus)) { altCount += 1; } else if (!base.equals('N') && base.equals(refBase)) { refCount += 1; } } else if (baseQual >= minBaseQual) { if (!base.equals('N') && base.equals(refBase)) { refCount += 1; } else if (base.equals(locus.alt.charAt(0))) { altCount += 1; } } } } iter.close(); return new Counts(refCount, altCount, depth); }
Example 14
Source File: BamTagOfTagCounts.java From Drop-seq with MIT License | 4 votes |
public TagOfTagResults<String,String> getResults (final File inputBAM, final String primaryTag, final String secondaryTag, final boolean filterPCRDuplicates, final Integer readQuality) { TagOfTagResults<String,String> result = new TagOfTagResults<>(); SamReader reader = SamReaderFactory.makeDefault().open(inputBAM); CloseableIterator<SAMRecord> iter = CustomBAMIterators.getReadsInTagOrder(reader, primaryTag); CloserUtil.close(reader); String currentTag=""; Set<String> otherTagCollection=new HashSet<>(); ProgressLogger progress = new ProgressLogger(log); while (iter.hasNext()) { SAMRecord r = iter.next(); progress.record(r); // skip reads that don't pass filters. boolean discardResult=(filterPCRDuplicates && r.getDuplicateReadFlag()) || r.getMappingQuality()<readQuality || r.isSecondaryOrSupplementary(); // short circuit if read is below the quality to be considered. if (discardResult) continue; Object d = r.getAttribute(secondaryTag); // short circuit if there's no tag for this read. if (d==null) continue; String data=null; if (d instanceof String) data=r.getStringAttribute(secondaryTag); else if (d instanceof Integer) data=Integer.toString(r.getIntegerAttribute(secondaryTag)); String newTag = r.getStringAttribute(primaryTag); if (newTag==null) newTag=""; // if you see a new tag. if (!currentTag.equals(newTag)) { // write out tag results, if any. if (!currentTag.equals("") && otherTagCollection.size()>0) result.addEntries(currentTag, otherTagCollection); currentTag=newTag; otherTagCollection.clear(); if (!discardResult) otherTagCollection=addTagToCollection (data,otherTagCollection); } else // gather stats if (!discardResult) otherTagCollection=addTagToCollection (data,otherTagCollection); } if (otherTagCollection.size()>0) result.addEntries(currentTag, otherTagCollection); return (result); }
Example 15
Source File: ChromosomeReadCount.java From hmftools with GNU General Public License v3.0 | 4 votes |
private boolean isEligible(@NotNull SAMRecord record) { return record.getMappingQuality() >= minMappingQuality && !(record.getReadUnmappedFlag() || record.getDuplicateReadFlag() || record.isSecondaryOrSupplementary()); }
Example 16
Source File: SamSlicer.java From hmftools with GNU General Public License v3.0 | 4 votes |
private boolean samRecordMeetsQualityRequirements(@NotNull final SAMRecord record) { return record.getMappingQuality() >= minMappingQuality && !record.getReadUnmappedFlag() && !record.getDuplicateReadFlag() && !record .isSecondaryOrSupplementary(); }
Example 17
Source File: SAMSlicer.java From hmftools with GNU General Public License v3.0 | 4 votes |
private boolean samRecordMeetsQualityRequirements(@NotNull final SAMRecord record) { return record.getMappingQuality() >= minMappingQuality && !record.getReadUnmappedFlag() && (!record.getDuplicateReadFlag() || !dropDuplicates) && !record.isSecondaryOrSupplementary(); }
Example 18
Source File: DuplicateReadTracker.java From hmftools with GNU General Public License v3.0 | 4 votes |
public boolean checkDuplicates(final SAMRecord record) { if(record.getDuplicateReadFlag()) return true; if(!mMarkDuplicates) return false; if(mDuplicateReadIds.contains(record.getReadName())) { mDuplicateReadIds.remove(record.getReadName()); return true; } if(!record.getReferenceName().equals(record.getMateReferenceName()) || record.getReadNegativeStrandFlag() == record.getMateNegativeStrandFlag()) return false; int firstStartPos = record.getFirstOfPairFlag() ? record.getStart() : record.getMateAlignmentStart(); int secondStartPos = record.getFirstOfPairFlag() ? record.getMateAlignmentStart() : record.getStart(); int readLength = record.getReadLength(); int insertSize = record.getInferredInsertSize(); List<int[]> dupDataList = mDuplicateCache.get(firstStartPos); if(dupDataList == null) { dupDataList = Lists.newArrayList(); mDuplicateCache.put(firstStartPos, dupDataList); } else { // search for a match if(dupDataList.stream().anyMatch(x -> x[DUP_DATA_SECOND_START] == secondStartPos && x[DUP_DATA_READ_LEN] == readLength && insertSize == x[DUP_DATA_INSERT_SIZE])) { ISF_LOGGER.trace("duplicate fragment: id({}) chr({}) pos({}->{}) otherReadStart({}) insertSize({})", record.getReadName(), record.getReferenceName(), firstStartPos, record.getEnd(), secondStartPos, insertSize); // cache so the second read can be identified immediately mDuplicateReadIds.add(record.getReadName()); return true; } } int[] dupData = {secondStartPos, readLength, insertSize}; dupDataList.add(dupData); return false; }
Example 19
Source File: FilterBam.java From Drop-seq with MIT License | 4 votes |
private boolean rejectPCRDuplicate (final SAMRecord r) { if (this.FILTER_PCR_DUPES && r.getDuplicateReadFlag()) return (true); return (false); }
Example 20
Source File: PCRDuplicateFilteringIterator.java From Drop-seq with MIT License | 4 votes |
@Override public boolean filterOut(final SAMRecord rec) { return rec.getDuplicateReadFlag(); }