Java Code Examples for htsjdk.samtools.SamReader#iterator()
The following examples show how to use htsjdk.samtools.SamReader#iterator().
Each example is taken from an open-source project; the source file, project, and license are noted above it.
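All of the examples share the same basic pattern: obtain a SamReader from SamReaderFactory, call iterator() to walk every SAMRecord in the file, and close the reader when finished. The following is a minimal sketch of that pattern, not taken from any of the projects below; the file name is a placeholder, and try-with-resources is just one way to guarantee that the reader and iterator are closed.

import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;

import java.io.File;
import java.io.IOException;

public class IterateAllReads {
    public static void main(String[] args) throws IOException {
        // Placeholder path; point this at a real SAM/BAM file.
        final File input = new File("reads.bam");
        long count = 0;
        // SamReader and SAMRecordIterator are both Closeable, so try-with-resources
        // releases them even if iteration fails part-way through the file.
        try (SamReader reader = SamReaderFactory.makeDefault().open(input);
             SAMRecordIterator it = reader.iterator()) {
            while (it.hasNext()) {
                final SAMRecord rec = it.next();
                // process rec here
                count++;
            }
        }
        System.out.println("Read " + count + " records.");
    }
}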
Example 1
Source File: TagValueFilteringIteratorTest.java From Drop-seq with MIT License | 6 votes |
@Test
public void testCellBarcodeFiltering () {
    String cellBarcodeTag = "XC";
    SamReader inputSam = SamReaderFactory.makeDefault().open(IN_FILE);
    String [] desiredBarcodes = {"ATCAGGGACAGA", "TTGCCTTACGCG", "TACAATTAAGGC"};

    TagValueFilteringIterator<String> iter = new TagValueFilteringIterator<String>(inputSam.iterator(), cellBarcodeTag, Arrays.asList(desiredBarcodes));

    Set<String> barcodesFound = new HashSet<String>();
    while (iter.hasNext()) {
        SAMRecord r = iter.next();
        String v = r.getStringAttribute(cellBarcodeTag);
        barcodesFound.add(v);
    }

    // should be the same size.
    Assert.assertEquals(barcodesFound.size(), desiredBarcodes.length);
    // should contain the same answers.
    for (String s: desiredBarcodes)
        Assert.assertTrue(barcodesFound.contains(s));
}
Example 2
Source File: SamClosedFileReaderTest.java From rtg-tools with BSD 2-Clause "Simplified" License | 6 votes |
private void check(final File bam) throws IOException {
    final SamReader normalReader = SamUtils.makeSamReader(bam);
    final Iterator<SAMRecord> norm = normalReader.iterator();
    try {
        try (final RecordIterator<SAMRecord> closed = new SamClosedFileReader(bam, null, null, normalReader.getFileHeader())) {
            while (norm.hasNext() && closed.hasNext()) {
                final SAMRecord a = norm.next();
                final SAMRecord b = closed.next();
                assertEquals(a.getSAMString().trim(), b.getSAMString().trim());
            }
            assertEquals(norm.hasNext(), closed.hasNext());
        }
    } finally {
        normalReader.close();
    }
}
Example 3
Source File: SNPUMIBasePileupTest.java From Drop-seq with MIT License | 6 votes |
@Test(enabled=true)
public void testAllReadsPileup() {
    SamReader reader = SamReaderFactory.makeDefault().open(smallBAMFile);
    Iterator<SAMRecord> iter = reader.iterator();

    int snpPos = 76227022;
    Interval snpInterval = new Interval("HUMAN_1", snpPos, snpPos, true, "test");
    SNPUMIBasePileup p = new SNPUMIBasePileup(snpInterval, "ACADM", "fake_cell", "AAAAA");
    while (iter.hasNext())
        p.addRead(iter.next());

    List<Character> bases = p.getBasesAsCharacters();

    ObjectCounter<Character> baseCounter = new ObjectCounter<>();
    for (Character c: bases)
        baseCounter.increment(c);

    Assert.assertEquals(6, baseCounter.getCountForKey('A'));
    Assert.assertEquals(7, baseCounter.getCountForKey('G'));
    Assert.assertNotNull(p.toString());
}
Example 4
Source File: FastqToSamTest.java From picard with MIT License | 6 votes |
private void convertFileAndVerifyRecordCount(final int expectedCount,
                                             final String fastqFilename1,
                                             final String fastqFilename2,
                                             final FastqQualityFormat version,
                                             final boolean permissiveFormat,
                                             final boolean useSequentialFastqs) throws IOException {
    final File samFile = convertFile(fastqFilename1, fastqFilename2, version, permissiveFormat, useSequentialFastqs);
    final SamReader samReader = SamReaderFactory.makeDefault().open(samFile);
    final SAMRecordIterator iterator = samReader.iterator();
    int actualCount = 0;
    while (iterator.hasNext()) {
        iterator.next();
        actualCount++;
    }
    samReader.close();
    Assert.assertEquals(actualCount, expectedCount);
}
Example 5
Source File: Reader.java From dataflow-java with Apache License 2.0 | 6 votes |
void openFile() throws IOException {
    LOG.info("Processing shard " + shard);
    final SamReader reader = BAMIO.openBAM(storageClient, shard.file, options.getStringency());
    iterator = null;
    if (reader.hasIndex() && reader.indexing() != null) {
        if (filter == Filter.UNMAPPED_ONLY) {
            LOG.info("Processing unmapped");
            iterator = reader.queryUnmapped();
        } else if (shard.span != null) {
            LOG.info("Processing span for " + shard.contig);
            iterator = reader.indexing().iterator(shard.span);
        } else if (shard.contig.referenceName != null && !shard.contig.referenceName.isEmpty()) {
            LOG.info("Processing all bases for " + shard.contig);
            iterator = reader.query(shard.contig.referenceName, (int) shard.contig.start, (int) shard.contig.end, false);
        }
    }
    if (iterator == null) {
        LOG.info("Processing all reads");
        iterator = reader.iterator();
    }
}
Example 6
Source File: CleanSam.java From picard with MIT License | 5 votes |
/**
 * Do the work after command line has been parsed.
 * RuntimeExceptions may be thrown by this method, and are reported appropriately.
 *
 * @return program exit status.
 */
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReaderFactory factory = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE);
    if (VALIDATION_STRINGENCY == ValidationStringency.STRICT) {
        factory.validationStringency(ValidationStringency.LENIENT);
    }
    final SamReader reader = factory.open(INPUT);
    final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), true, OUTPUT);
    final CloseableIterator<SAMRecord> it = reader.iterator();
    final ProgressLogger progress = new ProgressLogger(Log.getInstance(CleanSam.class));

    while (it.hasNext()) {
        final SAMRecord rec = it.next();

        // If the read (or its mate) maps off the end of the alignment, clip it
        AbstractAlignmentMerger.createNewCigarsIfMapsOffEndOfReference(rec);

        // check the read's mapping quality
        if (rec.getReadUnmappedFlag() && 0 != rec.getMappingQuality()) {
            rec.setMappingQuality(0);
        }

        writer.addAlignment(rec);
        progress.record(rec);
    }

    writer.close();
    it.close();
    CloserUtil.close(reader);
    return 0;
}
Example 7
Source File: CollectJumpingLibraryMetrics.java From picard with MIT License | 5 votes |
/**
 * Calculates the mode for outward-facing pairs, using the first SAMPLE_FOR_MODE
 * outward-facing pairs found in INPUT
 */
private double getOutieMode() {

    int samplePerFile = SAMPLE_FOR_MODE / INPUT.size();

    Histogram<Integer> histo = new Histogram<Integer>();

    for (File f : INPUT) {
        SamReader reader = SamReaderFactory.makeDefault().open(f);

        int sampled = 0;
        for (Iterator<SAMRecord> it = reader.iterator(); it.hasNext() && sampled < samplePerFile; ) {
            SAMRecord sam = it.next();
            if (!sam.getFirstOfPairFlag()) {
                continue;
            }
            // If we get here we've hit the end of the aligned reads
            if (sam.getReadUnmappedFlag() && sam.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
                break;
            } else if (sam.getReadUnmappedFlag() || sam.getMateUnmappedFlag()) {
                continue;
            } else if ((sam.getAttribute(SAMTag.MQ.name()) == null ||
                        sam.getIntegerAttribute(SAMTag.MQ.name()) >= MINIMUM_MAPPING_QUALITY) &&
                       sam.getMappingQuality() >= MINIMUM_MAPPING_QUALITY &&
                       sam.getMateNegativeStrandFlag() != sam.getReadNegativeStrandFlag() &&
                       sam.getMateReferenceIndex().equals(sam.getReferenceIndex()) &&
                       SamPairUtil.getPairOrientation(sam) == PairOrientation.RF) {
                histo.increment(Math.abs(sam.getInferredInsertSize()));
                sampled++;
            }
        }
        CloserUtil.close(reader);
    }

    return histo.size() > 0 ? histo.getMode() : 0;
}
Example 8
Source File: SNPUMIBasePileupTest.java From Drop-seq with MIT License | 5 votes |
private SAMRecord getReadByName (final String readName, final File bamFile) {
    SamReader reader = SamReaderFactory.makeDefault().open(bamFile);
    Iterator<SAMRecord> iter = reader.iterator();
    while (iter.hasNext()) {
        SAMRecord r1 = iter.next();
        if (r1.getReadName().equals(readName))
            return (r1);
    }
    return null;
}
Example 9
Source File: SNPUMICellReadIteratorWrapperTest.java From Drop-seq with MIT License | 5 votes |
/**
 * Integration style test to see if I can throw null pointers, etc with a bit of real data.
 */
@Test(enabled=true)
public void processReads() {
    List<String> cellBarcodeList = ParseBarcodeFile.readCellBarcodeFile(cellBCFile);
    IntervalList loci = IntervalList.fromFile(snpIntervals);
    SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.EAGERLY_DECODE).open(bamFile);

    // Filter records before sorting, to reduce I/O
    final MissingTagFilteringIterator filteringIterator =
            new MissingTagFilteringIterator(reader.iterator(), GENE_NAME_TAG, cellBarcodeTag, molBCTag);

    MapQualityFilteredIterator filteringIterator2 = new MapQualityFilteredIterator(filteringIterator, readMQ, true);

    GeneFunctionIteratorWrapper gfteratorWrapper = new GeneFunctionIteratorWrapper(filteringIterator2, GENE_NAME_TAG,
            GENE_STRAND_TAG, GENE_FUNCTION_TAG, false, STRAND_STRATEGY, LOCUS_FUNCTION_LIST);

    SNPUMICellReadIteratorWrapper snpumiCellReadIterator = new SNPUMICellReadIteratorWrapper(gfteratorWrapper, loci,
            cellBarcodeTag, cellBarcodeList, GENE_NAME_TAG, snpTag, readMQ);

    // create comparators in the order the data should be sorted
    final MultiComparator<SAMRecord> multiComparator = new MultiComparator<>(
            new StringTagComparator(cellBarcodeTag),
            new StringTagComparator(GENE_NAME_TAG),
            new StringTagComparator(molBCTag));

    final CloseableIterator<SAMRecord> sortingIterator =
            SamRecordSortingIteratorFactory.create(reader.getFileHeader(), snpumiCellReadIterator, multiComparator, null);

    Assert.assertTrue(sortingIterator.hasNext());
    SAMRecord nextRead = sortingIterator.next();
    List<SAMTagAndValue> tagValues = nextRead.getAttributes();
    String snpTagValue = nextRead.getStringAttribute(this.snpTag);
    CloserUtil.close(snpumiCellReadIterator);
}
Example 10
Source File: SNPUMICellReadIteratorWrapperTest.java From Drop-seq with MIT License | 5 votes |
private SAMRecord getReadByName (final String name, final File bamFile) {
    SamReader reader = SamReaderFactory.makeDefault().open(bamFile);
    Iterator<SAMRecord> iter = reader.iterator();
    while (iter.hasNext()) {
        SAMRecord r = iter.next();
        if (r.getReadName().equals(name))
            return (r);
    }
    return null;
}
Example 11
Source File: AggregatedTagOrderIteratorTest.java From Drop-seq with MIT License | 5 votes |
private Iterator<List<SAMRecord>> filterSortAndGroupByTags(final File bamFile, final String...tags) {
    final SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.EAGERLY_DECODE).open(bamFile);
    final Iterator<SAMRecord> filteringIterator = new MissingTagFilteringIterator(reader.iterator(), tags);

    final List<Comparator<SAMRecord>> comparators = new ArrayList<>(tags.length);
    for (final String tag: tags) {
        comparators.add(new StringTagComparator(tag));
    }
    final MultiComparator<SAMRecord> comparator = new MultiComparator<>(comparators);

    final CloseableIterator<SAMRecord> sortingIterator =
            SamRecordSortingIteratorFactory.create(reader.getFileHeader(), filteringIterator, comparator, null);

    return new GroupingIterator<>(sortingIterator, comparator);
}
Example 12
Source File: AggregatedTagOrderIteratorTest.java From Drop-seq with MIT License | 5 votes |
private Iterator<List<SAMRecord>> filterSortAndGroupByTagsAndQuality(final File bamFile, final String...tags) {
    final SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.EAGERLY_DECODE).open(bamFile);

    // Stack the two filters
    final Iterator<SAMRecord> filteringIterator =
            new MapQualityFilteredIterator(new MissingTagFilteringIterator(reader.iterator(), tags), 10, true);

    final List<Comparator<SAMRecord>> comparators = new ArrayList<>(tags.length);
    for (final String tag: tags) {
        comparators.add(new StringTagComparator(tag));
    }
    final MultiComparator<SAMRecord> comparator = new MultiComparator<>(comparators);

    final CloseableIterator<SAMRecord> sortingIterator =
            SamRecordSortingIteratorFactory.create(reader.getFileHeader(), filteringIterator, comparator, null);

    return new GroupingIterator<>(sortingIterator, comparator);
}
Example 13
Source File: SplitSamByLibraryTest.java From picard with MIT License | 5 votes |
private int countReads(File samFile) {
    SamReader reader = SamReaderFactory.makeDefault().open(samFile);
    int count = 0;
    for (Iterator it = reader.iterator(); it.hasNext(); ) {
        it.next();
        count++;
    }
    CloserUtil.close(reader);
    return count;
}
Example 14
Source File: BamToBfqWriter.java From picard with MIT License | 5 votes |
/**
 * Writes the binary fastq file(s) to the output directory
 */
public void writeBfqFiles() {

    final SamReader reader = SamReaderFactory.makeDefault().open(bamFile);
    final Iterator<SAMRecord> iterator = reader.iterator();

    // Filter out noise reads and reads that fail the quality filter
    final TagFilter tagFilter = new TagFilter(ReservedTagConstants.XN, 1);
    final FailsVendorReadQualityFilter qualityFilter = new FailsVendorReadQualityFilter();
    final WholeReadClippedFilter clippedFilter = new WholeReadClippedFilter();

    if (!pairedReads) {
        List<SamRecordFilter> filters = new ArrayList<SamRecordFilter>();
        filters.add(tagFilter);
        filters.add(clippedFilter);
        if (!this.includeNonPfReads) {
            filters.add(qualityFilter);
        }
        writeSingleEndBfqs(iterator, filters);
        codec1.close();
    } else {
        writePairedEndBfqs(iterator, tagFilter, qualityFilter, clippedFilter);
        codec1.close();
        codec2.close();
    }
    log.info("Wrote " + wrote + " bfq records.");
    CloserUtil.close(reader);
}
Example 15
Source File: ReadLocusReader.java From abra2 with MIT License | 5 votes |
public ReadLocusIterator(SamReader samReader, Feature region, int maxDepth) {
    this.maxDepth = maxDepth;

    if (region != null) {
        samIter = new ForwardShiftInsertIterator(
                samReader.queryOverlapping(region.getSeqname(), (int) region.getStart(), (int) region.getEnd()));
    } else {
        samIter = new ForwardShiftInsertIterator(samReader.iterator());
    }
}
Example 16
Source File: CompareBaseQualities.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
@Override
protected Object doWork() {
    if (roundDown && (staticQuantizationQuals == null || staticQuantizationQuals.isEmpty())) {
        throw new CommandLineException.BadArgumentValue(
                ROUND_DOWN_QUANTIZED_LONG_NAME, "true",
                "This option can only be used if " + STATIC_QUANTIZED_QUALS_LONG_NAME + " is also used.");
    }
    staticQuantizedMapping = constructStaticQuantizedMapping(staticQuantizationQuals, roundDown);

    IOUtil.assertFileIsReadable(samFiles.get(0));
    IOUtil.assertFileIsReadable(samFiles.get(1));

    final SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(VALIDATION_STRINGENCY);
    final SamReader reader1 = factory.referenceSequence(REFERENCE_SEQUENCE).open(samFiles.get(0));
    final SamReader reader2 = factory.referenceSequence(REFERENCE_SEQUENCE).open(samFiles.get(1));

    final SecondaryOrSupplementarySkippingIterator it1 = new SecondaryOrSupplementarySkippingIterator(reader1.iterator());
    final SecondaryOrSupplementarySkippingIterator it2 = new SecondaryOrSupplementarySkippingIterator(reader2.iterator());

    final CompareMatrix finalMatrix = new CompareMatrix(staticQuantizedMapping);

    final ProgressMeter progressMeter = new ProgressMeter(1.0);
    progressMeter.start();

    while (it1.hasCurrent() && it2.hasCurrent()) {
        final SAMRecord read1 = it1.getCurrent();
        final SAMRecord read2 = it2.getCurrent();
        progressMeter.update(read1);

        if (!read1.getReadName().equals(read2.getReadName())) {
            throw new UserException.BadInput("files do not have the same exact order of reads:" +
                    read1.getReadName() + " vs " + read2.getReadName());
        }

        finalMatrix.add(read1.getBaseQualities(), read2.getBaseQualities());
        it1.advance();
        it2.advance();
    }
    progressMeter.stop();

    if (it1.hasCurrent() || it2.hasCurrent()) {
        throw new UserException.BadInput("files do not have the same exact number of reads");
    }

    CloserUtil.close(reader1);
    CloserUtil.close(reader2);

    finalMatrix.printOutResults(outputFilename);

    if (throwOnDiff && finalMatrix.hasNonDiagonalElements()) {
        throw new UserException("Quality scores from the two BAMs do not match");
    }
    return finalMatrix.hasNonDiagonalElements() ? 1 : 0;
}
Example 17
Source File: BamToBfqWriter.java From picard with MIT License | 4 votes |
/**
 * Count the number of records in the bamFile that could potentially be written
 *
 * @return the number of records in the Bam file
 */
private int countWritableRecords() {
    int count = 0;

    final SamReader reader = SamReaderFactory.makeDefault().open(this.bamFile);
    if (!reader.getFileHeader().getSortOrder().equals(SAMFileHeader.SortOrder.queryname)) {
        // this is a fix for issue PIC-274: It looks like BamToBfqWriter requires that the input BAM is queryname sorted,
        // but it doesn't check this early, nor produce an understandable error message.
        throw new PicardException("Input file (" + this.bamFile.getAbsolutePath() + ") needs to be sorted by queryname.");
    }

    final PeekableIterator<SAMRecord> it = new PeekableIterator<SAMRecord>(reader.iterator());
    if (!this.pairedReads) {
        // Filter out noise reads and reads that fail the quality filter
        final List<SamRecordFilter> filters = new ArrayList<SamRecordFilter>();
        filters.add(new TagFilter(ReservedTagConstants.XN, 1));
        if (!this.includeNonPfReads) {
            filters.add(new FailsVendorReadQualityFilter());
        }
        final FilteringSamIterator itr = new FilteringSamIterator(it, new AggregateFilter(filters));
        while (itr.hasNext()) {
            itr.next();
            count++;
        }
    } else {
        while (it.hasNext()) {
            final SAMRecord first = it.next();
            final SAMRecord second = it.next();
            // If both are noise reads, filter them out
            if (first.getAttribute(ReservedTagConstants.XN) != null &&
                second.getAttribute(ReservedTagConstants.XN) != null) {
                // skip it
            }
            // If either fails to pass filter, then exclude them as well
            else if (!this.includeNonPfReads &&
                     (first.getReadFailsVendorQualityCheckFlag() || second.getReadFailsVendorQualityCheckFlag())) {
                // skip it
            }
            // Otherwise, write them out
            else {
                count++;
            }
        }
    }
    it.close();
    CloserUtil.close(reader);
    return count;
}
Example 18
Source File: MarkIlluminaAdapters.java From picard with MIT License | 4 votes |
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(METRICS);

    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    final SAMFileHeader.SortOrder order = in.getFileHeader().getSortOrder();
    SAMFileWriter out = null;
    if (OUTPUT != null) {
        IOUtil.assertFileIsWritable(OUTPUT);
        out = new SAMFileWriterFactory().makeSAMOrBAMWriter(in.getFileHeader(), true, OUTPUT);
    }

    final Histogram<Integer> histo = new Histogram<Integer>("clipped_bases", "read_count");

    // Combine any adapters and custom adapter pairs from the command line into an array for use in clipping
    final AdapterPair[] adapters;
    {
        final List<AdapterPair> tmp = new ArrayList<AdapterPair>();
        tmp.addAll(ADAPTERS);
        if (FIVE_PRIME_ADAPTER != null && THREE_PRIME_ADAPTER != null) {
            tmp.add(new CustomAdapterPair(FIVE_PRIME_ADAPTER, THREE_PRIME_ADAPTER));
        }
        adapters = tmp.toArray(new AdapterPair[tmp.size()]);
    }

    ////////////////////////////////////////////////////////////////////////
    // Main loop that consumes reads, clips them and writes them to the output
    ////////////////////////////////////////////////////////////////////////
    final ProgressLogger progress = new ProgressLogger(log, 1000000, "Read");
    final SAMRecordIterator iterator = in.iterator();

    final AdapterMarker adapterMarker = new AdapterMarker(ADAPTER_TRUNCATION_LENGTH, adapters).
            setMaxPairErrorRate(MAX_ERROR_RATE_PE).setMinPairMatchBases(MIN_MATCH_BASES_PE).
            setMaxSingleEndErrorRate(MAX_ERROR_RATE_SE).setMinSingleEndMatchBases(MIN_MATCH_BASES_SE).
            setNumAdaptersToKeep(NUM_ADAPTERS_TO_KEEP).
            setThresholdForSelectingAdaptersToKeep(PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPTERS_SEEN);

    while (iterator.hasNext()) {
        final SAMRecord rec = iterator.next();
        final SAMRecord rec2 = rec.getReadPairedFlag() && iterator.hasNext() ? iterator.next() : null;
        rec.setAttribute(ReservedTagConstants.XT, null);

        // Do the clipping one way for PE and another for SE reads
        if (rec.getReadPairedFlag()) {
            // Assert that the input file is in query name order only if we see some PE reads
            if (order != SAMFileHeader.SortOrder.queryname) {
                throw new PicardException("Input BAM file must be sorted by queryname");
            }

            if (rec2 == null) throw new PicardException("Missing mate pair for paired read: " + rec.getReadName());
            rec2.setAttribute(ReservedTagConstants.XT, null);

            // Assert that we did in fact just get two mate pairs
            if (!rec.getReadName().equals(rec2.getReadName())) {
                throw new PicardException("Adjacent reads expected to be mate-pairs have different names: " +
                        rec.getReadName() + ", " + rec2.getReadName());
            }

            // establish which of pair is first and which second
            final SAMRecord first, second;

            if (rec.getFirstOfPairFlag() && rec2.getSecondOfPairFlag()) {
                first = rec;
                second = rec2;
            } else if (rec.getSecondOfPairFlag() && rec2.getFirstOfPairFlag()) {
                first = rec2;
                second = rec;
            } else {
                throw new PicardException("Two reads with same name but not correctly marked as 1st/2nd of pair: " + rec.getReadName());
            }

            adapterMarker.adapterTrimIlluminaPairedReads(first, second);
        } else {
            adapterMarker.adapterTrimIlluminaSingleRead(rec);
        }

        // Then output the records, update progress and metrics
        for (final SAMRecord r : new SAMRecord[]{rec, rec2}) {
            if (r != null) {
                progress.record(r);
                if (out != null) out.addAlignment(r);

                final Integer clip = r.getIntegerAttribute(ReservedTagConstants.XT);
                if (clip != null) histo.increment(r.getReadLength() - clip + 1);
            }
        }
    }

    if (out != null) out.close();

    // Lastly output the metrics to file
    final MetricsFile<?, Integer> metricsFile = getMetricsFile();
    metricsFile.setHistogram(histo);
    metricsFile.write(METRICS);

    CloserUtil.close(in);
    return 0;
}
Example 19
Source File: WriteBAIFn.java From dataflow-java with Apache License 2.0 | 4 votes |
@ProcessElement
public void processElement(DoFn<String, String>.ProcessContext c) throws Exception {
    Metrics.counter(WriteBAIFn.class, "Initialized Indexing Shard Count").inc();
    Stopwatch stopWatch = Stopwatch.createStarted();

    final HeaderInfo header = c.sideInput(headerView);
    final String bamFilePath = c.sideInput(writtenBAMFilerView);
    final Iterable<KV<Integer, Long>> sequenceShardSizes = c.sideInput(sequenceShardSizesView);

    final String sequenceName = c.element();
    final int sequenceIndex = header.header.getSequence(sequenceName).getSequenceIndex();
    final String baiFilePath = bamFilePath + "-" + String.format("%02d", sequenceIndex) + "-" + sequenceName + ".bai";

    long offset = 0;
    int skippedReferences = 0;
    long bytesToProcess = 0;

    for (KV<Integer, Long> sequenceShardSize : sequenceShardSizes) {
        if (sequenceShardSize.getKey() < sequenceIndex) {
            offset += sequenceShardSize.getValue();
            skippedReferences++;
        } else if (sequenceShardSize.getKey() == sequenceIndex) {
            bytesToProcess = sequenceShardSize.getValue();
        }
    }
    LOG.info("Generating BAI index: " + baiFilePath);
    LOG.info("Reading BAM file: " + bamFilePath + " for reference " + sequenceName +
            ", skipping " + skippedReferences + " references at offset " + offset +
            ", expecting to process " + bytesToProcess + " bytes");

    Options options = c.getPipelineOptions().as(Options.class);
    final Storage.Objects storage = Transport.newStorageClient(
            options.as(GCSOptions.class)).build().objects();
    final SamReader reader = BAMIO.openBAM(storage, bamFilePath, ValidationStringency.SILENT, true, offset);
    final OutputStream outputStream = Channels.newOutputStream(
            new GcsUtil.GcsUtilFactory().create(options)
                    .create(GcsPath.fromUri(baiFilePath), BAMIO.BAM_INDEX_FILE_MIME_TYPE));
    final BAMShardIndexer indexer = new BAMShardIndexer(outputStream, reader.getFileHeader(), sequenceIndex);

    long processedReads = 0;
    long skippedReads = 0;

    // create and write the content
    if (bytesToProcess > 0) {
        SAMRecordIterator it = reader.iterator();
        boolean foundRecords = false;
        while (it.hasNext()) {
            SAMRecord r = it.next();
            if (!r.getReferenceName().equals(sequenceName)) {
                if (foundRecords) {
                    LOG.info("Finishing index building for " + sequenceName + " after processing " + processedReads);
                    break;
                }
                skippedReads++;
                continue;
            } else if (!foundRecords) {
                LOG.info("Found records for reference " + sequenceName + " after skipping " + skippedReads);
                foundRecords = true;
            }
            indexer.processAlignment(r);
            processedReads++;
        }
        it.close();
    } else {
        LOG.info("No records for reference " + sequenceName + ": writing empty index");
    }
    long noCoordinateReads = indexer.finish();

    c.output(baiFilePath);
    c.output(NO_COORD_READS_COUNT_TAG, noCoordinateReads);
    LOG.info("Generated " + baiFilePath + ", " + processedReads + " reads, " +
            noCoordinateReads + " no coordinate reads, " + skippedReads + " skipped reads");

    stopWatch.stop();
    Metrics.distribution(WriteBAIFn.class, "Indexing Shard Processing Time (sec)").update(
            stopWatch.elapsed(TimeUnit.SECONDS));
    Metrics.counter(WriteBAIFn.class, "Finished Indexing Shard Count").inc();
    Metrics.counter(WriteBAIFn.class, "Indexed reads").inc(processedReads);
    Metrics.counter(WriteBAIFn.class, "Indexed no coordinate reads").inc(noCoordinateReads);
    Metrics.distribution(WriteBAIFn.class, "Reads Per Indexing Shard").update(processedReads);
}
Example 20
Source File: ReAligner.java From abra2 with MIT License | 4 votes |
private void getSamHeaderAndReadLength() throws IOException {

    Logger.info("Identifying header and determining read length");

    this.samHeaders = new SAMFileHeader[this.inputSams.length];

    for (int i = 0; i < this.inputSams.length; i++) {

        SamReader reader = SAMRecordUtils.getSamReader(inputSams[i]);

        try {
            samHeaders[i] = reader.getFileHeader();
            samHeaders[i].setSortOrder(SAMFileHeader.SortOrder.unsorted);

            Iterator<SAMRecord> iter = reader.iterator();

            int cnt = 0;
            while ((iter.hasNext()) && (cnt < 1000000)) {
                SAMRecord read = iter.next();
                this.readLength = Math.max(this.readLength, read.getReadLength());
                this.maxMapq = Math.max(this.maxMapq, read.getMappingQuality());

                // Assumes aligner sets proper pair flag correctly
                if ((isPairedEnd) && (read.getReadPairedFlag()) && (read.getProperPairFlag())) {
                    this.minInsertLength = Math.min(this.minInsertLength, Math.abs(read.getInferredInsertSize()));
                    this.maxInsertLength = Math.max(this.maxInsertLength, Math.abs(read.getInferredInsertSize()));
                }

                cnt += 1;
            }

            // Allow some fudge in insert length
            minInsertLength = Math.max(minInsertLength - 2*readLength, 0);
            maxInsertLength = maxInsertLength + 2*readLength;

        } finally {
            reader.close();
        }
    }

    Logger.info("Min insert length: " + minInsertLength);
    Logger.info("Max insert length: " + maxInsertLength);
    Logger.info("Max read length is: " + readLength);

    if (assemblerSettings.getMinContigLength() < 1) {
        assemblerSettings.setMinContigLength(Math.max(readLength + 1, MIN_CONTIG_LENGTH));
    }

    Logger.info("Min contig length: " + assemblerSettings.getMinContigLength());
}