Java Code Examples for htsjdk.samtools.util.CloserUtil#close()
The following examples show how to use
htsjdk.samtools.util.CloserUtil#close() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FilterBamByTag.java From Drop-seq with MIT License | 6 votes |
/** * Just work through the reads one at a time. * @param in * @param out */ void processUnpairedMode (final SamReader in, final SAMFileWriter out, final Set<String> values, final File summaryFile, Integer mapQuality ,boolean allowPartial) { FilteredReadsMetric m = new FilteredReadsMetric(); ProgressLogger progLog = new ProgressLogger(log); for (final SAMRecord r : in) { progLog.record(r); boolean filterFlag = filterRead(r, this.TAG, values, this.ACCEPT_TAG, mapQuality, allowPartial); if (!filterFlag) { out.addAlignment(r); m.READS_ACCEPTED++; } else { m.READS_REJECTED++; } } writeSummary(summaryFile, m); CloserUtil.close(in); out.close(); reportAndCheckFilterResults(m); }
Example 2
Source File: SparseDge.java From Drop-seq with MIT License | 6 votes |
/** * Load a DGE into a sparse in-memory format. * @param input Either tabular DGE text, or Drop-seq Matrix Market sparse format. May be gzipped. * @param geneEnumerator Genes are assigned indices by this. */ public SparseDge(final File input, final GeneEnumerator geneEnumerator) { try { this.input = input; final BufferedInputStream inputStream = new BufferedInputStream(IOUtil.openFileForReading(input)); final RawLoadedDge rawLoadedDge; if (MatrixMarketReader.isMatrixMarketInteger(input)) rawLoadedDge = loadDropSeqSparseDge(inputStream, input, geneEnumerator); else rawLoadedDge = loadTabularDge(inputStream, input, geneEnumerator); CloserUtil.close(inputStream); header = rawLoadedDge.header; triplets = rawLoadedDge.rawTriplets; sortAndFilterRawDge(rawLoadedDge); } catch (Exception e) { throw new RuntimeException("Problem reading " + input.getAbsolutePath(), e); } }
Example 3
Source File: TestUtils.java From Drop-seq with MIT License | 6 votes |
/** * Test if two text files are the same, ignoring "#" character lines. * @param expected * @param actual * @return */ public static boolean testFilesSame (final File expected, final File actual) { TabbedInputParser e = new TabbedInputParser(true, expected); TabbedInputParser a = new TabbedInputParser(true, actual); while (e.hasNext() && a.hasNext()) { e.next(); a.next(); String le = e.getCurrentLine(); String la = a.getCurrentLine(); if (!le.equals(la)) { CloserUtil.close(e); CloserUtil.close(a); return false; } } CloserUtil.close(e); CloserUtil.close(a); // one of the files is incomplete. if (e.hasNext() || a.hasNext()) return false; return true; }
Example 4
Source File: BciFileFaker.java From picard with MIT License | 6 votes |
public void fakeBciFile(final File bci, final List<Integer> expectedTiles) throws IOException { tiles = expectedTiles; final FileOutputStream fileOutputStream = new FileOutputStream(bci); final FileChannel channel = fileOutputStream.getChannel(); final ByteBuffer buffer = ByteBuffer.allocate(8 * expectedTiles.size()); buffer.order(ByteOrder.LITTLE_ENDIAN); fakeFile(buffer); buffer.flip(); channel.write(buffer); channel.force(true); CloserUtil.close(channel); CloserUtil.close(fileOutputStream); }
Example 5
Source File: DetectBeadSynthesisErrors.java From Drop-seq with MIT License | 6 votes |
/** * For each problematic cell, replace cell barcodes positions with N. * Take the replaced bases and prepend them to the UMI, and trim the last <X> bases off the end of the UMI. */ private void cleanBAM (final Map<String, BeadSynthesisErrorData> errorBarcodesWithPositions, final Map<String, String> intendedSequenceMap) { log.info("Cleaning BAM"); final SamHeaderAndIterator headerAndIterator = SamFileMergeUtil.mergeInputs(INPUT, true); SamHeaderUtil.addPgRecord(headerAndIterator.header, this); SAMFileWriter writer= new SAMFileWriterFactory().setCreateIndex(CREATE_INDEX).makeSAMOrBAMWriter(headerAndIterator.header, true, OUTPUT); ProgressLogger pl = new ProgressLogger(log); for (SAMRecord r: new IterableAdapter<>(headerAndIterator.iterator)) { pl.record(r); r=padCellBarcodeFix(r, errorBarcodesWithPositions, intendedSequenceMap, this.CELL_BARCODE_TAG, this.MOLECULAR_BARCODE_TAG, this.EXTREME_BASE_RATIO); if (r!=null) writer.addAlignment(r); } CloserUtil.close(headerAndIterator.iterator); writer.close(); }
Example 6
Source File: MultiLevelCollectorTest.java From picard with MIT License | 5 votes |
@Test(dataProvider = "variedAccumulationLevels") public void multilevelCollectorTest(final Set<MetricAccumulationLevel> accumulationLevels) { final SamReader in = SamReaderFactory.makeDefault().open(TESTFILE); final RecordCountMultiLevelCollector collector = new RecordCountMultiLevelCollector(accumulationLevels, in.getFileHeader().getReadGroups()); for (final SAMRecord rec : in) { collector.acceptRecord(rec, null); } collector.finish(); int totalProcessed = 0; int totalMetrics = 0; for(final MetricAccumulationLevel level : accumulationLevels) { final Map<String, Integer> keyToMetrics = accumulationLevelToPerUnitReads.get(level); for(final Map.Entry<String, Integer> entry : keyToMetrics.entrySet()) { final TotalNumberMetric metric = collector.getUnitsToMetrics().get(entry.getKey()); Assert.assertEquals(entry.getValue(), metric.TALLY); Assert.assertTrue(metric.FINISHED); totalProcessed += metric.TALLY; totalMetrics += 1; } } Assert.assertEquals(collector.getUnitsToMetrics().size(), totalMetrics); Assert.assertEquals(totalProcessed, collector.getNumProcessed()); CloserUtil.close(in); }
Example 7
Source File: SingleCellRnaSeqMetricsCollector.java From Drop-seq with MIT License | 5 votes |
public CollectorFactory (final File bamFile, final RnaSeqMetricsCollector.StrandSpecificity specificity, final double rnaFragPct, final File annotationsFile, final File ribosomalIntervals) { this.specificity=specificity; this.rnaFragPct=rnaFragPct; SamReader reader = SamReaderFactory.makeDefault().open(bamFile); geneOverlapDetector = GeneAnnotationReader.loadAnnotationsFile(annotationsFile, reader.getFileHeader().getSequenceDictionary()); // This is a time-consuming call, so invoke it once here so that it isn't invoke over and over by RnaSeqMetricsCollector genesWithLongEnoughTranscripts = geneOverlapDetector.getAll(); log.info("Loaded " + genesWithLongEnoughTranscripts.size() + " genes."); ribosomalBasesInitialValue = ribosomalIntervals != null ? 0L : null; ribosomalSequenceOverlapDetector = RnaSeqMetricsCollector.makeOverlapDetector(bamFile, reader.getFileHeader(), ribosomalIntervals, log); ignoredSequenceIndices = RnaSeqMetricsCollector.makeIgnoredSequenceIndicesSet(reader.getFileHeader(), new HashSet<String>()); CloserUtil.close(reader); }
Example 8
Source File: EnhanceGTFRecordsTest.java From Drop-seq with MIT License | 5 votes |
@Test(enabled=true, expectedExceptions=java.lang.IllegalStateException.class) public void testGeneNoExon () { EnhanceGTFRecords e = new EnhanceGTFRecords(); GTFParser parser = new GTFParser(GTF_FILE3, ValidationStringency.STRICT); List<GTFRecord> records; try { records = e.enhanceGTFRecords(parser); } finally { CloserUtil.close(parser); } Assert.assertNotNull(records); }
Example 9
Source File: BasicInputParser.java From picard with MIT License | 5 votes |
/** * Closes the underlying stream */ public void close() { if (reader != null) { reader.close(); } for(final InputStream stream : inputs){ CloserUtil.close(stream); } }
Example 10
Source File: FixVcfHeaderTest.java From picard with MIT License | 5 votes |
private void runFixVcfHeader(final int checkFirstNRecords, final File replacementHeader, final boolean enforceSampleSamples) throws IOException { final FixVcfHeader program = new FixVcfHeader(); final File outputVcf = VcfTestUtils.createTemporaryIndexedFile("output.", ".vcf"); program.INPUT = INPUT_VCF; program.OUTPUT = outputVcf; if (replacementHeader == null) { program.CHECK_FIRST_N_RECORDS = checkFirstNRecords; } else { program.HEADER = replacementHeader; program.ENFORCE_SAME_SAMPLES = enforceSampleSamples; } Assert.assertEquals(program.instanceMain(new String[0]), 0); final VCFFileReader actualReader = new VCFFileReader(OUTPUT_VCF, false); final VCFFileReader expectedReader = new VCFFileReader(outputVcf, false); // Check that the headers match (order does not matter final VCFHeader actualHeader = actualReader.getFileHeader(); final VCFHeader expectedHeader = expectedReader.getFileHeader(); Assert.assertEquals(actualHeader.getFilterLines().size(), expectedHeader.getFilterLines().size()); Assert.assertEquals(actualHeader.getInfoHeaderLines().size(), expectedHeader.getInfoHeaderLines().size()); Assert.assertEquals(actualHeader.getFormatHeaderLines().size(), expectedHeader.getFormatHeaderLines().size()); // Check the number of records match, since we don't touch them Assert.assertEquals(actualReader.iterator().stream().count(), expectedReader.iterator().stream().count(), "The wrong number of variants was found."); CloserUtil.close(actualReader); CloserUtil.close(expectedReader); }
Example 11
Source File: DetectBeadSubstitutionErrors.java From Drop-seq with MIT License | 5 votes |
private void writeReport (final BottomUpCollapseResult result, final BottomUpCollapseResult resultClean, final UMIsPerCellResult umiResult) { PrintStream outReport = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(this.OUTPUT_REPORT)); // write comments section, each line starts with a "#" outReport.println("# FILTER_AMBIGUOUS="+FILTER_AMBIGUOUS); outReport.println("# MIN_UMIS_PER_CELL="+MIN_UMIS_PER_CELL); outReport.println("# UMI_BIAS_THRESHOLD="+MIN_UMIS_PER_CELL); outReport.println("# EDIT_DISTANCE="+MIN_UMIS_PER_CELL); outReport.println("#"); outReport.println("# TOTAL_BARCODES_TESTED="+umiResult.getNumCellBarocodesTested()); outReport.println("# BARCODES_COLLAPSED="+result.getUnambiguousSmallBarcodes().size()); outReport.println("# ESTIMATED_UMIS_COLLAPSED="+getTotalAmbiguousUMIs(result.getUnambiguousSmallBarcodes(), umiResult.getUmisPerCell())); outReport.println("# AMBIGUOUS_BARCODES="+result.getAmbiguousBarcodes().size()); outReport.println("# ESTIMATED_AMBIGUOUS_UMIS="+getTotalAmbiguousUMIs(result.getAmbiguousBarcodes(), umiResult.getUmisPerCell())); outReport.println("# POLY_T_BIASED_BARCODES="+umiResult.getPolyTBiasedBarcodes()); outReport.println("# POLY_T_BIASED_BARRCODE_UMIS="+umiResult.getPolyTBiasedUMIs()); outReport.println("# POLY_T_POSITION="+umiResult.getPolyTPosition()); /// write header String [] header= {"intended_barcode", "neighbor_barcode", "intended_size", "neighbor_size", "position", "intended_base", "neighbor_base", "repaired"}; outReport.println(StringUtil.join("\t", header)); ObjectCounter<String> umiCounts=umiResult.getUmisPerCell(); Iterator<String> smalls = result.getUnambiguousSmallBarcodes().iterator(); while (smalls.hasNext()) { String small=smalls.next(); String large = result.getLargerRelatedBarcode(small); BarcodeSubstitutionPair p = new BarcodeSubstitutionPair(large, small); String cleanLarger = resultClean.getLargerRelatedBarcode(small); boolean repaired = cleanLarger!=null; String [] body = {large, small, Integer.toString(umiCounts.getCountForKey(large)), Integer.toString(umiCounts.getCountForKey(small)), Integer.toString(p.getPosition()+1), p.getIntendedBase(), p.getNeighborBase(), Boolean.toString(repaired).toUpperCase()}; outReport.println(StringUtil.join("\t", body)); } CloserUtil.close(outReport); }
Example 12
Source File: ConvertTagToReadGroup.java From Drop-seq with MIT License | 5 votes |
@Override protected int doWork() { IOUtil.assertFileIsWritable(OUTPUT); IOUtil.assertFileIsWritable(INPUT); SamReader in = SamReaderFactory.makeDefault().open(INPUT); final SAMFileHeader inHeader = in.getFileHeader(); SAMReadGroupRecord readGroupTemplate = getReadGroupTemplate(inHeader); Set<String> cellBarcodes = getCellBarcodes (); // get read groups List<SAMReadGroupRecord> rg = getReadGroups(cellBarcodes, readGroupTemplate); final SAMFileHeader outHeader = inHeader.clone(); outHeader.setReadGroups(rg); final SAMFileWriter outWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(outHeader,outHeader.getSortOrder() == inHeader.getSortOrder(), OUTPUT); for (SAMRecord r : in) { String cellBarcode = r.getStringAttribute(this.CELL_BARCODE_TAG); if (cellBarcodes.contains(cellBarcode)) { r.setAttribute(SAMTag.RG.name(), cellBarcode); outWriter.addAlignment(r); } progress.record(r); } CloserUtil.close(in); outWriter.close(); log.info("Tagging finished."); return (0); }
Example 13
Source File: CleanSam.java From picard with MIT License | 5 votes |
/** * Do the work after command line has been parsed. * RuntimeException may be thrown by this method, and are reported appropriately. * * @return program exit status. */ @Override protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsWritable(OUTPUT); final SamReaderFactory factory = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE); if (VALIDATION_STRINGENCY == ValidationStringency.STRICT) { factory.validationStringency(ValidationStringency.LENIENT); } final SamReader reader = factory.open(INPUT); final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), true, OUTPUT); final CloseableIterator<SAMRecord> it = reader.iterator(); final ProgressLogger progress = new ProgressLogger(Log.getInstance(CleanSam.class)); // If the read (or its mate) maps off the end of the alignment, clip it while (it.hasNext()) { final SAMRecord rec = it.next(); // If the read (or its mate) maps off the end of the alignment, clip it AbstractAlignmentMerger.createNewCigarsIfMapsOffEndOfReference(rec); // check the read's mapping quality if (rec.getReadUnmappedFlag() && 0 != rec.getMappingQuality()) { rec.setMappingQuality(0); } writer.addAlignment(rec); progress.record(rec); } writer.close(); it.close(); CloserUtil.close(reader); return 0; }
Example 14
Source File: ExtractSequences.java From picard with MIT License | 5 votes |
@Override protected int doWork() { IOUtil.assertFileIsReadable(INTERVAL_LIST); IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE); IOUtil.assertFileIsWritable(OUTPUT); final IntervalList intervals = IntervalList.fromFile(INTERVAL_LIST); final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE); SequenceUtil.assertSequenceDictionariesEqual(intervals.getHeader().getSequenceDictionary(), ref.getSequenceDictionary()); final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT); for (final Interval interval : intervals) { final ReferenceSequence seq = ref.getSubsequenceAt(interval.getContig(), interval.getStart(), interval.getEnd()); final byte[] bases = seq.getBases(); if (interval.isNegativeStrand()) SequenceUtil.reverseComplement(bases); try { out.write(">"); out.write(interval.getName()); out.write("\n"); for (int i=0; i<bases.length; ++i) { if (i > 0 && i % LINE_LENGTH == 0) out.write("\n"); out.write(bases[i]); } out.write("\n"); } catch (IOException ioe) { throw new PicardException("Error writing to file " + OUTPUT.getAbsolutePath(), ioe); } } CloserUtil.close(out); return 0; }
Example 15
Source File: MergeBamAlignmentTest.java From picard with MIT License | 4 votes |
@Test public void testMerger() throws Exception { final File output = File.createTempFile("mergeTest", ".sam"); output.deleteOnExit(); doMergeAlignment(unmappedBam, Collections.singletonList(alignedBam), null, null, null, null, false, true, false, 1, "0", "1.0", "align!", "myAligner", true, fasta, output, SamPairUtil.PairOrientation.FR, null, null, null, null, null); SamReader result = SamReaderFactory.makeDefault().open(output); Assert.assertEquals(result.getFileHeader().getSequenceDictionary().getSequences().size(), 8, "Number of sequences did not match"); SAMProgramRecord pg = result.getFileHeader().getProgramRecords().get(0); Assert.assertEquals(pg.getProgramGroupId(), "0"); Assert.assertEquals(pg.getProgramVersion(), "1.0"); Assert.assertEquals(pg.getCommandLine(), "align!"); Assert.assertEquals(pg.getProgramName(), "myAligner"); final SAMReadGroupRecord rg = result.getFileHeader().getReadGroups().get(0); Assert.assertEquals(rg.getReadGroupId(), "0"); Assert.assertEquals(rg.getSample(), "Hi,Mom!"); Assert.assertEquals(result.getFileHeader().getSortOrder(), SAMFileHeader.SortOrder.coordinate); for (final SAMRecord sam : result) { // This tests that we clip both (a) when the adapter is marked in the unmapped BAM file and // (b) when the insert size is less than the read length if (sam.getReadName().equals("both_reads_align_clip_adapter") || sam.getReadName().equals("both_reads_align_clip_marked")) { Assert.assertEquals(sam.getReferenceName(), "chr7"); if (sam.getReadNegativeStrandFlag()) { Assert.assertEquals(sam.getCigarString(), "5S96M", "Incorrect CIGAR string for " + sam.getReadName()); } else { Assert.assertEquals(sam.getCigarString(), "96M5S", "Incorrect CIGAR string for " + sam.getReadName()); } } // This tests that we DON'T clip when we run off the end if there are equal to or more than // MIN_ADAPTER_BASES hanging off the end else if (sam.getReadName().equals("both_reads_align_min_adapter_bases_exceeded")) { Assert.assertEquals(sam.getReferenceName(), "chr7"); Assert.assertTrue(sam.getCigarString().indexOf("S") == -1, "Read was clipped when it should not be."); } else if (sam.getReadName().equals("neither_read_aligns_or_present")) { Assert.assertTrue(sam.getReadUnmappedFlag(), "Read should be unmapped but isn't"); } // Two pairs in which only the first read should align else if (sam.getReadName().equals("both_reads_present_only_first_aligns") || sam.getReadName().equals("read_2_too_many_gaps")) { if (sam.getFirstOfPairFlag()) { Assert.assertEquals(sam.getReferenceName(), "chr7", "Read should be mapped but isn't"); } else { Assert.assertTrue(sam.getReadUnmappedFlag(), "Read should not be mapped but is"); } } else { throw new Exception("Unexpected read name: " + sam.getReadName()); } final boolean pos = !sam.getReadNegativeStrandFlag(); Assert.assertEquals(sam.getAttribute("aa"), pos ? "Hello" : "olleH"); Assert.assertEquals(sam.getAttribute("ab"), pos ? "ATTCGG" : "CCGAAT"); Assert.assertEquals(sam.getAttribute("ac"), pos ? new byte[] {1,2,3} : new byte[] {3,2,1}); Assert.assertEquals(sam.getAttribute("as"), pos ? new short[]{1,2,3} : new short[]{3,2,1}); Assert.assertEquals(sam.getAttribute("ai"), pos ? new int[] {1,2,3} : new int[] {3,2,1}); Assert.assertEquals(sam.getAttribute("af"), pos ? new float[]{1,2,3} : new float[]{3,2,1}); } // Quick test to make sure the program record gets picked up from the file if not specified // on the command line. doMergeAlignment(unmappedBam, Collections.singletonList(alignedBam), null, null, null, null, false, true, false, 1, null, null, null, null, true, fasta, output, SamPairUtil.PairOrientation.FR, null, null, null, null, null); CloserUtil.close(result); result = SamReaderFactory.makeDefault().open(output); pg = result.getFileHeader().getProgramRecords().get(0); Assert.assertEquals(pg.getProgramGroupId(), "1", "Program group ID not picked up correctly from aligned BAM"); Assert.assertEquals(pg.getProgramVersion(), "2.0", "Program version not picked up correctly from aligned BAM"); Assert.assertNull(pg.getCommandLine(), "Program command line not picked up correctly from aligned BAM"); Assert.assertEquals(pg.getProgramName(), "Hey!", "Program name not picked up correctly from aligned BAM"); CloserUtil.close(result); }
Example 16
Source File: RevertSamTest.java From picard with MIT License | 4 votes |
private void verifyPositiveResults( final File outputFile, final RevertSam reverter, final boolean removeDuplicates, final boolean removeAlignmentInfo, final boolean restoreOriginalQualities, final boolean outputByReadGroup, final String readGroupId, final int numReadsExpected, final String sample, final String library) { outputFile.deleteOnExit(); final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).open(outputFile); final SAMFileHeader header = reader.getFileHeader(); Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.queryname); Assert.assertEquals(header.getProgramRecords().size(), removeAlignmentInfo ? 0 : 1); final List<SAMReadGroupRecord> readGroups = header.getReadGroups(); if (outputByReadGroup) { Assert.assertEquals(readGroups.size(), 1); Assert.assertEquals(readGroups.get(0).getId(), readGroupId); } for (final SAMReadGroupRecord rg : header.getReadGroups()) { if (sample != null) { Assert.assertEquals(rg.getSample(), sample); } else { Assert.assertEquals(rg.getSample(), "Hi,Mom!"); } if (library != null) { Assert.assertEquals(rg.getLibrary(), library); } else { Assert.assertEquals(rg.getLibrary(), "my-library"); } } int numReads = 0; for (final SAMRecord rec : reader) { numReads++; if (removeDuplicates) { Assert.assertFalse(rec.getDuplicateReadFlag(), "Duplicates should have been removed: " + rec.getReadName()); } if (removeAlignmentInfo) { Assert.assertTrue(rec.getReadUnmappedFlag(), "Alignment info should have been removed: " + rec.getReadName()); } if (restoreOriginalQualities && !unmappedRead.equals( rec.getReadName() + "/" + (rec.getFirstOfPairFlag() ? "1" : "2"))) { Assert.assertEquals(rec.getBaseQualityString(), revertedQualities); } else { Assert.assertNotSame(rec.getBaseQualityString(), revertedQualities); } for (final SAMRecord.SAMTagAndValue attr : rec.getAttributes()) { if (removeAlignmentInfo || (!attr.tag.equals("PG") && !attr.tag.equals("NM") && !attr.tag.equals(SAMTag.MQ.toString()))) { Assert.assertFalse(reverter.ATTRIBUTE_TO_CLEAR.contains(attr.tag), attr.tag + " should have been cleared."); } } } Assert.assertEquals(numReads, numReadsExpected); CloserUtil.close(reader); }
Example 17
Source File: MultiCellDigitalAlleleCountsIterator.java From Drop-seq with MIT License | 4 votes |
@Override public void close() { CloserUtil.close(this.iter); }
Example 18
Source File: SelectCellsByNumTranscripts.java From Drop-seq with MIT License | 4 votes |
@Override protected int doWork() { IOUtil.assertFileIsWritable(OUTPUT); if (MIN_READS_PER_CELL == null || MIN_READS_PER_CELL < MIN_TRANSCRIPTS_PER_CELL) MIN_READS_PER_CELL = MIN_TRANSCRIPTS_PER_CELL; final List<String> cellBarcodes; if (INPUT_INTERIM_CELLS == null) cellBarcodes = new BarcodeListRetrieval().getListCellBarcodesByReadCount( this.INPUT, this.CELL_BARCODE_TAG, this.READ_MQ, this.MIN_READS_PER_CELL, null); else cellBarcodes = readBarcodes(INPUT_INTERIM_CELLS); if (OUTPUT_INTERIM_CELLS != null) writeBarcodes(OUTPUT_INTERIM_CELLS, cellBarcodes); SamHeaderAndIterator headerAndIterator = SamFileMergeUtil.mergeInputs(Collections.singletonList(this.INPUT), false); final MapContainer mapContainer; if (ORGANISM != null && !ORGANISM.isEmpty()) { headerAndIterator = new SamHeaderAndIterator(headerAndIterator.header, new PrefixGeneWithOrganismIterator(headerAndIterator.iterator)); mapContainer = new MultiOrganismMapContainer(cellBarcodes); } else mapContainer = new SingleOrganismMapContainer(cellBarcodes); // gene/exon tags are sorted first, followed by cells UMIIterator umiIterator = new UMIIterator(headerAndIterator, GENE_NAME_TAG, GENE_STRAND_TAG, GENE_FUNCTION_TAG, this.STRAND_STRATEGY, this.LOCUS_FUNCTION_LIST, this.CELL_BARCODE_TAG, this.MOLECULAR_BARCODE_TAG, this.READ_MQ, false, cellBarcodes); String gene = null; UMICollection batch; while ((batch=umiIterator.next())!=null) { if (batch.isEmpty()) continue; String currentGene = batch.getGeneName(); // if just starting the loop if (gene==null) gene=currentGene; // you've gathered all the data for the gene, write it out and start on the next. if (!gene.equals(currentGene)) mapContainer.addToSummary(gene); // Accumulate this gene gene=currentGene; mapContainer.countExpression(gene, batch.getCellBarcode(), batch.getDigitalExpression(0, this.EDIT_DISTANCE, false)); } // write out remainder mapContainer.addToSummary(gene); final Map<String, Integer> transcriptsPerCell = mapContainer.getTranscriptCountForCellBarcodesOverTranscriptThreshold(MIN_TRANSCRIPTS_PER_CELL); log.info("Found " + transcriptsPerCell.size() + " cells with enough transcripts"); final Map.Entry<String, Integer> transcriptsPerCellArray[] = transcriptsPerCell.entrySet().toArray(new Map.Entry[transcriptsPerCell.size()]); Arrays.sort(transcriptsPerCellArray, new EntryComparator()); final List<String> finalBarcodes = new ArrayList<>(transcriptsPerCellArray.length); for (final Map.Entry<String, Integer> entry: transcriptsPerCellArray) finalBarcodes.add(entry.getKey()); writeBarcodes(OUTPUT, finalBarcodes); log.info("Wrote cell barcodes to " + OUTPUT.getAbsolutePath()); CloserUtil.close(umiIterator); if (METRICS != null) { final Metrics m = mapContainer.accumulateMetrics(transcriptsPerCell.keySet()); MetricsFile<Metrics, Integer> out = getMetricsFile(); out.addMetric(m); out.write(METRICS); } return 0; }
Example 19
Source File: MultiLevelReducibleCollectorUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
@Test(dataProvider = "variedAccumulationLevels") public void multilevelCollectorTest(final Set<MetricAccumulationLevel> accumulationLevels) { final SamReader in = SamReaderFactory.makeDefault().open(TESTFILE); final RecordCountMultiLevelCollector collector1 = new RecordCountMultiLevelCollector( accumulationLevels, in.getFileHeader().getReadGroups()); final RecordCountMultiLevelCollector collector2 = new RecordCountMultiLevelCollector( accumulationLevels, in.getFileHeader().getReadGroups()); //distribute the reads across the two collectors int count = 1; for (final SAMRecord rec : in) { if (count % 2 == 0) { collector1.acceptRecord(rec, null); } else { collector2.acceptRecord(rec, null); } count++; } collector1.finish(); collector2.finish(); // combine the results into collector1 collector1.combine(collector2); int totalProcessed = 0; int totalMetrics = 0; for(final MetricAccumulationLevel level : accumulationLevels) { final Map<String, Integer> keyToMetrics = accumulationLevelToPerUnitReads.get(level); for(final Map.Entry<String, Integer> entry : keyToMetrics.entrySet()) { final TotalNumberMetric metric = collector1.getUnitsToMetrics().get(entry.getKey()); Assert.assertEquals(entry.getValue(), metric.TALLY); Assert.assertTrue(metric.FINISHED); totalProcessed += metric.TALLY; totalMetrics += 1; } } Assert.assertEquals(collector1.getUnitsToMetrics().size(), totalMetrics); Assert.assertEquals(totalProcessed, collector1.getNumProcessed()); CloserUtil.close(in); }
Example 20
Source File: CollectVariantCallingMetrics.java From picard with MIT License | 4 votes |
@Override protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsReadable(DBSNP); if (TARGET_INTERVALS != null) IOUtil.assertFileIsReadable(TARGET_INTERVALS); if (SEQUENCE_DICTIONARY != null) IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY.toPath()); final boolean requiresIndex = this.TARGET_INTERVALS != null || this.THREAD_COUNT > 1; final VCFFileReader variantReader = new VCFFileReader(INPUT, requiresIndex); final VCFHeader vcfHeader = variantReader.getFileHeader(); CloserUtil.close(variantReader); final SAMSequenceDictionary sequenceDictionary = SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY == null ? INPUT.toPath() : SEQUENCE_DICTIONARY.toPath()); final IntervalList targetIntervals = (TARGET_INTERVALS == null) ? null : IntervalList.fromFile(TARGET_INTERVALS).uniqued(); log.info("Loading dbSNP file ..."); final DbSnpBitSetUtil.DbSnpBitSets dbsnp = DbSnpBitSetUtil.createSnpAndIndelBitSets(DBSNP, sequenceDictionary, targetIntervals, Optional.of(log)); log.info("Starting iteration of variants."); final VariantProcessor.Builder<CallingMetricAccumulator, CallingMetricAccumulator.Result> builder = VariantProcessor.Builder .generatingAccumulatorsBy(() -> { CallingMetricAccumulator accumulator = GVCF_INPUT ? new GvcfMetricAccumulator(dbsnp) : new CallingMetricAccumulator(dbsnp); accumulator.setup(vcfHeader); return accumulator; }) .combiningResultsBy(CallingMetricAccumulator.Result::merge) .withInput(INPUT) .multithreadingBy(THREAD_COUNT); if (targetIntervals != null) { builder.limitingProcessedRegionsTo(targetIntervals); } final CallingMetricAccumulator.Result result = builder.build().process(); // Fetch and write the metrics. final MetricsFile<CollectVariantCallingMetrics.VariantCallingDetailMetrics, Integer> detail = getMetricsFile(); final MetricsFile<CollectVariantCallingMetrics.VariantCallingSummaryMetrics, Integer> summary = getMetricsFile(); summary.addMetric(result.summary); result.details.forEach(detail::addMetric); final String outputPrefix = OUTPUT.getAbsolutePath() + "."; detail.write(new File(outputPrefix + CollectVariantCallingMetrics.VariantCallingDetailMetrics.getFileExtension())); summary.write(new File(outputPrefix + CollectVariantCallingMetrics.VariantCallingSummaryMetrics.getFileExtension())); return 0; }