Java Code Examples for htsjdk.samtools.SAMFileWriter#close()
The following examples show how to use
htsjdk.samtools.SAMFileWriter#close() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PreprocessingTools.java From halvade with GNU General Public License v3.0 | 6 votes |
public int callElPrep(String input, String output, String rg, int threads, SAMRecordIterator SAMit, SAMFileHeader header, String dictFile, boolean updateRG, boolean keepDups, String RGID) throws InterruptedException, QualityException { SAMRecord sam; SAMFileWriterFactory factory = new SAMFileWriterFactory(); SAMFileWriter Swriter = factory.makeSAMWriter(header, true, new File(input)); int reads = 0; while(SAMit.hasNext()) { sam = SAMit.next(); if(updateRG) sam.setAttribute(SAMTag.RG.name(), RGID); Swriter.addAlignment(sam); reads++; } Swriter.close(); String customArgs = HalvadeConf.getCustomArgs(context.getConfiguration(), "elprep", ""); String[] command = CommandGenerator.elPrep(bin, input, output, threads, true, rg, null, !keepDups, customArgs); long estimatedTime = runProcessAndWait("elPrep", command); if(context != null) context.getCounter(HalvadeCounters.TIME_ELPREP).increment(estimatedTime); return reads; }
Example 2
Source File: SortSam.java From picard with MIT License | 6 votes |
protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsWritable(OUTPUT); final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT); ; reader.getFileHeader().setSortOrder(SORT_ORDER.getSortOrder()); final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), false, OUTPUT); writer.setProgressLogger( new ProgressLogger(log, (int) 1e7, "Wrote", "records from a sorting collection")); final ProgressLogger progress = new ProgressLogger(log, (int) 1e7, "Read"); for (final SAMRecord rec : reader) { writer.addAlignment(rec); progress.record(rec); } log.info("Finished reading inputs, merging and writing to output now."); CloserUtil.close(reader); writer.close(); return 0; }
Example 3
Source File: GatherBamFiles.java From picard with MIT License | 6 votes |
/** * Simple implementation of a gather operations that uses SAMFileReaders and Writers in order to concatenate * multiple BAM files. */ private static void gatherNormally(final List<File> inputs, final File output, final boolean createIndex, final boolean createMd5, final File referenceFasta) { final SAMFileHeader header; { header = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).getFileHeader(inputs.get(0)); } final SAMFileWriter out = new SAMFileWriterFactory().setCreateIndex(createIndex).setCreateMd5File(createMd5).makeSAMOrBAMWriter(header, true, output); for (final File f : inputs) { log.info("Gathering " + f.getAbsolutePath()); final SamReader in = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).open(f); for (final SAMRecord rec : in) out.addAlignment(rec); CloserUtil.close(in); } out.close(); }
Example 4
Source File: BamSlicerApplication.java From hmftools with GNU General Public License v3.0 | 6 votes |
private static void sliceFromVCF(@NotNull CommandLine cmd) throws IOException { String inputPath = cmd.getOptionValue(INPUT); String vcfPath = cmd.getOptionValue(VCF); int proximity = Integer.parseInt(cmd.getOptionValue(PROXIMITY, "500")); SamReaderFactory readerFactory = createFromCommandLine(cmd); SamReader reader = readerFactory.open(new File(inputPath)); QueryInterval[] intervals = getIntervalsFromVCF(vcfPath, reader.getFileHeader(), proximity); CloseableIterator<SAMRecord> iterator = reader.queryOverlapping(intervals); SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true) .makeBAMWriter(reader.getFileHeader(), true, new File(cmd.getOptionValue(OUTPUT))); writeToSlice(writer, iterator); writer.close(); reader.close(); }
Example 5
Source File: CollectGcBiasMetricsTest.java From picard with MIT License | 6 votes |
public File build (final List<SAMRecordSetBuilder> setBuilder, final File unsortedSam, final SAMFileHeader header) throws IOException { final File sortedSam = VcfTestUtils.createTemporaryIndexedFile("CollectGcBias", ".bam"); final SAMFileWriter writer = new SAMFileWriterFactory() .setCreateIndex(true).makeBAMWriter(header, false, unsortedSam); for (final SAMRecordSetBuilder subSetBuilder : setBuilder) { for (final SAMRecord record : subSetBuilder) { writer.addAlignment(record); } } writer.close(); final SortSam sorter = new SortSam(); final String[] args = new String[] { "INPUT=" + unsortedSam.getAbsolutePath(), "OUTPUT=" + sortedSam.getAbsolutePath(), "SORT_ORDER=coordinate" }; sorter.instanceMain(args); return sortedSam; }
Example 6
Source File: TagReadWithGeneExonFunction.java From Drop-seq with MIT License | 5 votes |
@Override protected int doWork() { IOUtil.assertFileIsReadable(this.INPUT); IOUtil.assertFileIsReadable(this.ANNOTATIONS_FILE); if (this.SUMMARY!=null) IOUtil.assertFileIsWritable(this.SUMMARY); IOUtil.assertFileIsWritable(this.OUTPUT); SamReader inputSam = SamReaderFactory.makeDefault().open(INPUT); SAMFileHeader header = inputSam.getFileHeader(); SamHeaderUtil.addPgRecord(header, this); SAMSequenceDictionary bamDict = header.getSequenceDictionary(); final OverlapDetector<Gene> geneOverlapDetector = GeneAnnotationReader.loadAnnotationsFile(ANNOTATIONS_FILE, bamDict); SAMFileWriter writer= new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, OUTPUT); for (SAMRecord r: inputSam) { pl.record(r); if (!r.getReadUnmappedFlag()) r= setAnnotations(r, geneOverlapDetector); writer.addAlignment(r); } CloserUtil.close(inputSam); writer.close(); if (this.USE_STRAND_INFO) log.info(this.metrics.toString()); if (SUMMARY==null) return 0; //process summary MetricsFile<ReadTaggingMetric, Integer> outFile = new MetricsFile<>(); outFile.addMetric(this.metrics); outFile.write(this.SUMMARY); return 0; }
Example 7
Source File: CleanSam.java From picard with MIT License | 5 votes |
/** * Do the work after command line has been parsed. * RuntimeException may be thrown by this method, and are reported appropriately. * * @return program exit status. */ @Override protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsWritable(OUTPUT); final SamReaderFactory factory = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE); if (VALIDATION_STRINGENCY == ValidationStringency.STRICT) { factory.validationStringency(ValidationStringency.LENIENT); } final SamReader reader = factory.open(INPUT); final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), true, OUTPUT); final CloseableIterator<SAMRecord> it = reader.iterator(); final ProgressLogger progress = new ProgressLogger(Log.getInstance(CleanSam.class)); // If the read (or its mate) maps off the end of the alignment, clip it while (it.hasNext()) { final SAMRecord rec = it.next(); // If the read (or its mate) maps off the end of the alignment, clip it AbstractAlignmentMerger.createNewCigarsIfMapsOffEndOfReference(rec); // check the read's mapping quality if (rec.getReadUnmappedFlag() && 0 != rec.getMappingQuality()) { rec.setMappingQuality(0); } writer.addAlignment(rec); progress.record(rec); } writer.close(); it.close(); CloserUtil.close(reader); return 0; }
Example 8
Source File: BAMTestUtil.java From Hadoop-BAM with MIT License | 5 votes |
public static File writeBamFileWithLargeHeader() throws IOException { SAMRecordSetBuilder samRecordSetBuilder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.queryname); for (int i = 0; i < 1000; i++) { int chr = 20; int start1 = (i + 1) * 1000; int start2 = start1 + 100; samRecordSetBuilder.addPair(String.format("test-read-%03d", i), chr, start1, start2); } final File bamFile = File.createTempFile("test", ".bam"); bamFile.deleteOnExit(); SAMFileHeader samHeader = samRecordSetBuilder.getHeader(); StringBuffer sb = new StringBuffer(); for (int i = 0; i < 1000000; i++) { sb.append("0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"); } samHeader.addComment(sb.toString()); final SAMFileWriter bamWriter = new SAMFileWriterFactory() .makeSAMOrBAMWriter(samHeader, true, bamFile); for (final SAMRecord rec : samRecordSetBuilder.getRecords()) { bamWriter.addAlignment(rec); } bamWriter.close(); return bamFile; }
Example 9
Source File: SortedSAMWriter.java From abra2 with MIT License | 5 votes |
public void outputFinal(int sampleIdx, String inputBam) throws IOException { Logger.info("Finishing: " + outputFiles[sampleIdx]); SAMRecord[] readsByNameArray = null; SAMRecord[] readsByCoordArray = null; if (shouldSort) { // Initialize internal read buffers used by SortingCollection2 // These are initialized once per thread and re-used each time // a SortingCollection2 is initialized. readsByNameArray = new SAMRecord[maxRecordsInRam]; readsByCoordArray = new SAMRecord[maxRecordsInRam]; // Only allow buffering if sorting writerFactory.setUseAsyncIo(true); writerFactory.setAsyncOutputBufferSize(ASYNC_READ_CACHE_SIZE); writerFactory.setCreateIndex(shouldCreateIndex); } else { writerFactory.setUseAsyncIo(false); } writerFactory.setCompressionLevel(finalCompressionLevel); if (shouldSort) { samHeaders[sampleIdx].setSortOrder(SortOrder.coordinate); } else { samHeaders[sampleIdx].setSortOrder(SortOrder.unsorted); } SAMFileWriter output = writerFactory.makeBAMWriter(samHeaders[sampleIdx], true, new File(outputFiles[sampleIdx]), finalCompressionLevel); for (String chromosome : chromosomeChunker.getChromosomes()) { processChromosome(output, sampleIdx, chromosome, readsByNameArray, readsByCoordArray); } processUnmapped(output, inputBam); output.close(); }
Example 10
Source File: SamBamUtils.java From chipster with MIT License | 5 votes |
private static void closeIfPossible(SAMFileWriter writer) { if (writer != null) { try { writer.close(); } catch (Exception e) { // Ignore } } }
Example 11
Source File: FilterBamByTag.java From Drop-seq with MIT License | 5 votes |
/** * * @param in * @param out * @param values */ void processPairedMode (final SamReader in, final SAMFileWriter out, final Set<String> values, final File summaryFile, Integer mapQuality, boolean allowPartial) { ProgressLogger progLog = new ProgressLogger(log); FilteredReadsMetric m = new FilteredReadsMetric(); PeekableIterator<SAMRecord> iter = new PeekableIterator<>(CustomBAMIterators.getQuerynameSortedRecords(in)); while (iter.hasNext()) { SAMRecord r1 = iter.next(); progLog.record(r1); boolean filterFlag1 = filterRead(r1, this.TAG, values, this.ACCEPT_TAG, mapQuality, allowPartial); SAMRecord r2 = null; if (iter.hasNext()) r2 = iter.peek(); // check for r2 being null in case the last read is unpaired. if (r2!=null && r1.getReadName().equals(r2.getReadName())) { // paired read found. progLog.record(r2); r2=iter.next(); boolean filterFlag2 = filterRead(r2, this.TAG, values, this.ACCEPT_TAG, mapQuality, allowPartial); // if in accept tag mode, if either read shouldn't be filterd accept the pair // if in reject mode, if both reads shouldn't be filtered to accept the pair. if ((!filterFlag1 || !filterFlag2 & this.ACCEPT_TAG) || (!filterFlag1 && !filterFlag2 & !this.ACCEPT_TAG)) { out.addAlignment(r1); out.addAlignment(r2); m.READS_ACCEPTED+=2; } else m.READS_REJECTED+=2; } else if (!filterFlag1) { out.addAlignment(r1); m.READS_ACCEPTED++; } else { m.READS_REJECTED++; } } CloserUtil.close(in); writeSummary(summaryFile, m); out.close(); reportAndCheckFilterResults(m); }
Example 12
Source File: Evidence2SAM.java From cramtools with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws IOException, ParseException { EvidenceRecordFileIterator iterator = new EvidenceRecordFileIterator(new File(args[0])); Read context = new Read(); SAMFileHeader header = new SAMFileHeader(); header.setSortOrder(SAMFileHeader.SortOrder.unsorted); SAMSequenceRecord samSequenceRecord = new SAMSequenceRecord("chr10", 135534747); samSequenceRecord.setAttribute(SAMSequenceRecord.ASSEMBLY_TAG, iterator.assembly_ID); String readGroup = String.format("%s-%s", iterator.assembly_ID, iterator.chromosome); SAMReadGroupRecord readGroupRecord = new SAMReadGroupRecord(readGroup); readGroupRecord.setAttribute(SAMReadGroupRecord.READ_GROUP_SAMPLE_TAG, iterator.sample); readGroupRecord.setAttribute(SAMReadGroupRecord.PLATFORM_UNIT_TAG, readGroup); readGroupRecord.setAttribute(SAMReadGroupRecord.SEQUENCING_CENTER_TAG, "\"Complete Genomics\""); Date date = new SimpleDateFormat("yyyy-MMM-dd hh:mm:ss.S").parse(iterator.generatedAt); readGroupRecord.setAttribute(SAMReadGroupRecord.DATE_RUN_PRODUCED_TAG, new SimpleDateFormat("yyyy-MM-dd").format(date)); readGroupRecord.setAttribute(SAMReadGroupRecord.PLATFORM_TAG, "\"Complete Genomics\""); header.addReadGroup(readGroupRecord); header.addSequence(samSequenceRecord); SAMFileWriterFactory f = new SAMFileWriterFactory(); SAMFileWriter samWriter; if (args.length > 1) samWriter = f.makeBAMWriter(header, false, new File(args[1])); else samWriter = f.makeSAMWriter(header, false, System.out); int i = 0; long time = System.currentTimeMillis(); DedupIterator dedupIt = new DedupIterator(iterator); while (dedupIt.hasNext()) { EvidenceRecord evidenceRecord = dedupIt.next(); if (evidenceRecord == null) throw new RuntimeException(); try { context.reset(evidenceRecord); context.parse(); } catch (Exception e) { System.err.println("Failed on line:"); System.err.println(evidenceRecord.line); throw new RuntimeException(e); } SAMRecord[] samRecords = context.toSAMRecord(header); for (SAMRecord samRecord : samRecords) { samRecord.setAttribute(SAMTag.RG.name(), readGroup); samWriter.addAlignment(samRecord); } i++; if (i % 1000 == 0) { if (System.currentTimeMillis() - time > 10 * 1000) { time = System.currentTimeMillis(); System.err.println(i); } } if (i > 10000) break; } samWriter.close(); }
Example 13
Source File: CollapseTagWithContext.java From Drop-seq with MIT License | 4 votes |
@Override protected int doWork() { int vc = validateCommands(); if (vc>0) return vc; if (this.COUNT_TAGS_EDIT_DISTANCE>0) this.medUMI = new MapBarcodesByEditDistance(false); med = new MapBarcodesByEditDistance(false, this.NUM_THREADS, 0); PrintStream outMetrics = null; if (this.ADAPTIVE_ED_METRICS_FILE!=null) { outMetrics = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(this.ADAPTIVE_ED_METRICS_FILE)); writeAdaptiveEditDistanceMetricsHeader(this.ADAPTIVE_ED_METRICS_ED_LIST, outMetrics); } if (this.MUTATIONAL_COLLAPSE_METRICS_FILE!=null) { med = new MapBarcodesByEditDistance(true, this.NUM_THREADS, 1000); outMetrics = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(this.MUTATIONAL_COLLAPSE_METRICS_FILE)); writeMutationalCollapseMetricsHeader(this.ADAPTIVE_ED_METRICS_ED_LIST, outMetrics); } IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsWritable(OUTPUT); SamReader reader = SamReaderFactory.makeDefault().open(INPUT); SAMFileHeader header = reader.getFileHeader(); SortOrder sortOrder= header.getSortOrder(); SAMFileWriter writer = getWriter (reader); final ObjectSink<SAMRecord> recSink = new SamWriterSink(writer); PeekableGroupingIterator<SAMRecord> groupingIter = orderReadsByTagsPeekable(reader, this.COLLAPSE_TAG, this.CONTEXT_TAGS, this.READ_MQ, this.OUT_TAG, recSink); log.info("Collapsing tag and writing results"); if (!LOW_MEMORY_MODE) fasterIteration(groupingIter, writer, outMetrics); else lowMemoryIteration(groupingIter, writer, outMetrics, header); log.info("Re-sorting output BAM in "+ sortOrder.toString()+ " if neccesary"); CloserUtil.close(groupingIter); CloserUtil.close(reader); writer.close(); if (outMetrics!=null) CloserUtil.close(outMetrics); log.info("DONE"); return 0; }
Example 14
Source File: Transcode.java From cramtools with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws IOException { Params params = new Params(); JCommander jc = new JCommander(params); jc.parse(args); Log.setGlobalLogLevel(params.logLevel); if (args.length == 0 || params.help) { usage(jc); System.exit(1); } if (params.reference == null) { System.out.println("Reference file not found, will try downloading..."); } ReferenceSource referenceSource = null; if (params.reference != null) { System.setProperty("reference", params.reference.getAbsolutePath()); referenceSource = new ReferenceSource(params.reference); } else { String prop = System.getProperty("reference"); if (prop != null) { referenceSource = new ReferenceSource(new File(prop)); } } SamReaderFactory factory = SamReaderFactory.make().validationStringency(params.validationLevel); SamInputResource r; if ("file".equalsIgnoreCase(params.url.getProtocol())) r = SamInputResource.of(params.url.getPath()); else r = SamInputResource.of(params.url); SamReader reader = factory.open(r); SAMRecordIterator iterator = reader.iterator(); SAMFileWriterFactory writerFactory = new SAMFileWriterFactory(); SAMFileWriter writer = null; OutputStream os = new BufferedOutputStream(new FileOutputStream(params.outputFile)); switch (params.outputFormat) { case BAM: writer = writerFactory.makeBAMWriter(reader.getFileHeader(), reader.getFileHeader().getSortOrder() == SortOrder.coordinate, os); break; case CRAM: writer = writerFactory.makeCRAMWriter(reader.getFileHeader(), os, params.reference); break; default: System.out.println("Unknown output format: " + params.outputFormat); System.exit(1); } while (iterator.hasNext()) { writer.addAlignment(iterator.next()); } writer.close(); reader.close(); }
Example 15
Source File: FilterBam.java From Drop-seq with MIT License | 4 votes |
@Override protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsWritable(OUTPUT); buildPatterns(); SamReader in = SamReaderFactory.makeDefault().open(INPUT); SAMFileHeader fileHeader = editSequenceDictionary(in.getFileHeader().clone()); SamHeaderUtil.addPgRecord(fileHeader, this); SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(fileHeader, true, OUTPUT); ProgressLogger progLog=new ProgressLogger(log); final boolean sequencesRemoved = fileHeader.getSequenceDictionary().getSequences().size() != in.getFileHeader().getSequenceDictionary().getSequences().size(); FilteredReadsMetric m = new FilteredReadsMetric(); for (final SAMRecord r : in) { progLog.record(r); if (!filterRead(r)) { String sequenceName = stripReferencePrefix(r.getReferenceName()); String mateSequenceName = null; if (r.getMateReferenceIndex() != -1) mateSequenceName = stripReferencePrefix(r.getMateReferenceName()); if (sequencesRemoved || sequenceName != null) { if (sequenceName == null) sequenceName = r.getReferenceName(); // Even if sequence name has not been edited, if sequences have been removed, need to set // reference name again to invalidate reference index cache. r.setReferenceName(sequenceName); } if (r.getMateReferenceIndex() != -1 && (sequencesRemoved || mateSequenceName != null)) { if (mateSequenceName == null) mateSequenceName = r.getMateReferenceName(); // It's possible that the mate was mapped to a reference sequence that has been dropped from // the sequence dictionary. If so, set the mate to be unmapped. if (fileHeader.getSequenceDictionary().getSequence(mateSequenceName) != null) r.setMateReferenceName(mateSequenceName); else { r.setMateUnmappedFlag(true); r.setMateReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX); r.setMateAlignmentStart(0); } } out.addAlignment(r); m.READS_ACCEPTED++; } else { m.READS_REJECTED++; } } // write the summary if the summary file is not null. writeSummary(this.SUMMARY, m); CloserUtil.close(in); out.close(); FilterProgramUtils.reportAndCheckFilterResults("reads", m.READS_ACCEPTED, m.READS_REJECTED, PASSING_READ_THRESHOLD, log); return (0); }
Example 16
Source File: MergeBamAlignmentTest.java From picard with MIT License | 4 votes |
private void testBestFragmentMapqStrategy(final String testName, final int[] firstMapQs, final int[] secondMapQs, final boolean includeSecondary, final int expectedFirstMapq, final int expectedSecondMapq) throws Exception { final File unmappedSam = File.createTempFile("unmapped.", ".sam"); unmappedSam.deleteOnExit(); final SAMFileWriterFactory factory = new SAMFileWriterFactory(); final SAMFileHeader header = new SAMFileHeader(); header.setSortOrder(SAMFileHeader.SortOrder.queryname); final String readName = "theRead"; final SAMRecord firstUnmappedRead = new SAMRecord(header); firstUnmappedRead.setReadName(readName); firstUnmappedRead.setReadString("ACGTACGTACGTACGT"); firstUnmappedRead.setBaseQualityString("5555555555555555"); firstUnmappedRead.setReadUnmappedFlag(true); firstUnmappedRead.setMateUnmappedFlag(true); firstUnmappedRead.setReadPairedFlag(true); firstUnmappedRead.setFirstOfPairFlag(true); final SAMRecord secondUnmappedRead = new SAMRecord(header); secondUnmappedRead.setReadName(readName); secondUnmappedRead.setReadString("TCGAACGTTCGAACTG"); secondUnmappedRead.setBaseQualityString("6666666666666666"); secondUnmappedRead.setReadUnmappedFlag(true); secondUnmappedRead.setMateUnmappedFlag(true); secondUnmappedRead.setReadPairedFlag(true); secondUnmappedRead.setSecondOfPairFlag(true); final SAMFileWriter unmappedWriter = factory.makeSAMWriter(header, false, unmappedSam); unmappedWriter.addAlignment(firstUnmappedRead); unmappedWriter.addAlignment(secondUnmappedRead); unmappedWriter.close(); final File alignedSam = File.createTempFile("aligned.", ".sam"); alignedSam.deleteOnExit(); final String sequence = "chr1"; // Populate the header with SAMSequenceRecords header.setSequenceDictionary(SAMSequenceDictionaryExtractor.extractDictionary(sequenceDict2.toPath())); final SAMFileWriter alignedWriter = factory.makeSAMWriter(header, false, alignedSam); addAlignmentsForBestFragmentMapqStrategy(alignedWriter, firstUnmappedRead, sequence, firstMapQs); addAlignmentsForBestFragmentMapqStrategy(alignedWriter, secondUnmappedRead, sequence, secondMapQs); alignedWriter.close(); final File output = File.createTempFile("testBestFragmentMapqStrategy." + testName, ".sam"); output.deleteOnExit(); doMergeAlignment(unmappedSam, Collections.singletonList(alignedSam), null, null, null, null, false, true, false, 1, "0", "1.0", "align!", "myAligner", true, fasta, output, SamPairUtil.PairOrientation.FR, MergeBamAlignment.PrimaryAlignmentStrategy.BestEndMapq, null, includeSecondary, null, null); final SamReader reader = SamReaderFactory.makeDefault().open(output); int numFirstRecords = 0; int numSecondRecords = 0; int firstPrimaryMapq = -1; int secondPrimaryMapq = -1; for (final SAMRecord rec: reader) { Assert.assertTrue(rec.getReadPairedFlag()); if (rec.getFirstOfPairFlag()) ++numFirstRecords; else if (rec.getSecondOfPairFlag()) ++ numSecondRecords; else Assert.fail("unpossible!"); if (!rec.getReadUnmappedFlag() && !rec.getNotPrimaryAlignmentFlag()) { if (rec.getFirstOfPairFlag()) { Assert.assertEquals(firstPrimaryMapq, -1); firstPrimaryMapq = rec.getMappingQuality(); } else { Assert.assertEquals(secondPrimaryMapq, -1); secondPrimaryMapq = rec.getMappingQuality(); } } else if (rec.getNotPrimaryAlignmentFlag()) { Assert.assertTrue(rec.getMateUnmappedFlag()); } } reader.close(); Assert.assertEquals(firstPrimaryMapq, expectedFirstMapq); Assert.assertEquals(secondPrimaryMapq, expectedSecondMapq); if (!includeSecondary) { Assert.assertEquals(numFirstRecords, 1); Assert.assertEquals(numSecondRecords, 1); } else { // If no alignments for an end, there will be a single unmapped record Assert.assertEquals(numFirstRecords, Math.max(1, firstMapQs.length)); Assert.assertEquals(numSecondRecords, Math.max(1, secondMapQs.length)); } }
Example 17
Source File: MergeSamFiles.java From picard with MIT License | 4 votes |
/** Combines multiple SAM/BAM files into one. */ @Override protected int doWork() { boolean matchedSortOrders = true; // read interval list if it is defined final List<Interval> intervalList = (INTERVALS == null ? null : IntervalList.fromFile(INTERVALS).uniqued().getIntervals() ); // map reader->iterator used if INTERVALS is defined final Map<SamReader, CloseableIterator<SAMRecord> > samReaderToIterator = new HashMap<>(INPUT.size()); final List<Path> inputPaths = IOUtil.getPaths(INPUT); // Open the files for reading and writing final List<SamReader> readers = new ArrayList<>(); final List<SAMFileHeader> headers = new ArrayList<>(); { SAMSequenceDictionary dict = null; // Used to try and reduce redundant SDs in memory for (final Path inFile : inputPaths) { IOUtil.assertFileIsReadable(inFile); final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(inFile); if (INTERVALS != null) { if (!in.hasIndex()) { throw new PicardException("Merging with interval but BAM file is not indexed: " + inFile); } final CloseableIterator<SAMRecord> samIterator = new SamRecordIntervalIteratorFactory().makeSamRecordIntervalIterator(in, intervalList, true); samReaderToIterator.put(in, samIterator); } readers.add(in); headers.add(in.getFileHeader()); // A slightly hackish attempt to keep memory consumption down when merging multiple files with // large sequence dictionaries (10,000s of sequences). If the dictionaries are identical, then // replace the duplicate copies with a single dictionary to reduce the memory footprint. if (dict == null) { dict = in.getFileHeader().getSequenceDictionary(); } else if (dict.equals(in.getFileHeader().getSequenceDictionary())) { in.getFileHeader().setSequenceDictionary(dict); } matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER; } } // If all the input sort orders match the output sort order then just merge them and // write on the fly, otherwise setup to merge and sort before writing out the final file IOUtil.assertFileIsWritable(OUTPUT); final boolean presorted; final SAMFileHeader.SortOrder headerMergerSortOrder; final boolean mergingSamRecordIteratorAssumeSorted; if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted || ASSUME_SORTED || INTERVALS != null ) { log.info("Input files are in same order as output so sorting to temp directory is not needed."); headerMergerSortOrder = SORT_ORDER; mergingSamRecordIteratorAssumeSorted = ASSUME_SORTED; presorted = true; } else { log.info("Sorting input files using temp directory " + TMP_DIR); headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted; mergingSamRecordIteratorAssumeSorted = false; presorted = false; } final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(headerMergerSortOrder, headers, MERGE_SEQUENCE_DICTIONARIES); final MergingSamRecordIterator iterator; // no interval defined, get an iterator for the whole bam if (intervalList == null) { iterator = new MergingSamRecordIterator(headerMerger, readers, mergingSamRecordIteratorAssumeSorted); } else { // show warning related to https://github.com/broadinstitute/picard/pull/314/files log.info("Warning: merged bams from different interval lists may contain the same read in both files"); iterator = new MergingSamRecordIterator(headerMerger, samReaderToIterator, true); } final SAMFileHeader header = headerMerger.getMergedHeader(); for (final String comment : COMMENT) { header.addComment(comment); } header.setSortOrder(SORT_ORDER); final SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory(); if (USE_THREADING) { samFileWriterFactory.setUseAsyncIo(true); } final SAMFileWriter out = samFileWriterFactory.makeSAMOrBAMWriter(header, presorted, OUTPUT); // Lastly loop through and write out the records final ProgressLogger progress = new ProgressLogger(log, PROGRESS_INTERVAL); while (iterator.hasNext()) { final SAMRecord record = iterator.next(); out.addAlignment(record); progress.record(record); } log.info("Finished reading inputs."); for(final CloseableIterator<SAMRecord> iter : samReaderToIterator.values()) CloserUtil.close(iter); CloserUtil.close(readers); out.close(); return 0; }
Example 18
Source File: CollectInsertSizeMetricsTest.java From picard with MIT License | 4 votes |
@Test public void testWidthOfMetrics() throws IOException { final File testSamFile = File.createTempFile("CollectInsertSizeMetrics", ".bam"); testSamFile.deleteOnExit(); final File testSamFileIndex = new File(testSamFile.getParentFile(),testSamFile.getName().replaceAll("bam$","bai")); testSamFileIndex.deleteOnExit(); new File(testSamFile.getAbsolutePath().replace(".bam$",".bai")).deleteOnExit(); final SAMRecordSetBuilder setBuilder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate, true); setBuilder.setReadLength(10); final int insertBy = 3; // the # of bases to increase the insert by in the records below. int queryIndex = 0; // Create records such that we have 10 records in the 10th through the 90th percentiles, and 5 for the 95th and 99th percentiles. // WIDTH_OF_10_PERCENT through WIDTH_OF_90_PERCENT (90 pairs total)) for (int j = 0; j < 9; j++) { for (int i = 0; i < 5; i++, queryIndex++) { setBuilder.addPair("query:" + queryIndex, 0, 1, 50 + j*insertBy, false, false, "10M", "10M", false, true, 60); } for (int i = 0; i < 5; i++, queryIndex++) { setBuilder.addPair("query:" + queryIndex, 0, 1, 50 - j*insertBy, false, false, "10M", "10M", false, true, 60); } } // WIDTH_OF_95_PERCENT through WIDTH_OF_99_PERCENT (10 pairs total) for (int j = 9; j < 11; j++) { for (int i = 0; i < 3; i++, queryIndex++) { setBuilder.addPair("query:" + queryIndex, 0, 1, 50 + j*insertBy, false, false, "10M", "10M", false, true, 60); } for (int i = 0; i < 2; i++, queryIndex++) { setBuilder.addPair("query:" + queryIndex, 0, 1, 50 - j*insertBy, false, false, "10M", "10M", false, true, 60); } } // Add one to make the an odd # of pairs for the median setBuilder.addPair("query:" + queryIndex, 0, 1, 50, false, false, "10M", "10M", false, true, 60); final SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(setBuilder.getHeader(), false, testSamFile); setBuilder.forEach(writer::addAlignment); writer.close(); final File outfile = File.createTempFile("test", ".insert_size_metrics"); final File pdf = File.createTempFile("test", ".pdf"); outfile.deleteOnExit(); pdf.deleteOnExit(); final String[] args = new String[] { "INPUT=" + testSamFile.getAbsolutePath(), "OUTPUT=" + outfile.getAbsolutePath(), "Histogram_FILE=" + pdf.getAbsolutePath() }; Assert.assertEquals(runPicardCommandLine(args), 0); final MetricsFile<InsertSizeMetrics, Comparable<?>> output = new MetricsFile<InsertSizeMetrics, Comparable<?>>(); output.read(new FileReader(outfile)); final List<InsertSizeMetrics> metrics = output.getMetrics(); Assert.assertEquals(metrics.size(), 1); final InsertSizeMetrics metric = metrics.get(0); Assert.assertEquals(metric.PAIR_ORIENTATION.name(), "FR"); Assert.assertEquals((int) metric.MEDIAN_INSERT_SIZE, 59); Assert.assertEquals((int) metric.MODE_INSERT_SIZE, 59); Assert.assertEquals(metric.MIN_INSERT_SIZE, 29); Assert.assertEquals(metric.MAX_INSERT_SIZE, 89); Assert.assertEquals(metric.READ_PAIRS, 101); Assert.assertEquals(metric.WIDTH_OF_10_PERCENT, 1); Assert.assertEquals(metric.WIDTH_OF_20_PERCENT, 1 + insertBy*2); Assert.assertEquals(metric.WIDTH_OF_30_PERCENT, 1 + insertBy*4); Assert.assertEquals(metric.WIDTH_OF_40_PERCENT, 1 + insertBy*6); Assert.assertEquals(metric.WIDTH_OF_50_PERCENT, 1 + insertBy*8); Assert.assertEquals(metric.WIDTH_OF_60_PERCENT, 1 + insertBy*10); Assert.assertEquals(metric.WIDTH_OF_70_PERCENT, 1 + insertBy*12); Assert.assertEquals(metric.WIDTH_OF_80_PERCENT, 1 + insertBy*14); Assert.assertEquals(metric.WIDTH_OF_90_PERCENT, 1 + insertBy*16); Assert.assertEquals(metric.WIDTH_OF_95_PERCENT, 1 + insertBy*18); Assert.assertEquals(metric.WIDTH_OF_99_PERCENT, 1 + insertBy*20); }
Example 19
Source File: MergeBamAlignmentTest.java From picard with MIT License | 4 votes |
/** * @return a 2-element array in which the first element is the unmapped SAM, and the second the mapped SAM. */ private File[] createSamFilesToBeMerged(final MultipleAlignmentSpec[] specs) { try { final File unmappedSam = File.createTempFile("unmapped.", ".sam"); unmappedSam.deleteOnExit(); final SAMFileWriterFactory factory = new SAMFileWriterFactory(); final SAMFileHeader header = new SAMFileHeader(); header.setSortOrder(SAMFileHeader.SortOrder.queryname); final SAMRecord unmappedRecord = new SAMRecord(header); unmappedRecord.setReadName("theRead"); unmappedRecord.setReadString("ACGTACGTACGTACGT"); unmappedRecord.setBaseQualityString("5555555555555555"); unmappedRecord.setReadUnmappedFlag(true); final SAMFileWriter unmappedWriter = factory.makeSAMWriter(header, false, unmappedSam); unmappedWriter.addAlignment(unmappedRecord); unmappedWriter.close(); final File alignedSam = File.createTempFile("aligned.", ".sam"); alignedSam.deleteOnExit(); final String sequence = "chr1"; // Populate the header with SAMSequenceRecords header.setSequenceDictionary(SAMSequenceDictionaryExtractor.extractDictionary(sequenceDict2.toPath())); final SAMFileWriter alignedWriter = factory.makeSAMWriter(header, false, alignedSam); for (final MultipleAlignmentSpec spec : specs) { final SAMRecord alignedRecord = new SAMRecord(header); alignedRecord.setReadName(unmappedRecord.getReadName()); alignedRecord.setReadBases(unmappedRecord.getReadBases()); alignedRecord.setBaseQualities(unmappedRecord.getBaseQualities()); alignedRecord.setReferenceName(sequence); alignedRecord.setAlignmentStart(1); alignedRecord.setReadNegativeStrandFlag(spec.reverseStrand); alignedRecord.setCigarString(spec.cigar); alignedRecord.setMappingQuality(spec.mapQ); if (spec.oneOfTheBest) { alignedRecord.setAttribute(ONE_OF_THE_BEST_TAG, 1); } alignedWriter.addAlignment(alignedRecord); } alignedWriter.close(); return new File[]{unmappedSam, alignedSam}; } catch (IOException e) { throw new PicardException(e.getMessage(), e); } }
Example 20
Source File: SortedSAMWriter.java From abra2 with MIT License | 3 votes |
public static void main(String[] args) throws IOException { // Logger.LEVEL = Logger.Level.TRACE; String ref = "/home/lmose/dev/reference/hg38b/hg38.fa"; CompareToReference2 c2r = new CompareToReference2(); c2r.init(ref); ChromosomeChunker cc = new ChromosomeChunker(c2r); cc.init(); // SamReader reader = SAMRecordUtils.getSamReader("/home/lmose/dev/abra2_dev/sort_issue3/0.5.bam"); // SamReader reader = SAMRecordUtils.getSamReader("/home/lmose/dev/abra2_dev/sort_issue4/1.6.bam"); SamReader reader = SAMRecordUtils.getSamReader("/home/lmose/dev/abra2_dev/mate_fix/0.87.bam"); SAMFileHeader header = reader.getFileHeader(); header.setSortOrder(SortOrder.coordinate); int maxRecordsInRam = 1000000; SAMRecord[] readsByNameArray = new SAMRecord[maxRecordsInRam]; SAMRecord[] readsByCoordArray = new SAMRecord[maxRecordsInRam]; SortedSAMWriter writer = new SortedSAMWriter(new String[] { "/home/lmose/dev/abra2_dev/mate_fix" }, "/home/lmose/dev/abra2_dev/mate_fix", new SAMFileHeader[] { reader.getFileHeader() }, true, cc, 1,true,1000,false, false, false, maxRecordsInRam); SAMFileWriterFactory writerFactory = new SAMFileWriterFactory(); SAMFileWriter out = writerFactory.makeBAMWriter(reader.getFileHeader(), true, new File("/home/lmose/dev/abra2_dev/mate_fix/test.bam"),1); long start = System.currentTimeMillis(); writer.processChromosome(out, 0, "chr12", readsByNameArray, readsByCoordArray); out.close(); long stop = System.currentTimeMillis(); System.out.println("Elapsed msecs: " + (stop-start)); }