Java Code Examples for htsjdk.samtools.SAMFileHeader#SortOrder
The following examples show how to use htsjdk.samtools.SAMFileHeader#SortOrder.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SortSamSparkIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Runs SortSamSpark with sharded output (2 reducers), then re-reads the sharded result via
 * ReadsSparkSource and asserts the collected records satisfy the requested sort order's comparator.
 * The reference is optional: it is only passed to the tool and the readers when a name is supplied.
 * Currently disabled (enabled = false); driven by the "sortbams" data provider in the "spark" group.
 * NOTE(review): the "unused" parameter is presumably the expected-output file name consumed by the
 * non-sharded variant of this test — confirm against the data provider.
 * The joined SAM strings passed to assertSorted serve as the failure message.
 */
@Test(enabled = false, dataProvider="sortbams", groups="spark") public void testSortBAMsSharded( final String inputFileName, final String unused, final String referenceFileName, final String outputExtension, final SAMFileHeader.SortOrder sortOrder) { final File inputFile = getTestFile(inputFileName); final File actualOutputFile = createTempFile("sort_sam", outputExtension); File referenceFile = null == referenceFileName ? null : getTestFile(referenceFileName); ArgumentsBuilder args = new ArgumentsBuilder(); args.addInput(inputFile); args.addOutput(actualOutputFile); if (null != referenceFile) { args.addReference(referenceFile); } args.add(StandardArgumentDefinitions.SORT_ORDER_LONG_NAME, sortOrder.name()); args.add(GATKSparkTool.SHARDED_OUTPUT_LONG_NAME,true); args.add(GATKSparkTool.NUM_REDUCERS_LONG_NAME, "2"); this.runCommandLine(args); final ReadsSparkSource source = new ReadsSparkSource(SparkContextFactory.getTestSparkContext()); final JavaRDD<GATKRead> reads = source.getParallelReads(new GATKPath(actualOutputFile.getAbsolutePath()), referenceFile == null ? null : new GATKPath(referenceFile.getAbsolutePath())); final SAMFileHeader header = source.getHeader(new GATKPath(actualOutputFile.getAbsolutePath()), referenceFileName == null ? null : new GATKPath(referenceFile.getAbsolutePath())); final List<SAMRecord> reloadedReads = reads.collect().stream().map(read -> read.convertToSAMRecord(header)).collect(Collectors.toList()); BaseTest.assertSorted(reloadedReads.iterator(), sortOrder.getComparatorInstance(), reloadedReads.stream().map(SAMRecord::getSAMString).collect(Collectors.joining("\n"))); }
Example 2
Source File: RevertSam.java From picard with MIT License | 5 votes |
/**
 * Builds the header for the reverted output file.
 *
 * @param inHeader                   header of the input file being reverted
 * @param sortOrder                  sort order to stamp onto the new header
 * @param removeAlignmentInformation if true, the sequence dictionary and program records
 *                                   from the input are NOT carried over
 * @return a freshly constructed header for the output file
 */
private SAMFileHeader createOutHeader(
        final SAMFileHeader inHeader,
        final SAMFileHeader.SortOrder sortOrder,
        final boolean removeAlignmentInformation) {
    final SAMFileHeader revertedHeader = new SAMFileHeader();
    revertedHeader.setSortOrder(sortOrder);
    // Only keep alignment-related metadata when the caller asked to preserve it.
    if (!removeAlignmentInformation) {
        revertedHeader.setSequenceDictionary(inHeader.getSequenceDictionary());
        revertedHeader.setProgramRecords(inHeader.getProgramRecords());
    }
    return revertedHeader;
}
Example 3
Source File: SortSamSparkIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Verifies that requesting an unsupported sort order makes the tool fail with
 * {@code CommandLineException.BadArgumentValue}. Inputs come from the
 * "getInvalidSortOrders" data provider.
 */
@Test(expectedExceptions = CommandLineException.BadArgumentValue.class, dataProvider = "getInvalidSortOrders")
public void testBadSortOrders(SAMFileHeader.SortOrder badOrder) {
    final File queryNameSortedInput = new File(getTestDataDir(), QUERY_NAME_BAM);
    final ArgumentsBuilder argBuilder = new ArgumentsBuilder();
    argBuilder.addInput(queryNameSortedInput);
    argBuilder.addOutput(createTempFile("sort_bam_spark", BAM));
    argBuilder.add(StandardArgumentDefinitions.SORT_ORDER_LONG_NAME, badOrder.toString());
    this.runCommandLine(argBuilder);
}
Example 4
Source File: SortSamSparkIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * End-to-end SortSamSpark test: sorts the input file, then checks the output in two ways —
 * (1) byte-level equality against a pre-computed expected file via SamAssertionUtils, and
 * (2) that the produced records satisfy the htsjdk comparator for the requested sort order.
 * The reference is optional; when present it is supplied both to the tool and to the
 * SamReaderFactory used for reading the output back (needed for CRAM inputs — TODO confirm).
 * Driven by the "sortbams" data provider in the "spark" group.
 */
@Test(dataProvider="sortbams", groups="spark") public void testSortBAMs( final String inputFileName, final String expectedOutputFileName, final String referenceFileName, final String outputExtension, final SAMFileHeader.SortOrder sortOrder) throws Exception { final File inputFile = getTestFile(inputFileName); final File expectedOutputFile = getTestFile(expectedOutputFileName); final File actualOutputFile = createTempFile("sort_sam", outputExtension); File referenceFile = null == referenceFileName ? null : getTestFile(referenceFileName); final SamReaderFactory factory = SamReaderFactory.makeDefault(); ArgumentsBuilder args = new ArgumentsBuilder(); args.addInput(inputFile); args.addOutput(actualOutputFile); if (null != referenceFile) { args.addReference(referenceFile); factory.referenceSequence(referenceFile); } args.add(StandardArgumentDefinitions.SORT_ORDER_LONG_NAME, sortOrder.name()); this.runCommandLine(args); //test files are exactly equal SamAssertionUtils.assertSamsEqual(actualOutputFile, expectedOutputFile, ValidationStringency.DEFAULT_STRINGENCY, referenceFile); //test sorting matches htsjdk try(ReadsDataSource in = new ReadsPathDataSource(actualOutputFile.toPath(), factory )) { BaseTest.assertSorted(Utils.stream(in).map(read -> read.convertToSAMRecord(in.getHeader())).iterator(), sortOrder.getComparatorInstance()); } }
Example 5
Source File: GATKSparkTool.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Determines the sort order to report for a set of input headers.
 *
 * <p>With exactly one header, its own sort order is returned. With more than one header the
 * inputs may disagree, so {@code unsorted} is conservatively reported.
 *
 * <p>Fix: the original called {@code headers.iterator().next()} without checking for an empty
 * collection, which surfaced as an uninformative {@link java.util.NoSuchElementException};
 * an empty input now fails fast with a descriptive message.
 *
 * @param headers headers of all inputs; must contain at least one element
 * @return the single input's sort order, or {@code unsorted} when there are multiple inputs
 * @throws IllegalArgumentException if {@code headers} is null or empty
 */
static SAMFileHeader.SortOrder identifySortOrder(final Collection<SAMFileHeader> headers){
    if (headers == null || headers.isEmpty()) {
        throw new IllegalArgumentException("At least one input header is required to determine the sort order.");
    }
    if (headers.size() > 1) {
        // Multiple inputs may have conflicting orders; report unsorted to be safe.
        return SAMFileHeader.SortOrder.unsorted;
    }
    return headers.iterator().next().getSortOrder();
}
Example 6
Source File: ReadsPathDataSource.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Derives a single sort order from a list of headers: the common order when all headers
 * agree, otherwise {@code unsorted} (with a warning logged).
 *
 * @param headers headers of all inputs being read together
 * @return the shared sort order, or {@code unsorted} when the inputs disagree
 */
@VisibleForTesting
static SAMFileHeader.SortOrder identifySortOrder( final List<SAMFileHeader> headers ){
    final Set<SAMFileHeader.SortOrder> distinctOrders =
            headers.stream().map(SAMFileHeader::getSortOrder).collect(Collectors.toSet());
    if (distinctOrders.size() == 1) {
        // All inputs agree — use their common order.
        return distinctOrders.iterator().next();
    }
    final SAMFileHeader.SortOrder fallback = SAMFileHeader.SortOrder.unsorted;
    logger.warn("Inputs have different sort orders. Assuming {} sorted reads for all of them.", fallback);
    return fallback;
}
Example 7
Source File: AsIsMarkDuplicatesTest.java From picard with MIT License | 4 votes |
/**
 * Supplies the tester used by this suite.
 *
 * @param so requested sort order; intentionally ignored — this suite always exercises
 *           the by-sum-of-base-quality, original-order tester
 * @return a fresh {@code BySumOfBaseQAndInOriginalOrderMDTester}
 */
AbstractMarkDuplicatesCommandLineProgramTester getTester(SAMFileHeader.SortOrder so) {
    return new BySumOfBaseQAndInOriginalOrderMDTester();
}
Example 8
Source File: SortSamSpark.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
/**
 * Returns the htsjdk sort order backing this value.
 *
 * @return the wrapped {@link SAMFileHeader.SortOrder}
 */
public SAMFileHeader.SortOrder getSamOrder() {
    return this.order;
}
Example 9
Source File: SortSamSpark.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
/**
 * Associates this constant with its htsjdk sort order counterpart.
 *
 * @param order the {@link SAMFileHeader.SortOrder} this value wraps
 */
SparkSortOrder(SAMFileHeader.SortOrder order) {
    this.order = order;
}
Example 10
Source File: SVUtilsUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
/**
 * Checks that {@code SVUtils.getSamRecordComparator} hands back the comparator class
 * expected for each sort order supplied by the data provider.
 */
@Test(groups = "sv", dataProvider = "forGetSamRecordComparator")
void testGetSamRecordComparator(final SAMFileHeader.SortOrder sortOrder, final Class<SAMRecordComparator> expectedClass) {
    final SAMRecordComparator comparator = SVUtils.getSamRecordComparator(sortOrder);
    Assert.assertEquals(comparator.getClass(), expectedClass);
}
Example 11
Source File: BoundSAMRecordIterator.java From cramtools with Apache License 2.0 | 4 votes |
/**
 * Forwards the sort-order assertion to the wrapped iterator.
 *
 * @param sortOrder order the underlying records are expected to follow
 * @return whatever the delegate's {@code assertSorted} returns
 */
@Override
public SAMRecordIterator assertSorted(SAMFileHeader.SortOrder sortOrder) {
    return delegate.assertSorted(sortOrder);
}
Example 12
Source File: SamClosedFileReader.java From rtg-tools with BSD 2-Clause "Simplified" License | 4 votes |
/**
 * No-op implementation: no sortedness check is performed; the iterator itself is returned
 * so calls can still be chained.
 *
 * @param sortOrder ignored
 * @return this iterator
 */
@Override
public SAMRecordIterator assertSorted(SAMFileHeader.SortOrder sortOrder) {
    return this;
}
Example 13
Source File: CollectDuplicateMetricsTester.java From picard with MIT License | 4 votes |
/**
 * Creates a tester fixed to the sum-of-base-qualities duplicate scoring strategy.
 *
 * @param sortOrder sort order the tester's records will use
 */
public CollectDuplicateMetricsTester( final SAMFileHeader.SortOrder sortOrder) {
    // Scoring strategy is pinned; the trailing flag is passed through as false.
    super(DuplicateScoringStrategy.ScoringStrategy.SUM_OF_BASE_QUALITIES, sortOrder, false);
}
Example 14
Source File: Merge.java From cramtools with Apache License 2.0 | 4 votes |
@Override public SAMRecordIterator assertSorted(SAMFileHeader.SortOrder sortOrder) { // TODO Auto-generated method stub return null; }
Example 15
Source File: AbstractAlignmentMergerTest.java From picard with MIT License | 4 votes |
/**
 * End-to-end check of contaminant unmapping through the RevertSam -> MergeBamAlignment pipeline:
 * 1. Builds a queryname-sorted record set (overlapping pairs with soft-clipped CIGARs) against a
 *    randomly generated temp reference, and writes it to an "aligned" temp SAM/BAM file.
 * 2. Reverts that file (alignment + duplicate info removed, queryname sort) to get an "unaligned" file.
 * 3. Runs MergeBamAlignment with UNMAP_CONTAMINANT_READS=true, merging the aligned file with its own
 *    reverted copy (see the "merge file with itself" comment).
 * 4. Asserts every merged record — and its mate, when paired — ends up unmapped, i.e. all reads were
 *    treated as contamination. NOTE(review): presumably the soft-clip-heavy CIGARs are what triggers
 *    the contaminant heuristic — confirm against MergeBamAlignment's unmapping criteria.
 * The commented-out addFrag calls are leftover fragments not exercised by this test.
 */
@Test public void testUnmapBacterialContamination() throws IOException { final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.queryname); final SAMFileHeader header = builder.getHeader(); final SAMFileHeader.SortOrder sortOrder = header.getSortOrder(); final SAMFileHeader newHeader = SAMRecordSetBuilder.makeDefaultHeader(sortOrder, 100000,true); builder.setHeader(newHeader); final File reference = File.createTempFile("reference",".fasta"); reference.deleteOnExit(); builder.writeRandomReference(reference.toPath()); builder.addPair("overlappingpair", 0,500,500, false,false,"20S20M60S","20S20M60M",true,false,45); builder.addPair("overlappingpairFirstAligned", 0,500,500, false,true,"20S20M60S",null,true,false,45); builder.addPair("overlappingpairSecondAligned", 0,500,500, true,false,null,"20S20M60S",true,false,45); builder.addPair("overlappingpairFirstAlignedB", 0,500,500, false,true,"20S20M60S",null,false,true,45); builder.addPair("overlappingpairSecondAlignedB", 0,500,500, true,false,null,"20S20M60S",false,true,45); // builder.addFrag("frag",1,500,false,false,"20S20M60S",null, 45); // builder.addFrag("frag2",1,500,true,false,"20S20M60S",null, 45); // builder.addFrag("frag3",1,500,false,false,"20S20M60S",null, 45); // builder.addFrag("frag4",1,500,true,false,"20S20M60S",null, 45); final File file = newTempSamFile("aligned"); try (SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(builder.getHeader(), true, file, null)) { builder.getRecords().forEach(writer::addAlignment); } final RevertSam revertSam = new RevertSam(); revertSam.INPUT = file; final File fileUnaligned = newTempSamFile("unaligned"); revertSam.OUTPUT = fileUnaligned; revertSam.SANITIZE = false; revertSam.REMOVE_ALIGNMENT_INFORMATION=true; revertSam.REMOVE_DUPLICATE_INFORMATION=true; revertSam.SORT_ORDER = SAMFileHeader.SortOrder.queryname; Assert.assertEquals(revertSam.doWork(),0); MergeBamAlignment mergeBamAlignment = new MergeBamAlignment(); 
mergeBamAlignment.ALIGNED_BAM = Collections.singletonList(file); mergeBamAlignment.UNMAPPED_BAM = fileUnaligned; mergeBamAlignment.UNMAP_CONTAMINANT_READS = true; //yuck! final RequiredReferenceArgumentCollection requiredReferenceArgumentCollection = new RequiredReferenceArgumentCollection(); requiredReferenceArgumentCollection.REFERENCE_SEQUENCE = reference; mergeBamAlignment.referenceSequence = requiredReferenceArgumentCollection; final File fileMerged = newTempSamFile("merged"); mergeBamAlignment.OUTPUT = fileMerged; // merge file with itself. Assert.assertEquals(mergeBamAlignment.doWork(),0); // check that all reads have been unmapped due to bacterial contamination as needed. try (SamReader mergedReader = SamReaderFactory.makeDefault().open(fileMerged)) { for (SAMRecord mergedRecord : mergedReader) { Assert.assertTrue(mergedRecord.getReadUnmappedFlag(), mergedRecord.toString()); Assert.assertTrue(!mergedRecord.getReadPairedFlag() || mergedRecord.getMateUnmappedFlag(), mergedRecord.toString()); } } }
Example 16
Source File: SortSam.java From picard with MIT License | 4 votes |
/**
 * Maps this enum constant to the htsjdk {@link SAMFileHeader.SortOrder} with the same name.
 *
 * @return the matching htsjdk sort order
 */
public SAMFileHeader.SortOrder getSortOrder(){
    // Constants here mirror the htsjdk names exactly, so a name-based lookup is valid.
    return SAMFileHeader.SortOrder.valueOf(name());
}
Example 17
Source File: MergeSamFiles.java From picard with MIT License | 4 votes |
/**
 * Combines multiple SAM/BAM files into one output file.
 *
 * Flow:
 * 1. Optionally load an interval list (INTERVALS); when present, each input must be indexed and is
 *    read through a per-reader interval iterator.
 * 2. Open every input, collecting readers and headers. Identical sequence dictionaries are collapsed
 *    to a single shared instance to cut memory use with very large dictionaries (see inline comment).
 *    While iterating, track whether every input's sort order already matches SORT_ORDER.
 * 3. Decide whether records can be written as-is (presorted): true when all orders match, the output
 *    order is unsorted, ASSUME_SORTED is set, or intervals are used; otherwise the header merger is
 *    built unsorted and records are sorted via temp files on write.
 * 4. Merge headers (optionally merging sequence dictionaries), attach COMMENT lines, stamp SORT_ORDER,
 *    then stream all records through a MergingSamRecordIterator into a SAMFileWriter (async when
 *    USE_THREADING is set), logging progress every PROGRESS_INTERVAL records.
 * 5. Close interval iterators, readers, and the writer; returns 0 on success.
 * NOTE(review): with intervals, the same read present in multiple inputs can be emitted twice —
 * see the warning referencing broadinstitute/picard PR #314.
 */
/** Combines multiple SAM/BAM files into one. */ @Override protected int doWork() { boolean matchedSortOrders = true; // read interval list if it is defined final List<Interval> intervalList = (INTERVALS == null ? null : IntervalList.fromFile(INTERVALS).uniqued().getIntervals() ); // map reader->iterator used if INTERVALS is defined final Map<SamReader, CloseableIterator<SAMRecord> > samReaderToIterator = new HashMap<>(INPUT.size()); final List<Path> inputPaths = IOUtil.getPaths(INPUT); // Open the files for reading and writing final List<SamReader> readers = new ArrayList<>(); final List<SAMFileHeader> headers = new ArrayList<>(); { SAMSequenceDictionary dict = null; // Used to try and reduce redundant SDs in memory for (final Path inFile : inputPaths) { IOUtil.assertFileIsReadable(inFile); final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(inFile); if (INTERVALS != null) { if (!in.hasIndex()) { throw new PicardException("Merging with interval but BAM file is not indexed: " + inFile); } final CloseableIterator<SAMRecord> samIterator = new SamRecordIntervalIteratorFactory().makeSamRecordIntervalIterator(in, intervalList, true); samReaderToIterator.put(in, samIterator); } readers.add(in); headers.add(in.getFileHeader()); // A slightly hackish attempt to keep memory consumption down when merging multiple files with // large sequence dictionaries (10,000s of sequences). If the dictionaries are identical, then // replace the duplicate copies with a single dictionary to reduce the memory footprint. 
if (dict == null) { dict = in.getFileHeader().getSequenceDictionary(); } else if (dict.equals(in.getFileHeader().getSequenceDictionary())) { in.getFileHeader().setSequenceDictionary(dict); } matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER; } } // If all the input sort orders match the output sort order then just merge them and // write on the fly, otherwise setup to merge and sort before writing out the final file IOUtil.assertFileIsWritable(OUTPUT); final boolean presorted; final SAMFileHeader.SortOrder headerMergerSortOrder; final boolean mergingSamRecordIteratorAssumeSorted; if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted || ASSUME_SORTED || INTERVALS != null ) { log.info("Input files are in same order as output so sorting to temp directory is not needed."); headerMergerSortOrder = SORT_ORDER; mergingSamRecordIteratorAssumeSorted = ASSUME_SORTED; presorted = true; } else { log.info("Sorting input files using temp directory " + TMP_DIR); headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted; mergingSamRecordIteratorAssumeSorted = false; presorted = false; } final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(headerMergerSortOrder, headers, MERGE_SEQUENCE_DICTIONARIES); final MergingSamRecordIterator iterator; // no interval defined, get an iterator for the whole bam if (intervalList == null) { iterator = new MergingSamRecordIterator(headerMerger, readers, mergingSamRecordIteratorAssumeSorted); } else { // show warning related to https://github.com/broadinstitute/picard/pull/314/files log.info("Warning: merged bams from different interval lists may contain the same read in both files"); iterator = new MergingSamRecordIterator(headerMerger, samReaderToIterator, true); } final SAMFileHeader header = headerMerger.getMergedHeader(); for (final String comment : COMMENT) { header.addComment(comment); } header.setSortOrder(SORT_ORDER); final SAMFileWriterFactory samFileWriterFactory = new 
SAMFileWriterFactory(); if (USE_THREADING) { samFileWriterFactory.setUseAsyncIo(true); } final SAMFileWriter out = samFileWriterFactory.makeSAMOrBAMWriter(header, presorted, OUTPUT); // Lastly loop through and write out the records final ProgressLogger progress = new ProgressLogger(log, PROGRESS_INTERVAL); while (iterator.hasNext()) { final SAMRecord record = iterator.next(); out.addAlignment(record); progress.record(record); } log.info("Finished reading inputs."); for(final CloseableIterator<SAMRecord> iter : samReaderToIterator.values()) CloserUtil.close(iter); CloserUtil.close(readers); out.close(); return 0; }
Example 18
Source File: RevertSam.java From picard with MIT License | 4 votes |
/**
 * Appends a validation error when sanitization is requested but the requested sort order is
 * not queryname (sanitization requires queryname grouping).
 *
 * @param sanitize  whether SANITIZE was requested
 * @param sortOrder requested output sort order
 * @param errors    accumulator that collected validation messages are added to
 */
static void validateSanitizeSortOrder(final boolean sanitize, final SAMFileHeader.SortOrder sortOrder, final List<String> errors) {
    final boolean sortedByQueryname = sortOrder == SAMFileHeader.SortOrder.queryname;
    if (sanitize && !sortedByQueryname) {
        errors.add("SORT_ORDER must be queryname when sanitization is enabled with SANITIZE=true.");
    }
}
Example 19
Source File: MarkIlluminaAdapters.java From picard with MIT License | 4 votes |
/**
 * Marks Illumina adapter sequences in reads from INPUT, writing clipped-base counts to a
 * METRICS histogram and (optionally) annotated records to OUTPUT.
 *
 * Flow:
 * 1. Open INPUT; optionally open an output writer that reuses the input header (presorted=true).
 * 2. Combine the configured ADAPTERS with an optional custom FIVE_PRIME/THREE_PRIME pair.
 * 3. Build an AdapterMarker from the error-rate / match-base / truncation / pruning parameters.
 * 4. Main loop: for each record, clear any existing XT tag, then:
 *    - Paired reads: the mate is expected to be the very next record, so the input must be
 *      queryname-sorted (enforced only once a paired read is actually seen); the pair's names
 *      must match and first/second-of-pair flags must be complementary before trimming both
 *      mates together.
 *    - Unpaired reads are trimmed individually.
 *    Each processed record is logged, written (if OUTPUT was given), and — when an XT clip
 *    point was set — contributes (readLength - clip + 1) clipped bases to the histogram.
 * 5. Close the writer, write the histogram to METRICS, close the reader; returns 0 on success.
 */
@Override protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsWritable(METRICS); final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT); final SAMFileHeader.SortOrder order = in.getFileHeader().getSortOrder(); SAMFileWriter out = null; if (OUTPUT != null) { IOUtil.assertFileIsWritable(OUTPUT); out = new SAMFileWriterFactory().makeSAMOrBAMWriter(in.getFileHeader(), true, OUTPUT); } final Histogram<Integer> histo = new Histogram<Integer>("clipped_bases", "read_count"); // Combine any adapters and custom adapter pairs from the command line into an array for use in clipping final AdapterPair[] adapters; { final List<AdapterPair> tmp = new ArrayList<AdapterPair>(); tmp.addAll(ADAPTERS); if (FIVE_PRIME_ADAPTER != null && THREE_PRIME_ADAPTER != null) { tmp.add(new CustomAdapterPair(FIVE_PRIME_ADAPTER, THREE_PRIME_ADAPTER)); } adapters = tmp.toArray(new AdapterPair[tmp.size()]); } //////////////////////////////////////////////////////////////////////// // Main loop that consumes reads, clips them and writes them to the output //////////////////////////////////////////////////////////////////////// final ProgressLogger progress = new ProgressLogger(log, 1000000, "Read"); final SAMRecordIterator iterator = in.iterator(); final AdapterMarker adapterMarker = new AdapterMarker(ADAPTER_TRUNCATION_LENGTH, adapters). setMaxPairErrorRate(MAX_ERROR_RATE_PE).setMinPairMatchBases(MIN_MATCH_BASES_PE). setMaxSingleEndErrorRate(MAX_ERROR_RATE_SE).setMinSingleEndMatchBases(MIN_MATCH_BASES_SE). setNumAdaptersToKeep(NUM_ADAPTERS_TO_KEEP). setThresholdForSelectingAdaptersToKeep(PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPTERS_SEEN); while (iterator.hasNext()) { final SAMRecord rec = iterator.next(); final SAMRecord rec2 = rec.getReadPairedFlag() && iterator.hasNext() ? 
iterator.next() : null; rec.setAttribute(ReservedTagConstants.XT, null); // Do the clipping one way for PE and another for SE reads if (rec.getReadPairedFlag()) { // Assert that the input file is in query name order only if we see some PE reads if (order != SAMFileHeader.SortOrder.queryname) { throw new PicardException("Input BAM file must be sorted by queryname"); } if (rec2 == null) throw new PicardException("Missing mate pair for paired read: " + rec.getReadName()); rec2.setAttribute(ReservedTagConstants.XT, null); // Assert that we did in fact just get two mate pairs if (!rec.getReadName().equals(rec2.getReadName())) { throw new PicardException("Adjacent reads expected to be mate-pairs have different names: " + rec.getReadName() + ", " + rec2.getReadName()); } // establish which of pair is first and which second final SAMRecord first, second; if (rec.getFirstOfPairFlag() && rec2.getSecondOfPairFlag()) { first = rec; second = rec2; } else if (rec.getSecondOfPairFlag() && rec2.getFirstOfPairFlag()) { first = rec2; second = rec; } else { throw new PicardException("Two reads with same name but not correctly marked as 1st/2nd of pair: " + rec.getReadName()); } adapterMarker.adapterTrimIlluminaPairedReads(first, second); } else { adapterMarker.adapterTrimIlluminaSingleRead(rec); } // Then output the records, update progress and metrics for (final SAMRecord r : new SAMRecord[]{rec, rec2}) { if (r != null) { progress.record(r); if (out != null) out.addAlignment(r); final Integer clip = r.getIntegerAttribute(ReservedTagConstants.XT); if (clip != null) histo.increment(r.getReadLength() - clip + 1); } } } if (out != null) out.close(); // Lastly output the metrics to file final MetricsFile<?, Integer> metricsFile = getMetricsFile(); metricsFile.setHistogram(histo); metricsFile.write(METRICS); CloserUtil.close(in); return 0; }
Example 20
Source File: ExampleCollectSingleMetricsSpark.java From gatk with BSD 3-Clause "New" or "Revised" License | 2 votes |
/**
 * Returns the sort order required/expected by this collector, delegating to the wrapped
 * single-level collector.
 *
 * @return the {@link SAMFileHeader.SortOrder} expected by this collector
 */
@Override
protected SAMFileHeader.SortOrder getExpectedSortOrder() {
    return exampleSingleCollector.getExpectedSortOrder();
}