Java Code Examples for htsjdk.samtools.SamReaderFactory#open()
The following examples show how to use
htsjdk.samtools.SamReaderFactory#open() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BamSlicerApplication.java From hmftools with GNU General Public License v3.0 | 6 votes |
private static void sliceFromVCF(@NotNull CommandLine cmd) throws IOException { String inputPath = cmd.getOptionValue(INPUT); String vcfPath = cmd.getOptionValue(VCF); int proximity = Integer.parseInt(cmd.getOptionValue(PROXIMITY, "500")); SamReaderFactory readerFactory = createFromCommandLine(cmd); SamReader reader = readerFactory.open(new File(inputPath)); QueryInterval[] intervals = getIntervalsFromVCF(vcfPath, reader.getFileHeader(), proximity); CloseableIterator<SAMRecord> iterator = reader.queryOverlapping(intervals); SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true) .makeBAMWriter(reader.getFileHeader(), true, new File(cmd.getOptionValue(OUTPUT))); writeToSlice(writer, iterator); writer.close(); reader.close(); }
Example 2
Source File: CompareSAMs.java From picard with MIT License | 6 votes |
/** * Do the work after command line has been parsed. RuntimeException may be * thrown by this method, and are reported appropriately. * * @return program exit status. */ @Override protected int doWork() { final SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE); try (final SamReader samReader1 = samReaderFactory.open(SAM_FILES.get(0)); final SamReader samReader2 = samReaderFactory.open(SAM_FILES.get(1))) { final SamComparison comparison = new SamComparison(samReader1, samReader2, SAM_FILES.get(0).getAbsolutePath(), SAM_FILES.get(1).getAbsolutePath(), samComparisonArgumentCollection); if (OUTPUT != null) { comparison.writeReport(OUTPUT, getDefaultHeaders()); } if (comparison.areEqual()) { System.out.println("SAM files match."); } else { System.out.println("SAM files differ."); } return comparison.areEqual() ? 0 : 1; } catch (IOException e) { throw new RuntimeIOException("Error opening file", e); } }
Example 3
Source File: SamFormatConverterTest.java From picard with MIT License | 6 votes |
private void convertFile(final File inputFile, final File fileToCompare, final String extension) throws IOException { final List<File> samFiles = new ArrayList<>(); final ValidateSamFile validateSamFile = new ValidateSamFile(); final File output = File.createTempFile("SamFormatConverterTest." + inputFile.getName(), extension); output.deleteOnExit(); SamFormatConverter.convert(inputFile, output, ESSENTIALLY_EMPTY_REFERENCE_TO_USE_WITH_UNMAPPED_CRAM, Defaults.CREATE_INDEX); validateSamFile.INPUT = output; assertEquals(validateSamFile.doWork(), 0); final SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().referenceSequence(ESSENTIALLY_EMPTY_REFERENCE_TO_USE_WITH_UNMAPPED_CRAM); try (final SamReader samReader1 = samReaderFactory.open(output); final SamReader samReader2 = samReaderFactory.open(fileToCompare)) { final SamComparison samComparison = new SamComparison(samReader1, samReader2); Assert.assertTrue(samComparison.areEqual()); } }
Example 4
Source File: BAMRecordReader.java From Hadoop-BAM with MIT License | 6 votes |
private SamReader createSamReader(SeekableStream in, SeekableStream inIndex, ValidationStringency stringency, boolean useIntelInflater) { SamReaderFactory readerFactory = SamReaderFactory.makeDefault() .setOption(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES, true) .setOption(SamReaderFactory.Option.EAGERLY_DECODE, false) .setUseAsyncIo(false); if (stringency != null) { readerFactory.validationStringency(stringency); } SamInputResource resource = SamInputResource.of(in); if (inIndex != null) { resource.index(inIndex); } if (useIntelInflater) { readerFactory.inflaterFactory(IntelGKLAccessor.newInflatorFactor()); } return readerFactory.open(resource); }
Example 5
Source File: BAMIO.java From dataflow-java with Apache License 2.0 | 6 votes |
private static SamReader openBAMReader(SamInputResource resource, ValidationStringency stringency, boolean includeFileSource, long offset) throws IOException { SamReaderFactory samReaderFactory = SamReaderFactory .makeDefault() .validationStringency(stringency) .enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES); if (includeFileSource) { samReaderFactory.enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS); } if (offset == 0) { return samReaderFactory.open(resource); } LOG.info("Initializing seeking reader with the offset of " + offset); SeekingBAMFileReader primitiveReader = new SeekingBAMFileReader(resource, false, stringency, DefaultSAMRecordFactory.getInstance(), offset); final SeekingReaderAdapter reader = new SeekingReaderAdapter(primitiveReader, resource); samReaderFactory.reapplyOptions(reader); return reader; }
Example 6
Source File: UnmarkDuplicatesIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
private static int getDuplicateCountForBam(final File bam, final File referenceFile) throws IOException { int duplicateCount = 0; final SamReaderFactory factory = SamReaderFactory.makeDefault(); if(referenceFile != null) { factory.referenceSequence(referenceFile); } try ( final SamReader reader = factory.open(bam) ) { for ( SAMRecord read : reader ) { if ( read.getDuplicateReadFlag() ) { ++duplicateCount; } } } return duplicateCount; }
Example 7
Source File: BamSlicerApplication.java From hmftools with GNU General Public License v3.0 | 5 votes |
@NotNull private static SamReader createCachingReader(@NotNull File indexFile, @NotNull URL bamUrl, @NotNull CommandLine cmd, @NotNull List<Chunk> sliceChunks) throws IOException { OkHttpClient httpClient = SlicerHttpClient.create(Integer.parseInt(cmd.getOptionValue(MAX_CONCURRENT_REQUESTS, MAX_CONCURRENT_REQUESTS_DEFAULT))); int maxBufferSize = readMaxBufferSize(cmd); SamInputResource bamResource = SamInputResource.of(new CachingSeekableHTTPStream(httpClient, bamUrl, sliceChunks, maxBufferSize)).index(indexFile); SamReaderFactory readerFactory = createFromCommandLine(cmd); return readerFactory.open(bamResource); }
Example 8
Source File: CleanSam.java From picard with MIT License | 5 votes |
/** * Do the work after command line has been parsed. * RuntimeException may be thrown by this method, and are reported appropriately. * * @return program exit status. */ @Override protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsWritable(OUTPUT); final SamReaderFactory factory = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE); if (VALIDATION_STRINGENCY == ValidationStringency.STRICT) { factory.validationStringency(ValidationStringency.LENIENT); } final SamReader reader = factory.open(INPUT); final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), true, OUTPUT); final CloseableIterator<SAMRecord> it = reader.iterator(); final ProgressLogger progress = new ProgressLogger(Log.getInstance(CleanSam.class)); // If the read (or its mate) maps off the end of the alignment, clip it while (it.hasNext()) { final SAMRecord rec = it.next(); // If the read (or its mate) maps off the end of the alignment, clip it AbstractAlignmentMerger.createNewCigarsIfMapsOffEndOfReference(rec); // check the read's mapping quality if (rec.getReadUnmappedFlag() && 0 != rec.getMappingQuality()) { rec.setMappingQuality(0); } writer.addAlignment(rec); progress.record(rec); } writer.close(); it.close(); CloserUtil.close(reader); return 0; }
Example 9
Source File: CleanSamTester.java From picard with MIT License | 5 votes |
protected void test() { try { final SamFileValidator validator = new SamFileValidator(new PrintWriter(System.out), 8000); // Validate it has the expected cigar validator.setIgnoreWarnings(true); validator.setVerbose(true, 1000); validator.setErrorsToIgnore(Arrays.asList(SAMValidationError.Type.MISSING_READ_GROUP)); SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT); SamReader samReader = factory.open(getOutput()); final SAMRecordIterator iterator = samReader.iterator(); while (iterator.hasNext()) { final SAMRecord rec = iterator.next(); Assert.assertEquals(rec.getCigarString(), expectedCigar); if (SAMUtils.hasMateCigar(rec)) { Assert.assertEquals(SAMUtils.getMateCigarString(rec), expectedCigar); } } CloserUtil.close(samReader); // Run validation on the output file samReader = factory.open(getOutput()); final boolean validated = validator.validateSamFileVerbose(samReader, null); CloserUtil.close(samReader); Assert.assertTrue(validated, "ValidateSamFile failed"); } finally { IOUtil.recursiveDelete(getOutputDir().toPath()); } }
Example 10
Source File: SAMRecordReader.java From Hadoop-BAM with MIT License | 5 votes |
private SamReader createSamReader(InputStream in, ValidationStringency stringency) { SamReaderFactory readerFactory = SamReaderFactory.makeDefault() .setOption(SamReaderFactory.Option.EAGERLY_DECODE, false) .setUseAsyncIo(false); if (stringency != null) { readerFactory.validationStringency(stringency); } return readerFactory.open(SamInputResource.of(in)); }
Example 11
Source File: AbstractPrintReadsIntegrationTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test(dataProvider = "readFilterTestData") public void testReadFilters( final String input, final String reference, final String extOut, final List<String> inputArgs, final int expectedCount) throws IOException { final File outFile = createTempFile("testReadFilter", extOut); final ArgumentsBuilder args = new ArgumentsBuilder(); args.addRaw("-I"); args.addRaw(new File(TEST_DATA_DIR, input).getAbsolutePath()); args.addRaw("-O"); args.addRaw(outFile.getAbsolutePath()); if ( reference != null ) { args.addRaw("-R"); args.addRaw(new File(TEST_DATA_DIR, reference).getAbsolutePath()); } for (final String filter : inputArgs) { args.addRaw(filter); } runCommandLine(args); SamReaderFactory factory = SamReaderFactory.makeDefault(); if (reference != null) { factory = factory.referenceSequence(new File(TEST_DATA_DIR, reference)); } int count = 0; try (final SamReader reader = factory.open(outFile)) { Iterator<SAMRecord> it = reader.iterator(); while (it.hasNext()) { SAMRecord rec = it.next(); count++; } } Assert.assertEquals(count, expectedCount); }
Example 12
Source File: RevertSam.java From picard with MIT License | 4 votes |
private Map<SAMReadGroupRecord, FastqQualityFormat> createReadGroupFormatMap( final SAMFileHeader inHeader, final File referenceSequence, final ValidationStringency validationStringency, final File input, final boolean restoreOriginalQualities) { final Map<SAMReadGroupRecord, FastqQualityFormat> readGroupToFormat = new HashMap<>(); final SamReaderFactory readerFactory = SamReaderFactory.makeDefault().referenceSequence(referenceSequence).validationStringency(validationStringency); // Figure out the quality score encoding scheme for each read group. for (final SAMReadGroupRecord rg : inHeader.getReadGroups()) { final SamRecordFilter filter = new SamRecordFilter() { public boolean filterOut(final SAMRecord rec) { return !rec.getReadGroup().getId().equals(rg.getId()); } public boolean filterOut(final SAMRecord first, final SAMRecord second) { throw new UnsupportedOperationException(); } }; try (final SamReader reader = readerFactory.open(input)) { final FilteringSamIterator filterIterator = new FilteringSamIterator(reader.iterator(), filter); if (filterIterator.hasNext()) { readGroupToFormat.put(rg, QualityEncodingDetector.detect( QualityEncodingDetector.DEFAULT_MAX_RECORDS_TO_ITERATE, filterIterator, restoreOriginalQualities)); } else { log.warn("Skipping read group " + rg.getReadGroupId() + " with no reads"); } } catch (IOException e) { throw new RuntimeIOException(e); } } for (final SAMReadGroupRecord r : readGroupToFormat.keySet()) { log.info("Detected quality format for " + r.getReadGroupId() + ": " + readGroupToFormat.get(r)); } if (readGroupToFormat.values().contains(FastqQualityFormat.Solexa)) { throw new PicardException("No quality score encoding conversion implemented for " + FastqQualityFormat.Solexa); } return readGroupToFormat; }
Example 13
Source File: MergeBamAlignmentTest.java From picard with MIT License | 4 votes |
/** * Minimal test of merging data from separate read 1 and read 2 alignments */ @Test(dataProvider="separateTrimmed") public void testMergingFromSeparatedReadTrimmedAlignments(final File unmapped, final List<File> r1Align, final List<File> r2Align, final int r1Trim, final int r2Trim, final String testName) throws Exception { final File output = File.createTempFile("mergeMultipleAlignmentsTest", ".sam"); output.deleteOnExit(); doMergeAlignment(unmapped, null, r1Align, r2Align, r1Trim, r2Trim, false, true, false, 1, "0", "1.0", "align!", "myAligner", true, fasta, output, SamPairUtil.PairOrientation.FR, null, null, null, null, null); SamReaderFactory factory = SamReaderFactory.makeDefault(); final SamReader result = factory.open(output); for (final SAMRecord sam : result) { // Get the alignment record final List<File> rFiles = sam.getFirstOfPairFlag() ? r1Align : r2Align; SAMRecord alignment = null; for (final File f : rFiles) { for (final SAMRecord tmp : factory.open(f)) { if (tmp.getReadName().equals(sam.getReadName())) { alignment = tmp; break; } } if (alignment != null) break; } final int trim = sam.getFirstOfPairFlag() ? r1Trim : r2Trim; final int notWrittenToFastq = sam.getReadLength() - (trim + alignment.getReadLength()); final int beginning = sam.getReadNegativeStrandFlag() ? notWrittenToFastq : trim; final int end = sam.getReadNegativeStrandFlag() ? trim : notWrittenToFastq; if (!sam.getReadUnmappedFlag()) { final CigarElement firstMergedCigarElement = sam.getCigar().getCigarElement(0); final CigarElement lastMergedCigarElement = sam.getCigar().getCigarElement(sam.getCigar().getCigarElements().size() - 1); final CigarElement firstAlignedCigarElement = alignment.getCigar().getCigarElement(0); final CigarElement lastAlignedCigarElement = alignment.getCigar().getCigarElement(alignment.getCigar().getCigarElements().size() - 1); if (beginning > 0) { Assert.assertEquals(firstMergedCigarElement.getOperator(), CigarOperator.S, "First element is not a soft clip"); Assert.assertEquals(firstMergedCigarElement.getLength(), beginning + ((firstAlignedCigarElement.getOperator() == CigarOperator.S) ? firstAlignedCigarElement.getLength() : 0)); } if (end > 0) { Assert.assertEquals(lastMergedCigarElement.getOperator(), CigarOperator.S, "Last element is not a soft clip"); Assert.assertEquals(lastMergedCigarElement.getLength(), end + ((lastAlignedCigarElement.getOperator() == CigarOperator.S) ? lastAlignedCigarElement.getLength() : 0)); } } } }
Example 14
Source File: BAMDiff.java From dataflow-java with Apache License 2.0 | 4 votes |
public void runDiff() throws Exception { SamReaderFactory readerFactory = SamReaderFactory .makeDefault() .validationStringency(ValidationStringency.SILENT) .enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES); LOG.info("Opening file 1 for diff: " + BAMFile1); SamReader reader1 = readerFactory.open(new File(BAMFile1)); LOG.info("Opening file 2 for diff: " + BAMFile2); SamReader reader2 = readerFactory.open(new File(BAMFile2)); try { Iterator<Contig> contigsToProcess = null; if (options.contigsToProcess != null && !options.contigsToProcess.isEmpty()) { Iterable<Contig> parsedContigs = Contig.parseContigsFromCommandLine(options.contigsToProcess); referencesToProcess = Sets.newHashSet(); for (Contig c : parsedContigs) { referencesToProcess.add(c.referenceName); } contigsToProcess = parsedContigs.iterator(); if (!contigsToProcess.hasNext()) { return; } } LOG.info("Comparing headers"); if (!compareHeaders(reader1.getFileHeader(), reader2.getFileHeader())) { error("Headers are not equal"); return; } LOG.info("Headers are equal"); do { SAMRecordIterator it1; SAMRecordIterator it2; if (contigsToProcess == null) { LOG.info("Checking all the reads"); it1 = reader1.iterator(); it2 = reader2.iterator(); } else { Contig contig = contigsToProcess.next(); LOG.info("Checking contig " + contig.toString()); processedContigs++; it1 = reader1.queryOverlapping(contig.referenceName, (int)contig.start, (int)contig.end); it2 = reader2.queryOverlapping(contig.referenceName, (int)contig.start, (int)contig.end); } if (!compareRecords(it1, it2)) { break; } it1.close(); it2.close(); } while (contigsToProcess != null && contigsToProcess.hasNext()); } catch (Exception ex) { throw ex; } finally { reader1.close(); reader2.close(); } LOG.info("Processed " + processedContigs + " contigs, " + processedLoci + " loci, " + processedReads + " reads."); }
Example 15
Source File: ReadsPathDataSource.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
/** * Initialize this data source with multiple SAM/BAM/CRAM files, explicit indices for those files, * and a custom SamReaderFactory. * * @param samPaths paths to SAM/BAM/CRAM files, not null * @param samIndices indices for all of the SAM/BAM/CRAM files, in the same order as samPaths. May be null, * in which case index paths are inferred automatically. * @param customSamReaderFactory SamReaderFactory to use, if null a default factory with no reference and validation * stringency SILENT is used. * @param cloudWrapper caching/prefetching wrapper for the data, if on Google Cloud. * @param cloudIndexWrapper caching/prefetching wrapper for the index, if on Google Cloud. */ public ReadsPathDataSource( final List<Path> samPaths, final List<Path> samIndices, SamReaderFactory customSamReaderFactory, Function<SeekableByteChannel, SeekableByteChannel> cloudWrapper, Function<SeekableByteChannel, SeekableByteChannel> cloudIndexWrapper ) { Utils.nonNull(samPaths); Utils.nonEmpty(samPaths, "ReadsPathDataSource cannot be created from empty file list"); if ( samIndices != null && samPaths.size() != samIndices.size() ) { throw new UserException(String.format("Must have the same number of BAM/CRAM/SAM paths and indices. Saw %d BAM/CRAM/SAMs but %d indices", samPaths.size(), samIndices.size())); } readers = new LinkedHashMap<>(samPaths.size() * 2); backingPaths = new LinkedHashMap<>(samPaths.size() * 2); indicesAvailable = true; final SamReaderFactory samReaderFactory = customSamReaderFactory == null ? SamReaderFactory.makeDefault().validationStringency(ReadConstants.DEFAULT_READ_VALIDATION_STRINGENCY) : customSamReaderFactory; int samCount = 0; for ( final Path samPath : samPaths ) { // Ensure each file can be read try { IOUtil.assertFileIsReadable(samPath); } catch ( SAMException|IllegalArgumentException e ) { throw new UserException.CouldNotReadInputFile(samPath.toString(), e); } Function<SeekableByteChannel, SeekableByteChannel> wrapper = (BucketUtils.isEligibleForPrefetching(samPath) ? cloudWrapper : Function.identity()); // if samIndices==null then we'll guess the index name from the file name. // If the file's on the cloud, then the search will only consider locations that are also // in the cloud. Function<SeekableByteChannel, SeekableByteChannel> indexWrapper = ((samIndices != null && BucketUtils.isEligibleForPrefetching(samIndices.get(samCount)) || (samIndices == null && BucketUtils.isEligibleForPrefetching(samPath))) ? cloudIndexWrapper : Function.identity()); SamReader reader; if ( samIndices == null ) { reader = samReaderFactory.open(samPath, wrapper, indexWrapper); } else { final SamInputResource samResource = SamInputResource.of(samPath, wrapper); Path indexPath = samIndices.get(samCount); samResource.index(indexPath, indexWrapper); reader = samReaderFactory.open(samResource); } // Ensure that each file has an index if ( ! reader.hasIndex() ) { indicesAvailable = false; } readers.put(reader, null); backingPaths.put(reader, samPath); ++samCount; } // Prepare a header merger only if we have multiple readers headerMerger = samPaths.size() > 1 ? createHeaderMerger() : null; }
Example 16
Source File: Transcode.java From cramtools with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws IOException { Params params = new Params(); JCommander jc = new JCommander(params); jc.parse(args); Log.setGlobalLogLevel(params.logLevel); if (args.length == 0 || params.help) { usage(jc); System.exit(1); } if (params.reference == null) { System.out.println("Reference file not found, will try downloading..."); } ReferenceSource referenceSource = null; if (params.reference != null) { System.setProperty("reference", params.reference.getAbsolutePath()); referenceSource = new ReferenceSource(params.reference); } else { String prop = System.getProperty("reference"); if (prop != null) { referenceSource = new ReferenceSource(new File(prop)); } } SamReaderFactory factory = SamReaderFactory.make().validationStringency(params.validationLevel); SamInputResource r; if ("file".equalsIgnoreCase(params.url.getProtocol())) r = SamInputResource.of(params.url.getPath()); else r = SamInputResource.of(params.url); SamReader reader = factory.open(r); SAMRecordIterator iterator = reader.iterator(); SAMFileWriterFactory writerFactory = new SAMFileWriterFactory(); SAMFileWriter writer = null; OutputStream os = new BufferedOutputStream(new FileOutputStream(params.outputFile)); switch (params.outputFormat) { case BAM: writer = writerFactory.makeBAMWriter(reader.getFileHeader(), reader.getFileHeader().getSortOrder() == SortOrder.coordinate, os); break; case CRAM: writer = writerFactory.makeCRAMWriter(reader.getFileHeader(), os, params.reference); break; default: System.out.println("Unknown output format: " + params.outputFormat); System.exit(1); } while (iterator.hasNext()) { writer.addAlignment(iterator.next()); } writer.close(); reader.close(); }