htsjdk.samtools.seekablestream.SeekableStream Java Examples
The following examples show how to use
htsjdk.samtools.seekablestream.SeekableStream.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: IndexAggregate.java From cramtools with Apache License 2.0 | 6 votes |
public static IndexAggregate forDataFile(SeekableStream stream, SAMSequenceDictionary dictionary) throws IOException { String path = stream.getSource(); File indexFile = findIndexFileFor(path); if (indexFile == null) throw new FileNotFoundException("No index found for file: " + path); log.info("Using index file: " + indexFile.getAbsolutePath()); IndexAggregate a = new IndexAggregate(); if (indexFile.getName().matches("(?i).*\\.bai")) { a.bai = new CachingBAMFileIndex(indexFile, dictionary); return a; } if (indexFile.getName().matches("(?i).*\\.crai")) { a.crai = CramIndex.readIndex(new GZIPInputStream(new FileInputStream(indexFile))); return a; } throw new FileNotFoundException("No index found for file: " + path); }
Example #2
Source File: BAMRecordReader.java From Hadoop-BAM with MIT License | 6 votes |
private SamReader createSamReader(SeekableStream in, SeekableStream inIndex, ValidationStringency stringency, boolean useIntelInflater) { SamReaderFactory readerFactory = SamReaderFactory.makeDefault() .setOption(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES, true) .setOption(SamReaderFactory.Option.EAGERLY_DECODE, false) .setUseAsyncIo(false); if (stringency != null) { readerFactory.validationStringency(stringency); } SamInputResource resource = SamInputResource.of(in); if (inIndex != null) { resource.index(inIndex); } if (useIntelInflater) { readerFactory.inflaterFactory(IntelGKLAccessor.newInflatorFactor()); } return readerFactory.open(resource); }
Example #3
Source File: CRAMInputFormat.java From Hadoop-BAM with MIT License | 5 votes |
private static List<Long> getContainerOffsets(Configuration conf, Path cramFile) throws IOException { SeekableStream seekableStream = WrapSeekable.openPath(conf, cramFile); CramContainerIterator cci = new CramContainerIterator(seekableStream); List<Long> containerOffsets = new ArrayList<Long>(); containerOffsets.add(seekableStream.position()); while (cci.hasNext()) { cci.next(); containerOffsets.add(seekableStream.position()); } containerOffsets.add(seekableStream.length()); return containerOffsets; }
Example #4
Source File: Utils.java From cramtools with Apache License 2.0 | 5 votes |
private static boolean checkEOF(htsjdk.samtools.cram.common.Version version, SeekableStream seekableStream) throws IOException { if (version.compatibleWith(CramVersions.CRAM_v3)) return streamEndsWith(seekableStream, CramIO.ZERO_F_EOF_MARKER); if (version.compatibleWith(CramVersions.CRAM_v2_1)) return streamEndsWith(seekableStream, CramIO.ZERO_B_EOF_MARKER); return false; }
Example #5
Source File: Utils.java From cramtools with Apache License 2.0 | 5 votes |
private static boolean streamEndsWith(SeekableStream seekableStream, byte[] marker) throws IOException { byte[] tail = new byte[marker.length]; seekableStream.seek(seekableStream.length() - marker.length); InputStreamUtils.readFully(seekableStream, tail, 0, tail.length); if (Arrays.equals(tail, marker)) return true; tail[8] = (byte) (tail[8] | 240); return Arrays.equals(tail, marker); }
Example #6
Source File: IndexAggregate.java From cramtools with Apache License 2.0 | 5 votes |
private static long seek(BAMIndex index, int seqId, int start, int end, SeekableStream cramStream) throws IOException { BAMFileSpan span = index.getSpanOverlapping(seqId, start, end); if (span == null) return -1; long[] coords = span.toCoordinateArray(); if (coords.length == 0) return -1; long[] offsets = new long[coords.length / 2]; for (int i = 0; i < offsets.length; i++) { offsets[i] = coords[i * 2] >> 16; } Arrays.sort(offsets); // peek into container in offset ascending order and choose the first // that intersects the query: for (int i = 0; i < offsets.length; i++) { log.debug("Peeking at offset: " + offsets[i]); IndexAggregate.ContainerBoundary b = peek(cramStream, offsets[i]); if (b == null) continue; boolean intersects = intersects(start, end, b); // System.out.printf("%b, %d, %d, %d, %d\n", intersects, start, end, // b.start, b.start + b.span); if (intersects(start, end, b)) { long offset = offsets[i]; log.debug("Found query at offset: " + offset); cramStream.seek(offset); return offset; } } return -1; }
Example #7
Source File: IndexAggregate.java From cramtools with Apache License 2.0 | 5 votes |
private static long seek(List<CramIndex.Entry> index, int seqId, int start, int end, SeekableStream cramStream) throws IOException { List<Entry> found = CramIndex.find(index, seqId, start, end - start + 1); if (found == null || found.size() == 0) return -1; cramStream.seek(found.get(0).containerStartOffset); log.debug("Found query at offset: " + found.get(0).containerStartOffset); return found.get(0).containerStartOffset; }
Example #8
Source File: VariantsSparkSinkUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private VCFHeader getHeader(String vcf) throws IOException { final java.nio.file.Path vcfPath = IOUtils.getPath(vcf); try (SeekableStream stream = new SeekablePathStream(vcfPath)) { return VCFHeaderReader.readHeaderFrom(stream); } catch (IOException e) { throw new UserException("Failed to read VCF header from " + vcf + "\n Caused by:" + e.getMessage(), e); } }
Example #9
Source File: CreateHadoopBamSplittingIndex.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private static void createOnlySplittingIndex(final File inputBam, final File index, final long granularity) { assertIsBam(inputBam); try(SeekableStream in = new SeekableFileStream(inputBam); BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(index))) { BAMSBIIndexer.createIndex(in, out, granularity); } catch (final IOException e) { throw new UserException("Couldn't create splitting index", e); } }
Example #10
Source File: SeekingBAMFileReader.java From dataflow-java with Apache License 2.0 | 5 votes |
public SeekingBAMFileReader(final SamInputResource resource, final boolean eagerDecode, final ValidationStringency validationStringency, final SAMRecordFactory factory, long offset) throws IOException { super(resource.data().asUnbufferedSeekableStream(), (SeekableStream)null, eagerDecode, validationStringency, factory); this.offset = offset; this.stream = resource.data().asUnbufferedSeekableStream(); }
Example #11
Source File: BAMIO.java From dataflow-java with Apache License 2.0 | 5 votes |
private static SamInputResource openBAMFile(Storage.Objects storageClient, String gcsStoragePath, SeekableStream index) throws IOException { SeekableGCSStream s = new SeekableGCSStream(storageClient, gcsStoragePath); SamInputResource samInputResource = SamInputResource.of(s); if (index != null) { samInputResource.index(index); } LOG.info("getReadsFromBAMFile - got input resources"); return samInputResource; }
Example #12
Source File: BAMIO.java From dataflow-java with Apache License 2.0 | 5 votes |
private static SeekableStream openIndexForPath(Storage.Objects storageClient,String gcsStoragePath) { final String indexPath = gcsStoragePath + ".bai"; try { return new SeekableGCSStream(storageClient, indexPath); } catch (IOException ex) { LOG.info("No index for " + indexPath); // Ignore if there is no bai file } return null; }
Example #13
Source File: TestCRAMOutputFormat.java From Hadoop-BAM with MIT License | 5 votes |
private int getCRAMRecordCount( final File containerStreamFile, final SAMFileHeader header, final ReferenceSource refSource) throws IOException { // assemble a proper CRAM file from the container stream shard(s) in // order to verify the contents final ByteArrayInputStream mergedStream = mergeCRAMContainerStream ( containerStreamFile, header, refSource ); // now we can verify that we can read everything back in final CRAMFileReader resultCRAMReader = new CRAMFileReader( mergedStream, (SeekableStream) null, refSource, ValidationStringency.DEFAULT_STRINGENCY); final Iterator<SAMRecord> it = resultCRAMReader.getIterator(); int actualCount = 0; while (it.hasNext()) { it.next(); actualCount++; } return actualCount; }
Example #14
Source File: TestBAMSplitGuesser.java From Hadoop-BAM with MIT License | 5 votes |
@Test public void test() throws Exception { Configuration conf = new Configuration(); String bam = getClass().getClassLoader().getResource("test.bam").getFile(); SeekableStream ss = WrapSeekable.openPath(conf, new Path(bam)); BAMSplitGuesser bamSplitGuesser = new BAMSplitGuesser(ss, conf); long startGuess = bamSplitGuesser.guessNextBAMRecordStart(0, 3 * 0xffff + 0xfffe); assertEquals(SAMUtils.findVirtualOffsetOfFirstRecordInBam(new File(bam)), startGuess); }
Example #15
Source File: BAMSplitGuesser.java From Hadoop-BAM with MIT License | 5 votes |
public BAMSplitGuesser( SeekableStream ss, InputStream headerStream, Configuration conf) throws IOException { inFile = ss; header = SAMHeaderReader.readSAMHeaderFrom(headerStream, conf); referenceSequenceCount = header.getSequenceDictionary().size(); bamCodec = new BAMRecordCodec(null, new LazyBAMRecordFactory()); }
Example #16
Source File: BAMSplitGuesser.java From Hadoop-BAM with MIT License | 5 votes |
/** The stream must point to a valid BAM file, because the header is read * from it. */ public BAMSplitGuesser( SeekableStream ss, Configuration conf) throws IOException { this(ss, ss, conf); // Secondary check that the header points to a BAM file: Picard can get // things wrong due to its autodetection. ss.seek(0); if (ss.read(buf.array(), 0, 4) != 4 || buf.getInt(0) != BGZF_MAGIC) throw new SAMFormatException("Does not seem like a BAM file"); }
Example #17
Source File: BamSlicerApplication.java From hmftools with GNU General Public License v3.0 | 5 votes |
private static void sliceFromURLs(@NotNull URL indexUrl, @NotNull URL bamUrl, @NotNull CommandLine cmd) throws IOException { File indexFile = downloadIndex(indexUrl); indexFile.deleteOnExit(); SamReader reader = createFromCommandLine(cmd).open(SamInputResource.of(bamUrl).index(indexFile)); BAMIndex bamIndex; if (indexFile.getPath().contains(".crai")) { SeekableStream craiIndex = CRAIIndex.openCraiFileAsBaiStream(indexFile, reader.getFileHeader().getSequenceDictionary()); bamIndex = new DiskBasedBAMFileIndex(craiIndex, reader.getFileHeader().getSequenceDictionary()); } else { bamIndex = new DiskBasedBAMFileIndex(indexFile, reader.getFileHeader().getSequenceDictionary(), false); } Optional<Pair<QueryInterval[], BAMFileSpan>> queryIntervalsAndSpan = queryIntervalsAndSpan(reader, bamIndex, cmd); Optional<Chunk> unmappedChunk = getUnmappedChunk(bamIndex, HttpUtils.getHeaderField(bamUrl, "Content-Length"), cmd); List<Chunk> sliceChunks = sliceChunks(queryIntervalsAndSpan, unmappedChunk); SamReader cachingReader = createCachingReader(indexFile, bamUrl, cmd, sliceChunks); SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true) .makeBAMWriter(reader.getFileHeader(), true, new File(cmd.getOptionValue(OUTPUT))); queryIntervalsAndSpan.ifPresent(pair -> { LOGGER.info("Slicing bam on bed regions..."); CloseableIterator<SAMRecord> bedIterator = getIterator(cachingReader, pair.getKey(), pair.getValue().toCoordinateArray()); writeToSlice(writer, bedIterator); LOGGER.info("Done writing bed slices."); }); unmappedChunk.ifPresent(chunk -> { LOGGER.info("Slicing unmapped reads..."); CloseableIterator<SAMRecord> unmappedIterator = cachingReader.queryUnmapped(); writeToSlice(writer, unmappedIterator); LOGGER.info("Done writing unmapped reads."); }); reader.close(); writer.close(); cachingReader.close(); }
Example #18
Source File: TestVCFHeaderReader.java From Hadoop-BAM with MIT License | 4 votes |
static SeekableStream seekableStream(final String resource) throws IOException { return new ByteArraySeekableStream(Resources.toByteArray(ClassLoader.getSystemClassLoader().getResource(resource))); }
Example #19
Source File: LinearBAMIndex.java From Hadoop-BAM with MIT License | 4 votes |
public LinearBAMIndex(SeekableStream stream, SAMSequenceDictionary dict) { super(stream, dict); }
Example #20
Source File: BAMFileIndexImpl.java From dataflow-java with Apache License 2.0 | 4 votes |
public BAMFileIndexImpl(SeekableStream stream, SAMSequenceDictionary dict) { super(stream, dict); mBamDictionary = dict; }
Example #21
Source File: BAMRecordReader.java From Hadoop-BAM with MIT License | 4 votes |
@Override public void initialize(InputSplit spl, TaskAttemptContext ctx) throws IOException { // This method should only be called once (see Hadoop API). However, // there seems to be disagreement between implementations that call // initialize() and Hadoop-BAM's own code that relies on // {@link BAMInputFormat} to call initialize() when the reader is // created. Therefore we add this check for the time being. if(isInitialized) close(); isInitialized = true; reachedEnd = false; final Configuration conf = ctx.getConfiguration(); final FileVirtualSplit split = (FileVirtualSplit)spl; final Path file = split.getPath(); final FileSystem fs = file.getFileSystem(conf); ValidationStringency stringency = SAMHeaderReader.getValidationStringency(conf); boolean useIntelInflater = BAMInputFormat.useIntelInflater(conf); java.nio.file.Path index = SamFiles.findIndex(NIOFileUtil.asPath(fs.makeQualified(file).toUri())); Path fileIndex = index == null ? null : new Path(index.toUri()); SeekableStream indexStream = fileIndex == null ? null : WrapSeekable.openPath(fs, fileIndex); in = WrapSeekable.openPath(fs, file); SamReader samReader = createSamReader(in, indexStream, stringency, useIntelInflater); final SAMFileHeader header = samReader.getFileHeader(); long virtualStart = split.getStartVirtualOffset(); fileStart = virtualStart >>> 16; virtualEnd = split.getEndVirtualOffset(); SamReader.PrimitiveSamReader primitiveSamReader = ((SamReader.PrimitiveSamReaderToSamReaderAdapter) samReader).underlyingReader(); bamFileReader = (BAMFileReader) primitiveSamReader; if (logger.isDebugEnabled()) { final long recordStart = virtualStart & 0xffff; logger.debug("Initialized BAMRecordReader; byte offset: {}, record offset: {}", fileStart, recordStart); } if (conf.getBoolean("hadoopbam.bam.keep-paired-reads-together", false)) { throw new IllegalArgumentException("Property hadoopbam.bam.keep-paired-reads-together is no longer honored."); } boolean boundedTraversal = BAMInputFormat.isBoundedTraversal(conf); if (boundedTraversal && split.getIntervalFilePointers() != null) { // return reads for intervals List<Interval> intervals = BAMInputFormat.getIntervals(conf); QueryInterval[] queryIntervals = BAMInputFormat.prepareQueryIntervals(intervals, header.getSequenceDictionary()); iterator = bamFileReader.createIndexIterator(queryIntervals, false, split.getIntervalFilePointers()); } else if (boundedTraversal && split.getIntervalFilePointers() == null) { // return unmapped reads iterator = bamFileReader.queryUnmapped(); } else { // return everything BAMFileSpan splitSpan = new BAMFileSpan(new Chunk(virtualStart, virtualEnd)); iterator = bamFileReader.getIterator(splitSpan); } }
Example #22
Source File: BCFSplitGuesser.java From Hadoop-BAM with MIT License | 4 votes |
private void cinSeek(long virt) throws IOException { if (bgzf) ((BlockCompressedInputStream)cin).seek(virt); else ((SeekableStream)cin).seek(virt); }
Example #23
Source File: BCFSplitGuesser.java From Hadoop-BAM with MIT License | 4 votes |
/** The stream must point to a valid BCF file, because the header is read * from it. */ public BCFSplitGuesser(SeekableStream ss) throws IOException { this(ss, ss); }
Example #24
Source File: IndexAggregate.java From cramtools with Apache License 2.0 | 4 votes |
public static IndexAggregate fromBaiFile(SeekableStream baiStream, SAMSequenceDictionary dictionary) throws IOException { IndexAggregate a = new IndexAggregate(); a.bai = new CachingBAMFileIndex(baiStream, dictionary); return a; }
Example #25
Source File: BAMInputFormat.java From Hadoop-BAM with MIT License | 4 votes |
private int addProbabilisticSplits( List<InputSplit> splits, int i, List<InputSplit> newSplits, Configuration cfg) throws IOException { final Path path = ((FileSplit)splits.get(i)).getPath(); try (final SeekableStream sin = WrapSeekable.openPath(path.getFileSystem(cfg), path)) { final BAMSplitGuesser guesser = new BAMSplitGuesser(sin, cfg); FileVirtualSplit previousSplit = null; for (; i < splits.size(); ++i) { FileSplit fspl = (FileSplit)splits.get(i); if (!fspl.getPath().equals(path)) break; long beg = fspl.getStart(); long end = beg + fspl.getLength(); long alignedBeg = guesser.guessNextBAMRecordStart(beg, end); // As the guesser goes to the next BGZF block before looking for BAM // records, the ending BGZF blocks have to always be traversed fully. // Hence force the length to be 0xffff, the maximum possible. long alignedEnd = end << 16 | 0xffff; if (alignedBeg == end) { // No records detected in this split: merge it to the previous one. // This could legitimately happen e.g. if we have a split that is // so small that it only contains the middle part of a BGZF block. // // Of course, if it's the first split, then this is simply not a // valid BAM file. // // FIXME: In theory, any number of splits could only contain parts // of the BAM header before we start to see splits that contain BAM // records. For now, we require that the split size is at least as // big as the header and don't handle that case. if (previousSplit == null) throw new IOException("'" + path + "': "+ "no reads in first split: bad BAM file or tiny split size?"); previousSplit.setEndVirtualOffset(alignedEnd); } else { previousSplit = new FileVirtualSplit( path, alignedBeg, alignedEnd, fspl.getLocations()); if (logger.isDebugEnabled()) { final long byteOffset = alignedBeg >>> 16; final long recordOffset = alignedBeg & 0xffff; logger.debug( "Split {}: byte offset: {} record offset: {}, virtual offset: {}", i, byteOffset, recordOffset, alignedBeg); } newSplits.add(previousSplit); } } } return i; }
Example #26
Source File: KeyIgnoringVCFOutputFormat.java From Hadoop-BAM with MIT License | 4 votes |
public void readHeaderFrom(SeekableStream in) throws IOException { this.header = VCFHeaderReader.readHeaderFrom(in); }
Example #27
Source File: ReferenceSequenceFromSeekable.java From cramtools with Apache License 2.0 | 4 votes |
private ReferenceSequenceFromSeekable(SeekableStream s, Map<String, FastaSequenceIndexEntry> index) { this.s = s; this.index = index; }
Example #28
Source File: KeyIgnoringVCFOutputFormat.java From Hadoop-BAM with MIT License | 4 votes |
public void readHeaderFrom(Path path, FileSystem fs) throws IOException { SeekableStream i = WrapSeekable.openPath(fs, path); readHeaderFrom(i); i.close(); }
Example #29
Source File: IndexAggregate.java From cramtools with Apache License 2.0 | 3 votes |
/** * Find and seek the data stream to the position of the alignment query. * * @param seqId * reference sequence id * @param start * alignment start, 1-based inclusive * @param end * alignment end, 1-based exclusive * @param cramStream * the data stream to seek in * @return the offset found or -1 if the query was not found * @throws IOException */ public long seek(int seqId, int start, int end, SeekableStream cramStream) throws IOException { if (crai != null) return seek(crai, seqId, start, end, cramStream); if (bai != null) return seek(bai, seqId, start, end, cramStream); return -1; }