Java Code Examples for org.apache.hadoop.io.compress.GzipCodec#createOutputStream()
The following examples show how to use
org.apache.hadoop.io.compress.GzipCodec#createOutputStream() .
You can vote up the examples you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: TestFastqInputFormat.java From Hadoop-BAM with MIT License | 6 votes |
@Test public void testGzCompressedInput() throws IOException { // write gzip-compressed data GzipCodec codec = new GzipCodec(); PrintWriter fastqOut = new PrintWriter( new BufferedOutputStream( codec.createOutputStream( new FileOutputStream(tempGz) ) ) ); fastqOut.write(twoFastq); fastqOut.close(); // now try to read it split = new FileSplit(new Path(tempGz.toURI().toString()), 0, twoFastq.length(), null); FastqRecordReader reader = new FastqRecordReader(conf, split); boolean retval = reader.next(key, fragment); assertTrue(retval); assertEquals("ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1", key.toString()); assertEquals("TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT", fragment.getSequence().toString()); retval = reader.next(key, fragment); assertTrue(retval); assertEquals("ERR020229.10883 HWI-ST168_161:1:1:1796:2044/1", key.toString()); assertEquals("TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG", fragment.getSequence().toString()); }
Example 2
Source File: TestQseqInputFormat.java From Hadoop-BAM with MIT License | 6 votes |
@Test public void testGzCompressedInput() throws IOException { // write gzip-compressed data GzipCodec codec = new GzipCodec(); PrintWriter qseqOut = new PrintWriter( new BufferedOutputStream( codec.createOutputStream( new FileOutputStream(tempGz) ) ) ); qseqOut.write(twoQseq); qseqOut.close(); // now try to read it split = new FileSplit(new Path(tempGz.toURI().toString()), 0, twoQseq.length(), null); QseqRecordReader reader = new QseqRecordReader(conf, split); boolean retval = reader.next(key, fragment); assertTrue(retval); assertEquals("ERR020229:10880:1:1:1373:2042:1", key.toString()); assertEquals("TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT", fragment.getSequence().toString()); retval = reader.next(key, fragment); assertTrue(retval); assertEquals("ERR020229:10883:1:1:1796:2044:2", key.toString()); assertEquals("TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG", fragment.getSequence().toString()); }
Example 3
Source File: EmoFileSystem.java From emodb with Apache License 2.0 | 5 votes |
/**
 * Opens an input stream over one table split: fetches the split's rows from
 * the DataStore, then wires up a piped gzip pipeline that a background
 * thread feeds via streamAndCompressInput().
 *
 * @param table the table the split belongs to
 * @param split the split identifier; an empty split yields no rows
 * @throws IOException if the pipe setup fails
 */
private EmoSplitInputStream(String table, String split)
        throws IOException {
    if (isEmptySplit(split)) {
        _rows = Iterators.emptyIterator();
    } else {
        // Get the DataStore and begin streaming the split's rows.
        CloseableDataStore dataStore = HadoopDataStoreManager.getInstance().getDataStore(_uri, _apiKey, _metricRegistry);
        _closer.register(dataStore);
        _rows = DataStoreStreaming.getSplit(dataStore, table, split, false, ReadConsistency.STRONG).iterator();
    }

    // Start with an empty, fully-consumed buffer so the first read forces a refill.
    _buffer.clear();
    _buffer.limit(0);

    GzipCodec gzipCodec = new GzipCodec();
    // GzipCodec requires a Configuration before createOutputStream() is called.
    gzipCodec.setConf(new Configuration());

    // Set up the pipes: bytes written to _rawOut are gzip-compressed into
    // pipeRawToGzip, and the compressed bytes are then readable from _gzipIn.
    // NOTE(review): the 10 MB pipe buffer presumably keeps the producer
    // thread from blocking on slow readers — confirm against usage.
    PipedOutputStream pipeRawToGzip = new PipedOutputStream();
    _gzipIn = new PipedInputStream(pipeRawToGzip, 10 * 1024 * 1024);
    _rawOut = gzipCodec.createOutputStream(pipeRawToGzip);

    // Register pipe ends with the closer so close() tears down the pipeline.
    _closer.register(_gzipIn);
    _closer.register(pipeRawToGzip);

    // Start the asynchronous buffering thread; it writes the rows into
    // _rawOut, which feeds the pipe set up above.
    _bufferThread = new Thread(new Runnable() {
        @Override
        public void run() {
            streamAndCompressInput();
        }
    });
    _bufferThread.start();
}
Example 4
Source File: TestFastqInputFormat.java From Hadoop-BAM with MIT License | 5 votes |
@Test(expected=RuntimeException.class) public void testCompressedSplit() throws IOException { // write gzip-compressed data GzipCodec codec = new GzipCodec(); PrintWriter fastqOut = new PrintWriter( new BufferedOutputStream( codec.createOutputStream( new FileOutputStream(tempGz) ) ) ); fastqOut.write(twoFastq); fastqOut.close(); // now try to read it starting from the middle split = new FileSplit(new Path(tempGz.toURI().toString()), 10, twoFastq.length(), null); FastqRecordReader reader = new FastqRecordReader(conf, split); }
Example 5
Source File: TestQseqInputFormat.java From Hadoop-BAM with MIT License | 5 votes |
@Test(expected=RuntimeException.class) public void testCompressedSplit() throws IOException { // write gzip-compressed data GzipCodec codec = new GzipCodec(); PrintWriter qseqOut = new PrintWriter( new BufferedOutputStream( codec.createOutputStream( new FileOutputStream(tempGz) ) ) ); qseqOut.write(twoQseq); qseqOut.close(); // now try to read it starting from the middle split = new FileSplit(new Path(tempGz.toURI().toString()), 10, twoQseq.length(), null); QseqRecordReader reader = new QseqRecordReader(conf, split); }
Example 6
Source File: TestAllLoader.java From spork with Apache License 2.0 | 5 votes |
@Override public void writeTestData(File file, int recordCounts, int columnCount, String colSeparator) throws IOException { // write random test data GzipCodec gzipCodec = new GzipCodec(); CompressionOutputStream out = gzipCodec .createOutputStream(new FileOutputStream(file)); BufferedWriter writer = new BufferedWriter(new OutputStreamWriter( out)); try { for (int r = 0; r < recordCounts; r++) { // foreach row write n columns for (int c = 0; c < columnCount; c++) { if (c != 0) { writer.append(colSeparator); } writer.append(String.valueOf(Math.random())); } writer.append("\n"); } } finally { writer.close(); out.close(); } }