Java Code Examples for org.apache.hadoop.io.compress.GzipCodec#createOutputStream()
The following examples show how to use
org.apache.hadoop.io.compress.GzipCodec#createOutputStream() .
You can vote up the examples you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: TestFastqInputFormat.java From Hadoop-BAM with MIT License | 6 votes |
@Test public void testGzCompressedInput() throws IOException { // write gzip-compressed data GzipCodec codec = new GzipCodec(); PrintWriter fastqOut = new PrintWriter( new BufferedOutputStream( codec.createOutputStream( new FileOutputStream(tempGz) ) ) ); fastqOut.write(twoFastq); fastqOut.close(); // now try to read it split = new FileSplit(new Path(tempGz.toURI().toString()), 0, twoFastq.length(), null); FastqRecordReader reader = new FastqRecordReader(conf, split); boolean retval = reader.next(key, fragment); assertTrue(retval); assertEquals("ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1", key.toString()); assertEquals("TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT", fragment.getSequence().toString()); retval = reader.next(key, fragment); assertTrue(retval); assertEquals("ERR020229.10883 HWI-ST168_161:1:1:1796:2044/1", key.toString()); assertEquals("TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG", fragment.getSequence().toString()); }
Example 2
Source File: TestQseqInputFormat.java From Hadoop-BAM with MIT License | 6 votes |
@Test public void testGzCompressedInput() throws IOException { // write gzip-compressed data GzipCodec codec = new GzipCodec(); PrintWriter qseqOut = new PrintWriter( new BufferedOutputStream( codec.createOutputStream( new FileOutputStream(tempGz) ) ) ); qseqOut.write(twoQseq); qseqOut.close(); // now try to read it split = new FileSplit(new Path(tempGz.toURI().toString()), 0, twoQseq.length(), null); QseqRecordReader reader = new QseqRecordReader(conf, split); boolean retval = reader.next(key, fragment); assertTrue(retval); assertEquals("ERR020229:10880:1:1:1373:2042:1", key.toString()); assertEquals("TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT", fragment.getSequence().toString()); retval = reader.next(key, fragment); assertTrue(retval); assertEquals("ERR020229:10883:1:1:1796:2044:2", key.toString()); assertEquals("TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG", fragment.getSequence().toString()); }
Example 3
Source File: EmoFileSystem.java From emodb with Apache License 2.0 | 5 votes |
/**
 * Opens an input stream over one table split: fetches the split's rows from
 * the DataStore, then wires up a piped gzip pipeline that a background
 * thread feeds via streamAndCompressInput().
 *
 * @param table the table the split belongs to
 * @param split the split identifier; an empty split yields no rows
 * @throws IOException if the pipe setup fails
 */
private EmoSplitInputStream(String table, String split)
        throws IOException {
    if (isEmptySplit(split)) {
        _rows = Iterators.emptyIterator();
    } else {
        // Get the DataStore and begin streaming the split's rows.
        CloseableDataStore dataStore = HadoopDataStoreManager.getInstance().getDataStore(_uri, _apiKey, _metricRegistry);
        _closer.register(dataStore);
        _rows = DataStoreStreaming.getSplit(dataStore, table, split, false, ReadConsistency.STRONG).iterator();
    }

    // Start with an empty, fully-consumed buffer so the first read forces a refill.
    _buffer.clear();
    _buffer.limit(0);

    GzipCodec gzipCodec = new GzipCodec();
    // GzipCodec requires a Configuration before createOutputStream() is called.
    gzipCodec.setConf(new Configuration());

    // Set up the pipes: bytes written to _rawOut are gzip-compressed into
    // pipeRawToGzip, and the compressed bytes are then readable from _gzipIn.
    // NOTE(review): the 10 MB pipe buffer presumably keeps the producer
    // thread from blocking on slow readers — confirm against usage.
    PipedOutputStream pipeRawToGzip = new PipedOutputStream();
    _gzipIn = new PipedInputStream(pipeRawToGzip, 10 * 1024 * 1024);
    _rawOut = gzipCodec.createOutputStream(pipeRawToGzip);

    // Register pipe ends with the closer so close() tears down the pipeline.
    _closer.register(_gzipIn);
    _closer.register(pipeRawToGzip);

    // Start the asynchronous buffering thread; it writes the rows into
    // _rawOut, which feeds the pipe set up above.
    _bufferThread = new Thread(new Runnable() {
        @Override
        public void run() {
            streamAndCompressInput();
        }
    });
    _bufferThread.start();
}
Example 4
Source File: TestFastqInputFormat.java From Hadoop-BAM with MIT License | 5 votes |
@Test(expected=RuntimeException.class) public void testCompressedSplit() throws IOException { // write gzip-compressed data GzipCodec codec = new GzipCodec(); PrintWriter fastqOut = new PrintWriter( new BufferedOutputStream( codec.createOutputStream( new FileOutputStream(tempGz) ) ) ); fastqOut.write(twoFastq); fastqOut.close(); // now try to read it starting from the middle split = new FileSplit(new Path(tempGz.toURI().toString()), 10, twoFastq.length(), null); FastqRecordReader reader = new FastqRecordReader(conf, split); }
Example 5
Source File: TestQseqInputFormat.java From Hadoop-BAM with MIT License | 5 votes |
@Test(expected=RuntimeException.class) public void testCompressedSplit() throws IOException { // write gzip-compressed data GzipCodec codec = new GzipCodec(); PrintWriter qseqOut = new PrintWriter( new BufferedOutputStream( codec.createOutputStream( new FileOutputStream(tempGz) ) ) ); qseqOut.write(twoQseq); qseqOut.close(); // now try to read it starting from the middle split = new FileSplit(new Path(tempGz.toURI().toString()), 10, twoQseq.length(), null); QseqRecordReader reader = new QseqRecordReader(conf, split); }
Example 6
Source File: TestAllLoader.java From spork with Apache License 2.0 | 5 votes |
@Override public void writeTestData(File file, int recordCounts, int columnCount, String colSeparator) throws IOException { // write random test data GzipCodec gzipCodec = new GzipCodec(); CompressionOutputStream out = gzipCodec .createOutputStream(new FileOutputStream(file)); BufferedWriter writer = new BufferedWriter(new OutputStreamWriter( out)); try { for (int r = 0; r < recordCounts; r++) { // foreach row write n columns for (int c = 0; c < columnCount; c++) { if (c != 0) { writer.append(colSeparator); } writer.append(String.valueOf(Math.random())); } writer.append("\n"); } } finally { writer.close(); out.close(); } }