org.apache.hadoop.io.SequenceFile.Writer Java Examples
The following examples show how to use
org.apache.hadoop.io.SequenceFile.Writer.
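Before the individual examples, here is a minimal, self-contained sketch of the pattern most of them share: create a Writer through the Option-based SequenceFile.createWriter factory, append Writable key/value pairs, and close the writer. The output path and the IntWritable/Text types are illustrative assumptions, not taken from any single example below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.io.Text;

public class SequenceFileWriteDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path("/tmp/demo.seq"); // illustrative output path

    // Option-based factory; it replaces the deprecated
    // createWriter(FileSystem, Configuration, ...) overloads seen in some of
    // the older examples below. Writer is Closeable, so try-with-resources works.
    try (Writer writer = SequenceFile.createWriter(conf,
        Writer.file(path),
        Writer.keyClass(IntWritable.class),
        Writer.valueClass(Text.class))) {
      IntWritable key = new IntWritable();
      Text value = new Text();
      for (int i = 0; i < 3; i++) {
        key.set(i);
        value.set("value_" + i);
        writer.append(key, value); // key and value objects are reused between appends
      }
    }
  }
}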
Example #1
Source File: TarUnpackerSequenceFileWriter.java From localization_nifi with Apache License 2.0
@Override
protected void processInputStream(final InputStream stream, final FlowFile tarArchivedFlowFile, final Writer writer) throws IOException {
  try (final TarArchiveInputStream tarIn = new TarArchiveInputStream(new BufferedInputStream(stream))) {
    TarArchiveEntry tarEntry;
    while ((tarEntry = tarIn.getNextTarEntry()) != null) {
      if (tarEntry.isDirectory()) {
        continue;
      }
      final String key = tarEntry.getName();
      final long fileSize = tarEntry.getSize();
      final InputStreamWritable inStreamWritable = new InputStreamWritable(tarIn, (int) fileSize);
      writer.append(new Text(key), inStreamWritable);
      logger.debug("Appending FlowFile {} to Sequence File", new Object[]{key});
    }
  }
}
Example #2
Source File: TestSequenceFileSerialization.java From hadoop with Apache License 2.0
public void testJavaSerialization() throws Exception {
  Path file = new Path(System.getProperty("test.build.data", ".") + "/testseqser.seq");

  fs.delete(file, true);
  Writer writer = SequenceFile.createWriter(fs, conf, file, Long.class, String.class);

  writer.append(1L, "one");
  writer.append(2L, "two");

  writer.close();

  Reader reader = new Reader(fs, file, conf);
  assertEquals(1L, reader.next((Object) null));
  assertEquals("one", reader.getCurrentValue((Object) null));
  assertEquals(2L, reader.next((Object) null));
  assertEquals("two", reader.getCurrentValue((Object) null));
  assertNull(reader.next((Object) null));
  reader.close();
}
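This test appends plain Long and String objects instead of Writables, which only works because the test's setUp (not shown here) registers Hadoop's Java serialization; without it, createWriter rejects non-Writable key and value classes. A minimal sketch of that configuration:

// Register JavaSerialization so java.lang.Long and String can serve as
// SequenceFile key/value types alongside the default Writable serialization.
Configuration conf = new Configuration();
conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization");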
Example #3
Source File: SequenceFileUtil.java From alchemy with Apache License 2.0
public static void writeSequenceFile(String path) throws Exception {
  Writer.Option filePath = Writer.file(new Path(path));
  Writer.Option keyClass = Writer.keyClass(IntWritable.class);
  Writer.Option valueClass = Writer.valueClass(Text.class);
  Writer.Option compression = Writer.compression(CompressionType.NONE);
  Writer writer = SequenceFile.createWriter(configuration, filePath, keyClass, valueClass, compression);
  IntWritable key = new IntWritable();
  Text value = new Text("");
  for (int i = 0; i < 100; i++) {
    key.set(i);
    value.set("value_" + i);
    writer.append(key, value);
  }
  writer.hflush();
  writer.close();
}
Example #4
Source File: MergeSortRowIdMatcher.java From incubator-retired-blur with Apache License 2.0
private void writeRowIds(Writer writer, SegmentReader segmentReader) throws IOException {
  Terms terms = segmentReader.terms(BlurConstants.ROW_ID);
  if (terms == null) {
    return;
  }
  TermsEnum termsEnum = terms.iterator(null);
  BytesRef rowId;
  long s = System.nanoTime();
  while ((rowId = termsEnum.next()) != null) {
    long n = System.nanoTime();
    // Report progress roughly every 10 seconds so the task is not timed out.
    if (n - s > _10_SECONDS) {
      _progressable.progress();
      s = System.nanoTime();
    }
    writer.append(new Text(rowId.utf8ToString()), NullWritable.get());
  }
}
Example #5
Source File: SnapshotIndexDeletionPolicy.java From incubator-retired-blur with Apache License 2.0
private synchronized void storeGenerations() throws IOException {
  FileSystem fileSystem = _path.getFileSystem(_configuration);
  FileStatus[] listStatus = fileSystem.listStatus(_path);
  SortedSet<FileStatus> existing = new TreeSet<FileStatus>(Arrays.asList(listStatus));
  long currentFile;
  if (!existing.isEmpty()) {
    FileStatus last = existing.last();
    currentFile = Long.parseLong(last.getPath().getName());
  } else {
    currentFile = 0;
  }
  Path path = new Path(_path, buffer(currentFile + 1));
  LOG.info("Creating new snapshot file [{0}]", path);
  FSDataOutputStream outputStream = fileSystem.create(path, false);
  Writer writer = SequenceFile.createWriter(_configuration, outputStream, Text.class, LongWritable.class,
      CompressionType.NONE, null);
  for (Entry<String, Long> e : _namesToGenerations.entrySet()) {
    writer.append(new Text(e.getKey()), new LongWritable(e.getValue()));
  }
  writer.close();
  outputStream.close();
  cleanupOldFiles(fileSystem, existing);
}
Example #6
Source File: TestSequenceFileSerialization.java From big-c with Apache License 2.0
public void testJavaSerialization() throws Exception {
  Path file = new Path(System.getProperty("test.build.data", ".") + "/testseqser.seq");

  fs.delete(file, true);
  Writer writer = SequenceFile.createWriter(fs, conf, file, Long.class, String.class);

  writer.append(1L, "one");
  writer.append(2L, "two");

  writer.close();

  Reader reader = new Reader(fs, file, conf);
  assertEquals(1L, reader.next((Object) null));
  assertEquals("one", reader.getCurrentValue((Object) null));
  assertEquals(2L, reader.next((Object) null));
  assertEquals("two", reader.getCurrentValue((Object) null));
  assertNull(reader.next((Object) null));
  reader.close();
}
Example #7
Source File: TestSequenceFileSerialization.java From RDFS with Apache License 2.0
public void testJavaSerialization() throws Exception {
  Path file = new Path(System.getProperty("test.build.data", ".") + "/test.seq");

  fs.delete(file, true);
  Writer writer = SequenceFile.createWriter(fs, conf, file, Long.class, String.class);

  writer.append(1L, "one");
  writer.append(2L, "two");

  writer.close();

  Reader reader = new Reader(fs, file, conf);
  assertEquals(1L, reader.next((Object) null));
  assertEquals("one", reader.getCurrentValue((Object) null));
  assertEquals(2L, reader.next((Object) null));
  assertEquals("two", reader.getCurrentValue((Object) null));
  assertNull(reader.next((Object) null));
  reader.close();
}
Example #8
Source File: CircusTrainCopyListing.java From circus-train with Apache License 2.0
@Override
public void doBuildListing(Path pathToListFile, DistCpOptions options) throws IOException {
  try (Writer writer = newWriter(pathToListFile)) {
    Path sourceRootPath = getRootPath(getConf());

    for (Path sourcePath : options.getSourcePaths()) {
      FileSystem fileSystem = sourcePath.getFileSystem(getConf());
      FileStatus directory = fileSystem.getFileStatus(sourcePath);
      Map<String, CopyListingFileStatus> children = new FileStatusTreeTraverser(fileSystem)
          .preOrderTraversal(directory)
          .transform(new CopyListingFileStatusFunction(fileSystem, options))
          .uniqueIndex(new RelativePathFunction(sourceRootPath));

      for (Entry<String, CopyListingFileStatus> entry : children.entrySet()) {
        LOG.debug("Adding '{}' with relative path '{}'", entry.getValue().getPath(), entry.getKey());
        writer.append(new Text(entry.getKey()), entry.getValue());
        writer.sync();
      }
    }
  }
}
Example #9
Source File: ZipUnpackerSequenceFileWriter.java From nifi with Apache License 2.0
@Override
protected void processInputStream(InputStream stream, final FlowFile flowFile, final Writer writer) throws IOException {
  try (final ZipInputStream zipIn = new ZipInputStream(new BufferedInputStream(stream))) {
    ZipEntry zipEntry;
    while ((zipEntry = zipIn.getNextEntry()) != null) {
      if (zipEntry.isDirectory()) {
        continue;
      }
      final File file = new File(zipEntry.getName());
      final String key = file.getName();
      long fileSize = zipEntry.getSize();
      final InputStreamWritable inStreamWritable = new InputStreamWritable(zipIn, (int) fileSize);
      writer.append(new Text(key), inStreamWritable);
      logger.debug("Appending FlowFile {} to Sequence File", new Object[]{key});
    }
  }
}
Example #10
Source File: TestSequenceFileSerialization.java From hadoop-gpu with Apache License 2.0
public void testJavaSerialization() throws Exception {
  Path file = new Path(System.getProperty("test.build.data", ".") + "/test.seq");

  fs.delete(file, true);
  Writer writer = SequenceFile.createWriter(fs, conf, file, Long.class, String.class);

  writer.append(1L, "one");
  writer.append(2L, "two");

  writer.close();

  Reader reader = new Reader(fs, file, conf);
  assertEquals(1L, reader.next((Object) null));
  assertEquals("one", reader.getCurrentValue((Object) null));
  assertEquals(2L, reader.next((Object) null));
  assertEquals("two", reader.getCurrentValue((Object) null));
  assertNull(reader.next((Object) null));
  reader.close();
}
Example #11
Source File: ConvertFastaForCloud.java From emr-sample-apps with Apache License 2.0
/**
 * @param args
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
  if (args.length != 2) {
    System.err.println("Usage: ConvertFastaForCloud file.fa outfile.br");
    System.exit(-1);
  }

  String infile = args[0];
  String outfile = args[1];

  System.err.println("Converting " + infile + " into " + outfile);

  JobConf config = new JobConf();
  SequenceFile.Writer writer = SequenceFile.createWriter(FileSystem.get(config), config,
      new Path(outfile), IntWritable.class, BytesWritable.class);

  convertFile(infile, writer);

  writer.close();

  System.err.println("min_seq_len: " + min_seq_len);
  System.err.println("max_seq_len: " + max_seq_len);
  System.err.println("Using DNAString version: " + DNAString.VERSION);
}
Example #12
Source File: TarUnpackerSequenceFileWriter.java From nifi with Apache License 2.0
@Override
protected void processInputStream(final InputStream stream, final FlowFile tarArchivedFlowFile, final Writer writer) throws IOException {
  try (final TarArchiveInputStream tarIn = new TarArchiveInputStream(new BufferedInputStream(stream))) {
    TarArchiveEntry tarEntry;
    while ((tarEntry = tarIn.getNextTarEntry()) != null) {
      if (tarEntry.isDirectory()) {
        continue;
      }
      final String key = tarEntry.getName();
      final long fileSize = tarEntry.getSize();
      final InputStreamWritable inStreamWritable = new InputStreamWritable(tarIn, (int) fileSize);
      writer.append(new Text(key), inStreamWritable);
      logger.debug("Appending FlowFile {} to Sequence File", new Object[]{key});
    }
  }
}
Example #13
Source File: ZipUnpackerSequenceFileWriter.java From localization_nifi with Apache License 2.0
@Override
protected void processInputStream(InputStream stream, final FlowFile flowFile, final Writer writer) throws IOException {
  try (final ZipInputStream zipIn = new ZipInputStream(new BufferedInputStream(stream))) {
    ZipEntry zipEntry;
    while ((zipEntry = zipIn.getNextEntry()) != null) {
      if (zipEntry.isDirectory()) {
        continue;
      }
      final File file = new File(zipEntry.getName());
      final String key = file.getName();
      long fileSize = zipEntry.getSize();
      final InputStreamWritable inStreamWritable = new InputStreamWritable(zipIn, (int) fileSize);
      writer.append(new Text(key), inStreamWritable);
      logger.debug("Appending FlowFile {} to Sequence File", new Object[]{key});
    }
  }
}
Example #14
Source File: WritableValueInputFormatTest.java From kangaroo with Apache License 2.0
@Test
public void testSetupInput() throws Exception {
  final Text t1 = new Text("1");
  final Text t2 = new Text("2");
  final Text t3 = new Text("3");
  final Text t4 = new Text("4");
  final Text t5 = new Text("5");
  final List<Text> values = Lists.newArrayList(t1, t2, t3, t4, t5);
  final Writer writer = mock(Writer.class);
  when(path.getFileSystem(conf)).thenReturn(fs);
  final Job job = mock(Job.class);
  when(job.getConfiguration()).thenReturn(conf);
  final String fileName = "file:///tmp/file";
  when(path.toString()).thenReturn(fileName);

  WritableValueInputFormat.doSetupInput(values, Text.class, 2, job, path, writer);

  final NullWritable key = NullWritable.get();
  verify(writer, times(1)).append(key, t1);
  verify(writer, times(1)).append(key, t2);
  verify(writer, times(1)).append(key, t3);
  verify(writer, times(1)).append(key, t4);
  verify(writer, times(1)).append(key, t5);
  verify(writer, times(2)).sync();
  verify(writer).close();
  verify(writer).hflush();
  verify(fs).deleteOnExit(path);
  assertEquals(Text.class, conf.getClass(WritableValueInputFormat.VALUE_TYPE_CONF, NullWritable.class));
  assertEquals(2, conf.getInt(WritableValueInputFormat.INPUTS_PER_SPLIT_CONF, -1));
  assertEquals(fileName, conf.get(WritableValueInputFormat.INPUT_FILE_LOCATION_CONF));
}
Example #15
Source File: HdfsAbstractSequenceFileSink.java From pulsar with Apache License 2.0
protected List<Option> getOptions() throws IllegalArgumentException, IOException {
  List<Option> list = new ArrayList<Option>();
  list.add(Writer.stream(getHdfsStream()));

  if (getCompressionCodec() != null) {
    list.add(Writer.compression(SequenceFile.CompressionType.RECORD, getCompressionCodec()));
  }
  return list;
}
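The accumulated options are eventually unpacked into the varargs factory SequenceFile.createWriter(Configuration, Writer.Option...). A minimal sketch of such a call site, assuming a conf field and the getOptions() method above (the rest of the sink class is not shown here):

// Hypothetical call site: hand the collected options to the varargs factory.
List<Option> opts = getOptions();
SequenceFile.Writer writer =
    SequenceFile.createWriter(conf, opts.toArray(new Option[0]));

Here Option refers to SequenceFile.Writer.Option, as in the sink code above.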
Example #16
Source File: BlurHiveOutputFormat.java From incubator-retired-blur with Apache License 2.0
private org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getMrWorkingPathWriter(
    final Configuration configuration) throws IOException {
  PrivilegedExceptionAction<org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter> privilegedExceptionAction =
      new PrivilegedExceptionAction<org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter>() {
        @Override
        public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter run() throws Exception {
          String workingPathStr = configuration.get(BlurConstants.BLUR_BULK_UPDATE_WORKING_PATH);
          Path workingPath = new Path(workingPathStr);
          Path tmpDir = new Path(workingPath, "tmp");
          FileSystem fileSystem = tmpDir.getFileSystem(configuration);
          String loadId = configuration.get(BlurSerDe.BLUR_MR_LOAD_ID);
          Path loadPath = new Path(tmpDir, loadId);
          final Writer writer = new SequenceFile.Writer(fileSystem, configuration,
              new Path(loadPath, UUID.randomUUID().toString()), Text.class, BlurRecord.class);
          return new org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter() {

            @Override
            public void write(Writable w) throws IOException {
              BlurRecord blurRecord = (BlurRecord) w;
              String rowId = blurRecord.getRowId();
              writer.append(new Text(rowId), blurRecord);
            }

            @Override
            public void close(boolean abort) throws IOException {
              writer.close();
            }
          };
        }
      };
  UserGroupInformation userGroupInformation = getUGI(configuration);
  try {
    return userGroupInformation.doAs(privilegedExceptionAction);
  } catch (InterruptedException e) {
    throw new IOException(e);
  }
}
Example #17
Source File: DriverTest.java From incubator-retired-blur with Apache License 2.0
private void generateData(String mrIncWorkingPathStr, String rowId, String recordId, String value) throws IOException {
  Path path = new Path(new Path(mrIncWorkingPathStr), "new");
  Writer writer = new SequenceFile.Writer(miniCluster.getFileSystem(), conf,
      new Path(path, UUID.randomUUID().toString()), Text.class, BlurRecord.class);
  BlurRecord blurRecord = new BlurRecord();
  blurRecord.setRowId(rowId);
  blurRecord.setRecordId(recordId);
  blurRecord.setFamily("fam0");
  blurRecord.addColumn("col0", value);
  writer.append(new Text(rowId), blurRecord);
  writer.close();
}
Example #18
Source File: ProcessRecordService.java From hraven with Apache License 2.0
/**
 * @param processFilePath where to write to.
 * @return Writer for SequenceFile<JobFile, FileStatus>
 * @throws IOException when bad things happen.
 */
public Writer createProcessFileWriter(Path processFilePath) throws IOException {
  Writer indexWriter = SequenceFile.createWriter(fs, myHBaseConf, processFilePath,
      JobFile.class, FileStatus.class);
  return indexWriter;
}
Example #19
Source File: BlurIndexSimpleWriter.java From incubator-retired-blur with Apache License 2.0
private SequenceFile.Writer getWriter() throws IOException {
  if (_writer == null) {
    _writer = openSeqWriter();
    _lastWrite = System.nanoTime();
  }
  return _writer;
}
Example #20
Source File: FlowFileStreamUnpackerSequenceFileWriter.java From nifi with Apache License 2.0
@Override
protected void processInputStream(final InputStream stream, final FlowFile flowFileStreamPackedFlowFile, final Writer writer) throws IOException {
  final FlowFileUnpackager unpackager = new FlowFileUnpackager();
  try (final InputStream in = new BufferedInputStream(stream)) {
    while (unpackager.hasMoreData()) {
      // Read from the buffered wrapper rather than the raw stream.
      unpackager.unpackageFlowFile(in, writer);
    }
  }
}
Example #21
Source File: LookupBuilderReducer.java From incubator-retired-blur with Apache License 2.0
private Writer getRowIdWriter(Text rowId, Reducer<Text, NullWritable, Text, BooleanWritable>.Context context)
    throws IOException {
  BlurPartitioner blurPartitioner = new BlurPartitioner();
  int shard = blurPartitioner.getShard(rowId, _numberOfShardsInTable);
  String shardName = ShardUtil.getShardName(shard);
  Path cachePath = MergeSortRowIdMatcher.getCachePath(_cachePath, _table, shardName);
  Configuration configuration = context.getConfiguration();
  String uuid = configuration.get(IndexerJobDriver.BLUR_UPDATE_ID);
  Path tmpPath = new Path(cachePath, uuid + "_" + getAttemptString(context));
  return _closer.register(MergeSortRowIdMatcher.createWriter(_configuration, tmpPath));
}
Example #22
Source File: ProtoMessageWriter.java From tez with Apache License 2.0
ProtoMessageWriter(Configuration conf, Path filePath, Parser<T> parser) throws IOException {
  this.filePath = filePath;
  this.writer = SequenceFile.createWriter(
      conf,
      Writer.file(filePath),
      Writer.keyClass(NullWritable.class),
      Writer.valueClass(ProtoMessageWritable.class),
      Writer.compression(CompressionType.RECORD));
  this.writable = new ProtoMessageWritable<>(parser);
}
Example #23
Source File: MergeSortRowIdMatcher.java From incubator-retired-blur with Apache License 2.0
private void createCacheFile(Path file, SegmentKey segmentKey) throws IOException {
  LOG.info("Building cache for segment [{0}] to [{1}]", segmentKey, file);
  Path tmpPath = getTmpWriterPath(file.getParent());
  try (Writer writer = createWriter(_configuration, tmpPath)) {
    DirectoryReader reader = getReader();
    for (AtomicReaderContext context : reader.leaves()) {
      SegmentReader segmentReader = AtomicReaderUtil.getSegmentReader(context.reader());
      if (segmentReader.getSegmentName().equals(segmentKey.getSegmentName())) {
        writeRowIds(writer, segmentReader);
        break;
      }
    }
  }
  commitWriter(_configuration, file, tmpPath);
}
Example #24
Source File: CopySeq.java From Kylin with Apache License 2.0
public static void copyTo64MB(String src, String dst) throws IOException {
  Configuration hconf = new Configuration();
  Path srcPath = new Path(src);
  Path dstPath = new Path(dst);

  FileSystem fs = FileSystem.get(hconf);
  long srcSize = fs.getFileStatus(srcPath).getLen();
  int copyTimes = (int) (67108864 / srcSize); // 64 MB
  System.out.println("Copy " + copyTimes + " times");

  Reader reader = new Reader(hconf, SequenceFile.Reader.file(srcPath));
  Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf);
  Text value = new Text();

  Writer writer = SequenceFile.createWriter(hconf, Writer.file(dstPath), Writer.keyClass(key.getClass()),
      Writer.valueClass(Text.class), Writer.compression(CompressionType.BLOCK, getLZOCodec(hconf)));

  int count = 0;
  while (reader.next(key, value)) {
    for (int i = 0; i < copyTimes; i++) {
      writer.append(key, value);
      count++;
    }
  }

  System.out.println("Len: " + writer.getLength());
  System.out.println("Rows: " + count);

  reader.close();
  writer.close();
}
Example #25
Source File: GitConnector.java From compiler with Apache License 2.0
public GitConnector(String path, String projectName, Writer astWriter, long astWriterLen,
    Writer commitWriter, long commitWriterLen, Writer contentWriter, long contentWriterLen) {
  this(path, projectName);
  this.astWriter = astWriter;
  this.commitWriter = commitWriter;
  this.contentWriter = contentWriter;
  this.astWriterLen = astWriterLen;
  this.commitWriterLen = commitWriterLen;
  this.contentWriterLen = contentWriterLen;
}
Example #26
Source File: HdfsTextSink.java From pulsar with Apache License 2.0
@Override
protected List<Option> getOptions() throws IllegalArgumentException, IOException {
  List<Option> opts = super.getOptions();
  opts.add(Writer.keyClass(Text.class));
  opts.add(Writer.valueClass(Text.class));
  return opts;
}
Example #27
Source File: HdfsSequentialTextSink.java From pulsar with Apache License 2.0
@Override
protected List<Option> getOptions() throws IllegalArgumentException, IOException {
  List<Option> opts = super.getOptions();
  opts.add(Writer.keyClass(LongWritable.class));
  opts.add(Writer.valueClass(Text.class));
  return opts;
}
Example #28
Source File: SparkUimaUtils.java From ambiverse-nlu with Apache License 2.0
public static void createSequenceFile(Object[] params, String uri)
    throws URISyntaxException, IOException, UIMAException, NoSuchMethodException, MissingSettingException, ClassNotFoundException {
  Configuration conf = new Configuration();
  Path path = new Path(uri);
  Writer writer = SequenceFile.createWriter(
      conf, Writer.file(path), Writer.keyClass(Text.class), Writer.valueClass(SCAS.class));
  int count = 0;

  CollectionReaderDescription readerDescription =
      Reader.getCollectionReaderDescription(Reader.COLLECTION_FORMAT.NYT, params);
  for (JCas jCas : SimplePipelineCasPoolIterator.iteratePipeline(20, readerDescription)) {
    if (JCasUtil.exists(jCas, DocumentMetaData.class)) {
      ++count;
      // Get the ID.
      DocumentMetaData dmd = JCasUtil.selectSingle(jCas, DocumentMetaData.class);
      String docId = "NULL";
      if (dmd != null) {
        docId = dmd.getDocumentId();
      } else {
        throw new IOException("No Document ID for xml: " + jCas.getView("xml").getDocumentText());
      }
      Text docIdText = new Text(docId);
      SCAS scas = new SCAS(jCas.getCas());
      writer.append(docIdText, scas);
    }
    jCas.release();
  }
  logger.info("Wrote " + count + " documents to " + uri);
  IOUtils.closeStream(writer);
}
Example #29
Source File: FlowFileStreamUnpackerSequenceFileWriter.java From localization_nifi with Apache License 2.0
@Override
protected void processInputStream(final InputStream stream, final FlowFile flowFileStreamPackedFlowFile, final Writer writer) throws IOException {
  final FlowFileUnpackager unpackager = new FlowFileUnpackager();
  try (final InputStream in = new BufferedInputStream(stream)) {
    while (unpackager.hasMoreData()) {
      // Read from the buffered wrapper rather than the raw stream.
      unpackager.unpackageFlowFile(in, writer);
    }
  }
}
Example #30
Source File: CircusTrainCopyListing.java From circus-train with Apache License 2.0
private Writer newWriter(Path pathToListFile) throws IOException {
  FileSystem fs = pathToListFile.getFileSystem(getConf());
  if (fs.exists(pathToListFile)) {
    fs.delete(pathToListFile, false);
  }
  // file(...), keyClass(...), valueClass(...) and compression(NONE) are
  // statically imported SequenceFile.Writer options.
  return createWriter(getConf(), file(pathToListFile), keyClass(Text.class),
      valueClass(CopyListingFileStatus.class), compression(NONE));
}