org.apache.hadoop.mapreduce.RecordWriter Java Examples
The following examples show how to use org.apache.hadoop.mapreduce.RecordWriter. Each example is taken from an open-source project; the source file, originating project, and license are noted above each snippet.
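Before the examples, it helps to recall the contract being implemented: a RecordWriter receives the key/value pairs emitted by map or reduce tasks and writes them to the output sink, and the API is just two methods, write(K, V) and close(TaskAttemptContext). Below is a minimal, hypothetical sketch (the class name and tab-separated format are illustrative only, not taken from any project on this page):

import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Hypothetical minimal RecordWriter: writes one tab-separated line per record.
public class TabSeparatedRecordWriter<K, V> extends RecordWriter<K, V> {
  private final DataOutputStream out;

  public TabSeparatedRecordWriter(DataOutputStream out) {
    this.out = out;
  }

  @Override
  public void write(K key, V value) throws IOException {
    // Render key and value with toString(); real writers use proper serializers.
    out.write((key + "\t" + value + "\n").getBytes(StandardCharsets.UTF_8));
  }

  @Override
  public void close(TaskAttemptContext context) throws IOException {
    // Release the underlying stream; the framework calls this once per task.
    out.close();
  }
}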
Example #1
Source File: ExcelRowFileOutputFormat.java From hadoopoffice with Apache License 2.0
@Override
public RecordWriter<NullWritable, ArrayWritable> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  // check if mimeType is set. If not assume new Excel format (.xlsx)
  Configuration conf = context.getConfiguration();
  String defaultConf = conf.get(HadoopOfficeWriteConfiguration.CONF_MIMETYPE,
      ExcelFileOutputFormat.DEFAULT_MIMETYPE);
  conf.set(HadoopOfficeWriteConfiguration.CONF_MIMETYPE, defaultConf);
  // add suffix
  Path file = getDefaultWorkFile(context,
      ExcelFileOutputFormat.getSuffix(conf.get(HadoopOfficeWriteConfiguration.CONF_MIMETYPE)));
  try {
    return new ExcelRowRecordWriter<>(
        HadoopUtil.getDataOutputStream(conf, file, context, getCompressOutput(context),
            getOutputCompressorClass(context, ExcelFileOutputFormat.defaultCompressorClass)),
        file.getName(), conf);
  } catch (InvalidWriterConfigurationException | InvalidCellSpecificationException
      | FormatNotUnderstoodException | GeneralSecurityException | OfficeWriterException e) {
    LOG.error(e);
  }
  return null;
}
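A custom OutputFormat like the one above is handed to a job through the standard Job API, which is what ultimately causes the framework to call getRecordWriter() on each task. A minimal, hypothetical driver sketch (the job name and output path are illustrative, and it assumes the hadoopoffice classes from Example #1 are on the classpath):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ExcelExportJob {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "excel-export");
    job.setJarByClass(ExcelExportJob.class);
    // The custom format from Example #1 supplies the RecordWriter.
    job.setOutputFormatClass(ExcelRowFileOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(ArrayWritable.class);
    // Hypothetical output directory; getDefaultWorkFile() resolves the task file under it.
    FileOutputFormat.setOutputPath(job, new Path("/tmp/excel-out"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}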
Example #2
Source File: TestFileOutputCommitter.java From hadoop with Apache License 2.0
private void writeOutput(RecordWriter theRecordWriter, TaskAttemptContext context)
    throws IOException, InterruptedException {
  NullWritable nullWritable = NullWritable.get();
  try {
    theRecordWriter.write(key1, val1);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val1);
    theRecordWriter.write(nullWritable, val2);
    theRecordWriter.write(key2, nullWritable);
    theRecordWriter.write(key1, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key2, val2);
  } finally {
    theRecordWriter.close(context);
  }
}
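The null and NullWritable combinations written above are deliberate: when this helper is driven through TextOutputFormat (as in Example #10 below), the format's LineRecordWriter suppresses a null or NullWritable key or value instead of printing it, and writes nothing at all when both are null, so the test exercises each of those branches. The same helper recurs in several committer tests on this page.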
Example #3
Source File: MneMapreducePersonDataTest.java From mnemonic with Apache License 2.0
@Test(enabled = true)
public void testWritePersonData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<Person<Long>> sess =
      new MneDurableOutputSession<Person<Long>>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<Person<Long>> mdvalue =
      new MneDurableOutputValue<Person<Long>>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<Person<Long>>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<Person<Long>>>();
  RecordWriter<NullWritable, MneDurableOutputValue<Person<Long>>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  Person<Long> person = null;
  for (int i = 0; i < m_reccnt; ++i) {
    person = sess.newDurableObjectRecord();
    person.setAge((short) m_rand.nextInt(50));
    person.setName(String.format("Name: [%s]", Utils.genRandomString()), true);
    m_sumage += person.getAge();
    writer.write(nada, mdvalue.of(person));
  }
  writer.close(m_tacontext);
  sess.close();
}
Example #4
Source File: TestFileOutputCommitter.java From big-c with Apache License 2.0
private void writeOutput(RecordWriter theRecordWriter, TaskAttemptContext context)
    throws IOException, InterruptedException {
  NullWritable nullWritable = NullWritable.get();
  try {
    theRecordWriter.write(key1, val1);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val1);
    theRecordWriter.write(nullWritable, val2);
    theRecordWriter.write(key2, nullWritable);
    theRecordWriter.write(key1, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key2, val2);
  } finally {
    theRecordWriter.close(context);
  }
}
Example #5
Source File: TestMRCJCFileOutputCommitter.java From hadoop with Apache License 2.0
@SuppressWarnings("unchecked") private void writeOutput(RecordWriter theRecordWriter, TaskAttemptContext context) throws IOException, InterruptedException { NullWritable nullWritable = NullWritable.get(); try { theRecordWriter.write(key1, val1); theRecordWriter.write(null, nullWritable); theRecordWriter.write(null, val1); theRecordWriter.write(nullWritable, val2); theRecordWriter.write(key2, nullWritable); theRecordWriter.write(key1, null); theRecordWriter.write(null, null); theRecordWriter.write(key2, val2); } finally { theRecordWriter.close(context); } }
Example #6
Source File: Chain.java From big-c with Apache License 2.0
/**
 * Add mapper that reads and writes from/to the queue
 */
@SuppressWarnings("unchecked")
void addMapper(ChainBlockingQueue<KeyValuePair<?, ?>> input,
    ChainBlockingQueue<KeyValuePair<?, ?>> output,
    TaskInputOutputContext context, int index)
    throws IOException, InterruptedException {
  Configuration conf = getConf(index);
  Class<?> keyClass = conf.getClass(MAPPER_INPUT_KEY_CLASS, Object.class);
  Class<?> valueClass = conf.getClass(MAPPER_INPUT_VALUE_CLASS, Object.class);
  Class<?> keyOutClass = conf.getClass(MAPPER_OUTPUT_KEY_CLASS, Object.class);
  Class<?> valueOutClass = conf.getClass(MAPPER_OUTPUT_VALUE_CLASS, Object.class);
  RecordReader rr = new ChainRecordReader(keyClass, valueClass, input, conf);
  RecordWriter rw = new ChainRecordWriter(keyOutClass, valueOutClass, output, conf);
  MapRunner runner = new MapRunner(mappers.get(index),
      createMapContext(rr, rw, context, getConf(index)), rr, rw);
  threads.add(runner);
}
Example #7
Source File: SafeFileOutputCommitterTest.java From datawave with Apache License 2.0
private void writeOutput(RecordWriter theRecordWriter, TaskAttemptContext context)
    throws IOException, InterruptedException {
  NullWritable nullWritable = NullWritable.get();
  try {
    theRecordWriter.write(key1, val1);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val1);
    theRecordWriter.write(nullWritable, val2);
    theRecordWriter.write(key2, nullWritable);
    theRecordWriter.write(key1, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key2, val2);
  } finally {
    theRecordWriter.close(context);
  }
}
Example #8
Source File: Chain.java From hadoop with Apache License 2.0
/**
 * Add mapper that reads and writes from/to the queue
 */
@SuppressWarnings("unchecked")
void addMapper(ChainBlockingQueue<KeyValuePair<?, ?>> input,
    ChainBlockingQueue<KeyValuePair<?, ?>> output,
    TaskInputOutputContext context, int index)
    throws IOException, InterruptedException {
  Configuration conf = getConf(index);
  Class<?> keyClass = conf.getClass(MAPPER_INPUT_KEY_CLASS, Object.class);
  Class<?> valueClass = conf.getClass(MAPPER_INPUT_VALUE_CLASS, Object.class);
  Class<?> keyOutClass = conf.getClass(MAPPER_OUTPUT_KEY_CLASS, Object.class);
  Class<?> valueOutClass = conf.getClass(MAPPER_OUTPUT_VALUE_CLASS, Object.class);
  RecordReader rr = new ChainRecordReader(keyClass, valueClass, input, conf);
  RecordWriter rw = new ChainRecordWriter(keyOutClass, valueOutClass, output, conf);
  MapRunner runner = new MapRunner(mappers.get(index),
      createMapContext(rr, rw, context, getConf(index)), rr, rw);
  threads.add(runner);
}
Example #9
Source File: TestMRCJCFileOutputCommitter.java From big-c with Apache License 2.0
@SuppressWarnings("unchecked") private void writeOutput(RecordWriter theRecordWriter, TaskAttemptContext context) throws IOException, InterruptedException { NullWritable nullWritable = NullWritable.get(); try { theRecordWriter.write(key1, val1); theRecordWriter.write(null, nullWritable); theRecordWriter.write(null, val1); theRecordWriter.write(nullWritable, val2); theRecordWriter.write(key2, nullWritable); theRecordWriter.write(key1, null); theRecordWriter.write(null, null); theRecordWriter.write(key2, val2); } finally { theRecordWriter.close(context); } }
Example #10
Source File: SafeFileOutputCommitterTest.java From datawave with Apache License 2.0
private void testCommitterInternal(int version) throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new SafeFileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  TextOutputFormat theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeOutput(theRecordWriter, tContext);

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  // validate output
  validateContent(outDir);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
Example #11
Source File: ChainReduceContextImpl.java From hadoop with Apache License 2.0
public ChainReduceContextImpl(
    ReduceContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> base,
    RecordWriter<KEYOUT, VALUEOUT> output, Configuration conf) {
  this.base = base;
  this.rw = output;
  this.conf = conf;
}
Example #12
Source File: ExportOutputFormat.java From aliyun-maxcompute-data-collectors with Apache License 2.0
/** {@inheritDoc} */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  try {
    return new ExportRecordWriter(context);
  } catch (Exception e) {
    throw new IOException(e);
  }
}
Example #13
Source File: SqlServerExportBatchOutputFormat.java From aliyun-maxcompute-data-collectors with Apache License 2.0
/** {@inheritDoc} */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  try {
    return new SqlServerExportBatchRecordWriter<K, V>(context);
  } catch (Exception e) {
    throw new IOException(e);
  }
}
Example #14
Source File: HDFSWriter.java From ViraPipe with MIT License
@Override
public RecordWriter<NullWritable, SAMRecordWritable> getRecordWriter(TaskAttemptContext ctx, Path outputPath)
    throws IOException {
  // the writers require a header in order to create a codec, even if
  // the header isn't being written out
  setSAMHeader(samheader);
  setWriteHeader(writeHeader);
  return super.getRecordWriter(ctx, outputPath);
}
Example #15
Source File: Chain.java From big-c with Apache License 2.0
public MapRunner(Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> mapper,
    Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context mapperContext,
    RecordReader<KEYIN, VALUEIN> rr, RecordWriter<KEYOUT, VALUEOUT> rw)
    throws IOException, InterruptedException {
  this.mapper = mapper;
  this.rr = rr;
  this.rw = rw;
  this.chainContext = mapperContext;
}
Example #16
Source File: SqlServerUpsertOutputFormat.java From aliyun-maxcompute-data-collectors with Apache License 2.0
/** {@inheritDoc} */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  try {
    return new SqlServerUpsertRecordWriter(context);
  } catch (Exception e) {
    throw new IOException(e);
  }
}
Example #17
Source File: PravegaFixedSegmentsOutputFormat.java From pravega-samples with Apache License 2.0
@Override
public RecordWriter<String, V> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  final String scopeName = Optional.ofNullable(conf.get(OUTPUT_SCOPE_NAME))
      .orElseThrow(() -> new IOException(
          "The output scope name must be configured (" + OUTPUT_SCOPE_NAME + ")"));
  final String streamName = Optional.ofNullable(conf.get(OUTPUT_STREAM_NAME))
      .orElseThrow(() -> new IOException(
          "The output stream name must be configured (" + OUTPUT_STREAM_NAME + ")"));
  final URI controllerURI = Optional.ofNullable(conf.get(OUTPUT_URI_STRING))
      .map(URI::create)
      .orElseThrow(() -> new IOException(
          "The Pravega controller URI must be configured (" + OUTPUT_URI_STRING + ")"));
  final String deserializerClassName = Optional.ofNullable(conf.get(OUTPUT_DESERIALIZER))
      .orElseThrow(() -> new IOException(
          "The event deserializer must be configured (" + OUTPUT_DESERIALIZER + ")"));
  final int segments = Integer.parseInt(conf.get(OUTPUT_STREAM_SEGMENTS, "3"));

  StreamManager streamManager = StreamManager.create(controllerURI);
  streamManager.createScope(scopeName);
  StreamConfiguration streamConfig = StreamConfiguration.builder()
      .scalingPolicy(ScalingPolicy.fixed(segments))
      .build();
  streamManager.createStream(scopeName, streamName, streamConfig);

  EventStreamClientFactory clientFactory = (externalClientFactory != null)
      ? externalClientFactory
      : EventStreamClientFactory.withScope(scopeName,
          ClientConfig.builder().controllerURI(controllerURI).build());

  Serializer<V> deserializer;
  try {
    Class<?> deserializerClass = Class.forName(deserializerClassName);
    deserializer = (Serializer<V>) deserializerClass.newInstance();
  } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) {
    log.error("Exception when creating deserializer: {}", e);
    throw new IOException(
        "Unable to create the event deserializer (" + deserializerClassName + ")", e);
  }

  EventStreamWriter<V> writer = clientFactory.createEventWriter(streamName, deserializer,
      EventWriterConfig.builder().build());
  return new PravegaOutputRecordWriter<V>(writer);
}
Example #18
Source File: TaskInputOutputContextImpl.java From big-c with Apache License 2.0
public TaskInputOutputContextImpl(Configuration conf, TaskAttemptID taskid,
    RecordWriter<KEYOUT, VALUEOUT> output,
    OutputCommitter committer,
    StatusReporter reporter) {
  super(conf, taskid, reporter);
  this.output = output;
  this.committer = committer;
}
Example #19
Source File: Chain.java From big-c with Apache License 2.0
/**
 * Create a map context that is based on ChainMapContext and the given record
 * reader and record writer
 */
private <KEYIN, VALUEIN, KEYOUT, VALUEOUT>
    Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context createMapContext(
        RecordReader<KEYIN, VALUEIN> rr, RecordWriter<KEYOUT, VALUEOUT> rw,
        TaskInputOutputContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> context,
        Configuration conf) {
  MapContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> mapContext =
      new ChainMapContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT>(context, rr, rw, conf);
  Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context mapperContext =
      new WrappedMapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>().getMapContext(mapContext);
  return mapperContext;
}
Example #20
Source File: ExportCallOutputFormat.java From aliyun-maxcompute-data-collectors with Apache License 2.0
/** {@inheritDoc} */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  try {
    return new ExportCallRecordWriter(context);
  } catch (Exception e) {
    throw new IOException(e);
  }
}
Example #21
Source File: LogOutputFormat.java From 163-bigdate-note with GNU General Public License v3.0
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
  Iterator<RecordWriter<K, V>> values = this.recordWriter.values().iterator();
  while (values.hasNext()) {
    values.next().close(context);
  }
  this.recordWriter.clear();
}
Example #22
Source File: ContentReader.java From marklogic-contentpump with Apache License 2.0
@Override
public RecordWriter<DocumentURI, DatabaseDocument> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  return new CustomWriter(getOutputPath(context), context.getConfiguration());
}
Example #23
Source File: Chain.java From big-c with Apache License 2.0
/**
 * Add mapper (the last mapper) that reads input from
 * queue and writes output to the output context
 */
@SuppressWarnings("unchecked")
void addMapper(ChainBlockingQueue<KeyValuePair<?, ?>> input,
    TaskInputOutputContext outputContext, int index)
    throws IOException, InterruptedException {
  Configuration conf = getConf(index);
  Class<?> keyClass = conf.getClass(MAPPER_INPUT_KEY_CLASS, Object.class);
  Class<?> valueClass = conf.getClass(MAPPER_INPUT_VALUE_CLASS, Object.class);
  RecordReader rr = new ChainRecordReader(keyClass, valueClass, input, conf);
  RecordWriter rw = new ChainRecordWriter(outputContext);
  MapRunner runner = new MapRunner(mappers.get(index),
      createMapContext(rr, rw, outputContext, getConf(index)), rr, rw);
  threads.add(runner);
}
Example #24
Source File: LogOutputFormat.java From 163-bigdate-note with GNU General Public License v3.0
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job)
    throws IOException, InterruptedException {
  if (writer == null) {
    writer = new MultiRecordWriter(job, getTaskOutputPath(job));
  }
  return writer;
}
Example #25
Source File: GenerateData.java From hadoop with Apache License 2.0
@Override
public RecordWriter<NullWritable, BytesWritable> getRecordWriter(TaskAttemptContext job)
    throws IOException {
  return new ChunkWriter(getDefaultWorkFile(job, ""), job.getConfiguration());
}
Example #26
Source File: TeraOutputFormat.java From pravega-samples with Apache License 2.0
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job)
    throws IOException {
  Path file = getDefaultWorkFile(job, "");
  FileSystem fs = file.getFileSystem(job.getConfiguration());
  FSDataOutputStream fileOut = fs.create(file);
  return new TeraRecordWriter(fileOut, job);
}
Example #27
Source File: SafeFileOutputCommitterTest.java From datawave with Apache License 2.0
private void testMapFileOutputCommitterInternal(int version) throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  conf.setInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new SafeFileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  MapFileOutputFormat theOutputFormat = new MapFileOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeMapFileOutput(theRecordWriter, tContext);

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);

  // validate output
  validateMapFileOutputContent(FileSystem.get(job.getConfiguration()), outDir);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
Example #28
Source File: TransformerOutputFormat.java From BigDataPlatform with GNU General Public License v3.0
@Override
public RecordWriter<BaseDimension, BaseStatsValueWritable> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  Connection conn = null;
  IDimensionConverter converter = DimensionConverterClient.createDimensionConverter(conf);
  try {
    conn = JdbcManager.getConnection(conf, GlobalConstants.WAREHOUSE_OF_WEBSITE);
    conn.setAutoCommit(false);
  } catch (SQLException e) {
    // "获取数据库连接失败" means "failed to obtain a database connection"
    logger.error("获取数据库连接失败", e);
    throw new IOException("获取数据库连接失败", e);
  }
  return new TransformerRecordWriter(conn, conf, converter);
}
Example #29
Source File: SingleDocumentOutputFormat.java From marklogic-contentpump with Apache License 2.0
@Override
public RecordWriter<DocumentURI, MarkLogicDocument> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  String p = conf.get(ConfigConstants.CONF_OUTPUT_FILEPATH);
  Path path = new Path(p);
  return new SingleDocumentWriter(path, conf);
}