org.apache.avro.mapred.AvroWrapper Java Examples
The following examples show how to use org.apache.avro.mapred.AvroWrapper.
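Before the examples, a minimal sketch of the core API may be useful. AvroWrapper is a thin, mutable holder that lets an Avro datum travel through MapReduce as a key or value: callers wrap a datum via the constructor or the datum(T) setter and unwrap it with datum(). The snippet below is illustrative only; the one-field Stock schema is a made-up stand-in, not code from any of the projects listed here.

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroWrapper;

public class AvroWrapperSketch {
  public static void main(String[] args) {
    // Parse a trivial one-field record schema and build a generic datum.
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Stock\",\"fields\":"
        + "[{\"name\":\"symbol\",\"type\":\"string\"}]}");
    GenericRecord stock = new GenericData.Record(schema);
    stock.put("symbol", "AAPL");

    // Wrap the datum so the MapReduce machinery can treat it as a key or value.
    AvroWrapper<GenericRecord> wrapper = new AvroWrapper<GenericRecord>(stock);

    // Unwrap it on the consuming side.
    System.out.println(wrapper.datum().get("symbol")); // prints AAPL

    // The wrapper is mutable: record readers exploit this to reuse a single
    // instance across records (see Example #1 below).
    wrapper.datum(new GenericData.Record(schema));
  }
}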
Example #1
Source File: AvroRecordReader.java From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
  // Stop once the reader is exhausted or has read past the end of this split.
  if (!reader.hasNext() || reader.pastSync(end)) {
    key = null;
    value = null;
    return false;
  }
  if (key == null) {
    key = new AvroWrapper<T>();
  }
  if (value == null) {
    value = NullWritable.get();
  }
  // Reuse the previous datum instance to avoid allocating one per record.
  key.datum(reader.next(key.datum()));
  return true;
}
Example #2
Source File: AvroMixedMapReduce.java From hiped2 with Apache License 2.0
public void reduce(Text key, Iterator<DoubleWritable> values,
                   OutputCollector<AvroWrapper<StockAvg>, NullWritable> output,
                   Reporter reporter) throws IOException {
  Mean mean = new Mean();
  while (values.hasNext()) {
    mean.increment(values.next().get());
  }
  StockAvg avg = new StockAvg();
  avg.setSymbol(key.toString());
  avg.setAvg(mean.getResult());
  output.collect(new AvroWrapper<StockAvg>(avg), NullWritable.get());
}
Example #3
Source File: AvroRowsFunction.java From tablasco with Apache License 2.0
@Override
public List<Object> call(Tuple2<AvroWrapper, NullWritable> avroTuple) {
  final GenericData.Record datum = (GenericData.Record) avroTuple._1().datum();
  List<Object> row = new ArrayList<>(this.headers.size());
  for (String header : this.headers) {
    Object value = datum.get(header);
    if (value instanceof CharSequence) { // Avro Utf8 type
      value = value.toString();
    }
    row.add(value);
  }
  return row;
}
Example #4
Source File: ParquetReaderTest.java From reef with Apache License 2.0
@Test
public void testDataEntries() throws IOException, InjectionException {
  final JavaConfigurationBuilder builder = Tang.Factory.getTang().newConfigurationBuilder();
  builder.bindNamedParameter(PathString.class, file.getAbsolutePath());
  final Configuration conf = builder.build();
  final Injector injector = Tang.Factory.getTang().newInjector(conf);
  final ParquetReader reader = injector.getInstance(ParquetReader.class);

  final byte[] byteArr = reader.serializeToByteBuffer().array();
  final ByteArrayInputStream inputStream = new ByteArrayInputStream(byteArr);

  final DatumReader datumReader = new GenericDatumReader<GenericRecord>();
  datumReader.setSchema(reader.createAvroSchema());
  final AvroKeyDeserializer deserializer = new AvroKeyDeserializer<GenericRecord>(
      reader.createAvroSchema(), reader.createAvroSchema(), datumReader);
  deserializer.open(inputStream);

  AvroWrapper<GenericRecord> record = null;
  for (int i = 0; i < 10; i = i + 1) {
    record = deserializer.deserialize(record);
    Assert.assertEquals("User_" + i, record.datum().get("name").toString());
    Assert.assertEquals(i, record.datum().get("age"));
    Assert.assertEquals("blue", record.datum().get("favorite_color").toString());
  }
}
Example #5
Source File: AvroFileAccessor.java From pxf with Apache License 2.0
@Override
public boolean openForRead() throws Exception {
  // Pass the schema to the AvroInputFormat
  AvroJob.setInputSchema(jobConf, schema);
  // The avroWrapper required for the iteration
  avroWrapper = new AvroWrapper<>();
  return super.openForRead();
}
Example #6
Source File: AvroAsTextOutputFormat.java From iow-hadoop-streaming with Apache License 2.0
@Override
public void write(K2 k, V2 v) throws IOException {
  GenericRecord record = fromText(k.toString() + "\t" + v.toString(), schema);
  AvroWrapper<GenericRecord> wrapper = new AvroWrapper<GenericRecord>(record);
  writer.append(wrapper.datum());
}
Example #7
Source File: BloomFilterCreator.java From hiped2 with Apache License 2.0
@Override
public void close() throws IOException {
  System.out.println(filter);
  if (collector != null) {
    collector.collect(
        new AvroWrapper<GenericRecord>(AvroBytesRecord.toGenericRecord(filter)),
        NullWritable.get());
  }
}
Example #8
Source File: BloomFilterCreator.java From hiped2 with Apache License 2.0
@Override
public void reduce(NullWritable key, Iterator<BloomFilter> values,
                   OutputCollector<AvroWrapper<GenericRecord>, NullWritable> output,
                   Reporter reporter) throws IOException {
  while (values.hasNext()) {
    BloomFilter bf = values.next();
    filter.or(bf);
    System.out.println(filter);
  }
  collector = output;
}
Example #9
Source File: SmallFilesMapReduce.java From hiped2 with Apache License 2.0
public void map(AvroWrapper<GenericRecord> key, NullWritable value,
                OutputCollector<Text, Text> output,
                Reporter reporter) throws IOException {
  outKey.set(key.datum().get(SmallFilesWrite.FIELD_FILENAME).toString());
  outValue.set(DigestUtils.md5Hex(
      ((ByteBuffer) key.datum().get(SmallFilesWrite.FIELD_CONTENTS)).array()));
  output.collect(outKey, outValue);
}
Example #10
Source File: AvroMixedMapReduce.java From hiped2 with Apache License 2.0
public void map(AvroWrapper<Stock> key, NullWritable value,
                OutputCollector<Text, DoubleWritable> output,
                Reporter reporter) throws IOException {
  output.collect(new Text(key.datum().getSymbol().toString()),
      new DoubleWritable(key.datum().getOpen()));
}
Example #11
Source File: DBImportMapReduce.java From hiped2 with Apache License 2.0
public void map(LongWritable key, StockDbWritable value,
                OutputCollector<AvroWrapper<Stock>, NullWritable> output,
                Reporter reporter) throws IOException {
  output.collect(new AvroWrapper<Stock>(writableToAvro(value)),
      NullWritable.get());
}
Example #12
Source File: AvroDataSupplier.java From tablasco with Apache License 2.0
@Override
public DistributedTable get() {
  JavaPairRDD<AvroWrapper, NullWritable> avroRdd = this.sparkContext.hadoopFile(
      this.dataPath.toString(), AvroInputFormat.class, AvroWrapper.class, NullWritable.class);
  LOGGER.info("data location: {}", this.dataPath);
  List<String> headers = avroRdd.keys().map(new AvroHeadersFunction()).first();
  LOGGER.info("data headers: {}", headers);
  JavaRDD<List<Object>> rows = avroRdd.map(new AvroRowsFunction(headers));
  return new DistributedTable(headers, rows);
}
Example #13
Source File: AvroInputFormat.java From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
public RecordReader<AvroWrapper<T>, NullWritable> createRecordReader(
    InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  context.setStatus(split.toString());
  return new AvroRecordReader<T>();
}
Example #14
Source File: AvroOutputFormat.java From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(
    TaskAttemptContext context) throws IOException, InterruptedException {
  // Map-only jobs emit the map output schema; otherwise use the job's output schema.
  boolean isMapOnly = context.getNumReduceTasks() == 0;
  Schema schema = isMapOnly
      ? AvroJob.getMapOutputSchema(context.getConfiguration())
      : AvroJob.getOutputSchema(context.getConfiguration());

  final DataFileWriter<T> WRITER =
      new DataFileWriter<T>(new ReflectDatumWriter<T>());
  configureDataFileWriter(WRITER, context);

  Path path = getDefaultWorkFile(context, EXT);
  WRITER.create(schema, path.getFileSystem(context.getConfiguration()).create(path));

  return new RecordWriter<AvroWrapper<T>, NullWritable>() {
    @Override
    public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
      WRITER.append(wrapper.datum());
    }

    @Override
    public void close(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
      WRITER.close();
    }
  };
}
Example #15
Source File: MergeAvroReducer.java From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  wrapper = new AvroWrapper<GenericRecord>();
  schema = AvroJob.getOutputSchema(context.getConfiguration());
  bigDecimalFormatString = context.getConfiguration().getBoolean(
      ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT,
      ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT_DEFAULT);
}
Example #16
Source File: DBImportMapReduce.java From hiped2 with Apache License 2.0
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
  Cli cli = Cli.builder().setArgs(args)
      .addOptions(CliCommonOpts.OutputFileOption.values()).build();
  int result = cli.runCmd();
  if (result != 0) {
    return result;
  }

  Path output = new Path(cli.getArgValueAsString(CliCommonOpts.OutputFileOption.OUTPUT));

  Configuration conf = super.getConf();
  DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
      "jdbc:mysql://localhost/sqoop_test" + "?user=hip_sqoop_user&password=password");

  JobConf job = new JobConf(conf);
  job.setJarByClass(DBImportMapReduce.class);

  job.setInputFormat(DBInputFormat.class);
  job.setOutputFormat(AvroOutputFormat.class);
  AvroJob.setOutputSchema(job, Stock.SCHEMA$);
  job.set(AvroJob.OUTPUT_CODEC, SnappyCodec.class.getName());

  job.setMapperClass(Map.class);
  job.setNumMapTasks(4);
  job.setNumReduceTasks(0);

  job.setMapOutputKeyClass(AvroWrapper.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setOutputKeyClass(AvroWrapper.class);
  job.setOutputValueClass(NullWritable.class);

  FileOutputFormat.setOutputPath(job, output);

  DBInputFormat.setInput(job, StockDbWritable.class,
      "select * from stocks", "SELECT COUNT(id) FROM stocks");

  RunningJob runningJob = JobClient.runJob(job);
  return runningJob.isSuccessful() ? 0 : 1;
}
Example #17
Source File: AvroHeadersFunction.java From tablasco with Apache License 2.0
@Override
public List<String> call(AvroWrapper avroWrapper) {
  return getColumns(((GenericData.Record) avroWrapper.datum()).getSchema().getFields());
}
Example #18
Source File: AvroRecordReader.java From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
public AvroWrapper<T> getCurrentKey() throws IOException, InterruptedException {
  return key;
}
Example #19
Source File: MergeAvroMapper.java From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
public void map(AvroWrapper<GenericRecord> key, NullWritable val, Context c)
    throws IOException, InterruptedException {
  processRecord(toSqoopRecord(key.datum()), c);
}
Example #20
Source File: AvroExportMapper.java From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
protected void map(AvroWrapper<GenericRecord> key, NullWritable value, Context context)
    throws IOException, InterruptedException {
  context.write(toSqoopRecord(key.datum()), NullWritable.get());
}