org.apache.hadoop.mapreduce.RecordReader Java Examples
The following examples show how to use
org.apache.hadoop.mapreduce.RecordReader.
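org.apache.hadoop.mapreduce.RecordReader is the abstract class returned by an InputFormat's createRecordReader(); a concrete reader implements initialize(), nextKeyValue(), getCurrentKey(), getCurrentValue(), getProgress() and close(), which is the contract every example below relies on. As a reference point, here is a minimal sketch of such a reader; it is not taken from any of the projects listed here, and the class name UpperCaseLineRecordReader and its upper-casing behavior are made up purely for illustration. It delegates the actual file reading to Hadoop's LineRecordReader.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;

// Hypothetical reader: wraps LineRecordReader and upper-cases each line.
public class UpperCaseLineRecordReader extends RecordReader<LongWritable, Text> {

  private final LineRecordReader delegate = new LineRecordReader();
  private final Text currentValue = new Text();

  @Override
  public void initialize(InputSplit split, TaskAttemptContext context)
      throws IOException, InterruptedException {
    // Let the delegate open the file region described by the split.
    delegate.initialize(split, context);
  }

  @Override
  public boolean nextKeyValue() throws IOException, InterruptedException {
    if (!delegate.nextKeyValue()) {
      return false;
    }
    // Transform the delegate's value before handing it to the mapper.
    currentValue.set(delegate.getCurrentValue().toString().toUpperCase());
    return true;
  }

  @Override
  public LongWritable getCurrentKey() throws IOException, InterruptedException {
    return delegate.getCurrentKey(); // byte offset of the current line
  }

  @Override
  public Text getCurrentValue() throws IOException, InterruptedException {
    return currentValue;
  }

  @Override
  public float getProgress() throws IOException, InterruptedException {
    return delegate.getProgress();
  }

  @Override
  public void close() throws IOException {
    delegate.close();
  }
}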
Example #1
Source File: EthereumFormatHadoopTest.java From hadoopcryptoledger with Apache License 2.0
@Test
public void readEthereumBlockInputFormatBlock1346406() throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "eth1346406.bin";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for block 1346406");
  RecordReader<BytesWritable, EthereumBlock> reader = format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  BytesWritable key = new BytesWritable();
  EthereumBlock block = new EthereumBlock();
  assertTrue(reader.nextKeyValue(), "Input Split for block 1346406 contains at least one block");
  key = reader.getCurrentKey();
  block = reader.getCurrentValue();
  assertEquals(6, block.getEthereumTransactions().size(), "Block 1346406 must have 6 transactions");
  assertFalse(reader.nextKeyValue(), "No further blocks in block 1346406");
  reader.close();
}
Example #2
Source File: JSONInputFormat.java From kite with Apache License 2.0
@Override
public RecordReader<E, Void> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  Configuration conf = Hadoop.TaskAttemptContext.getConfiguration.invoke(context);
  Path path;
  if (split instanceof FileSplit) {
    path = ((FileSplit) split).getPath();
  } else {
    throw new DatasetOperationException(
        "Split is not a FileSplit: %s:%s",
        split.getClass().getCanonicalName(), split);
  }
  JSONFileReader<E> reader = new JSONFileReader<E>(
      path.getFileSystem(conf), path, accessor);
  reader.initialize();
  return reader.asRecordReader();
}
Example #3
Source File: SQLServerDBInputFormat.java From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
/** {@inheritDoc} */
protected RecordReader<LongWritable, T> createDBRecordReader(DBInputSplit split, Configuration conf)
    throws IOException {
  DBConfiguration dbConf = getDBConf();
  Class<T> inputClass = (Class<T>) (dbConf.getInputClass());
  String dbProductName = getDBProductName();
  LOG.debug("Creating db record reader for db product: " + dbProductName);
  try {
    return new SQLServerDBRecordReader<T>(split, inputClass, conf, getConnection(),
        dbConf, dbConf.getInputConditions(), dbConf.getInputFieldNames(),
        dbConf.getInputTableName(), dbProductName);
  } catch (SQLException ex) {
    throw new IOException(ex);
  }
}
Example #4
Source File: EthereumFormatHadoopTest.java From hadoopcryptoledger with Apache License 2.0
@Test
public void readEthereumBlockInputFormatBlock1() throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "eth1.bin";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for block 1");
  RecordReader<BytesWritable, EthereumBlock> reader = format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  BytesWritable key = new BytesWritable();
  EthereumBlock block = new EthereumBlock();
  assertTrue(reader.nextKeyValue(), "Input Split for block 1 contains at least one block");
  key = reader.getCurrentKey();
  block = reader.getCurrentValue();
  assertEquals(0, block.getEthereumTransactions().size(), "Block 1 must have 0 transactions");
  assertFalse(reader.nextKeyValue(), "No further blocks in block 1");
  reader.close();
}
Example #5
Source File: Chain.java From big-c with Apache License 2.0
/**
 * Add mapper (the first mapper) that reads input from the input
 * context and writes to queue.
 */
@SuppressWarnings("unchecked")
void addMapper(TaskInputOutputContext inputContext,
    ChainBlockingQueue<KeyValuePair<?, ?>> output, int index)
    throws IOException, InterruptedException {
  Configuration conf = getConf(index);
  Class<?> keyOutClass = conf.getClass(MAPPER_OUTPUT_KEY_CLASS, Object.class);
  Class<?> valueOutClass = conf.getClass(MAPPER_OUTPUT_VALUE_CLASS, Object.class);
  RecordReader rr = new ChainRecordReader(inputContext);
  RecordWriter rw = new ChainRecordWriter(keyOutClass, valueOutClass, output, conf);
  Mapper.Context mapperContext = createMapContext(rr, rw, (MapContext) inputContext, getConf(index));
  MapRunner runner = new MapRunner(mappers.get(index), mapperContext, rr, rw);
  threads.add(runner);
}
Example #6
Source File: ExcelFileInputFormat.java From components with Apache License 2.0
@Override
public RecordReader<Void, IndexedRecord> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException {
  String encoding = context.getConfiguration().get(TALEND_ENCODING);
  String sheet = context.getConfiguration().get(TALEND_EXCEL_SHEET_NAME);
  long header = context.getConfiguration().getLong(TALEND_HEADER, 0L);
  long footer = context.getConfiguration().getLong(TALEND_FOOTER, 0L);
  String excelFormat = context.getConfiguration().get(TALEND_EXCEL_FORMAT, "EXCEL2007");
  long limit = context.getConfiguration().getLong(TALEND_EXCEL_LIMIT, -1);
  if ("EXCEL2007".equals(excelFormat)) {
    return new Excel2007FileRecordReader(sheet, header, footer, limit);
  } else if ("EXCEL97".equals(excelFormat)) {
    return new Excel97FileRecordReader(sheet, header, footer, limit);
  } else if ("HTML".equals(excelFormat)) {
    return new ExcelHTMLFileRecordReader(encoding, header, footer, limit);
  }
  throw new IOException("not a valid excel format");
}
Example #7
Source File: EthereumFormatHadoopTest.java From hadoopcryptoledger with Apache License 2.0
@Test
public void readEthereumBlockInputFormatGenesisBlock() throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "ethgenesis.bin";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for genesis block");
  RecordReader<BytesWritable, EthereumBlock> reader = format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  BytesWritable key = new BytesWritable();
  EthereumBlock block = new EthereumBlock();
  assertTrue(reader.nextKeyValue(), "Input Split for genesis block contains at least one block");
  key = reader.getCurrentKey();
  block = reader.getCurrentValue();
  assertEquals(0, block.getEthereumTransactions().size(), "Genesis Block must have 0 transactions");
  assertFalse(reader.nextKeyValue(), "No further blocks in genesis Block");
  reader.close();
}
Example #8
Source File: OfficeFormatHadoopExcelLowFootPrintSAXTest.java From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2013SingleSheetEncryptedNegativeLowFootprint()
    throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "excel2013encrypt.xlsx";
  String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
  Path file = new Path(fileNameSpreadSheet);
  // set locale to the one of the test data
  conf.set("hadoopoffice.read.locale.bcp47", "de");
  // low footprint
  conf.set("hadoopoffice.read.lowFootprint", "true");
  conf.set("hadoopoffice.read.lowFootprint.parser", "sax");
  // for decryption simply set the password
  conf.set("hadoopoffice.read.security.crypt.password", "test2");
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  ExcelFileInputFormat format = new ExcelFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  assertEquals(1, splits.size(), "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
  InterruptedException ex = assertThrows(InterruptedException.class,
      () -> reader.initialize(splits.get(0), context),
      "Exception is thrown in case of wrong password");
}
Example #9
Source File: HadoopCompat.java From stratio-cassandra with Apache License 2.0
/**
 * Instantiates MapContext under Hadoop 1 and MapContextImpl under Hadoop 2.
 */
public static MapContext newMapContext(Configuration conf,
    TaskAttemptID taskAttemptID, RecordReader recordReader,
    RecordWriter recordWriter, OutputCommitter outputCommitter,
    StatusReporter statusReporter, InputSplit inputSplit) {
  return (MapContext) newInstance(MAP_CONTEXT_CONSTRUCTOR,
      conf, taskAttemptID, recordReader, recordWriter, outputCommitter,
      statusReporter, inputSplit);
}
Example #10
Source File: MultiMapperInputFormat.java From Cubert with Apache License 2.0
@Override
public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  MultiMapperSplit mmSplit = (MultiMapperSplit) split;
  int multiMapperIndex = mmSplit.getMultiMapperIndex();
  return getDelegate(context.getConfiguration(), multiMapperIndex)
      .createRecordReader(mmSplit.getActualSplit(), context);
}
Example #11
Source File: TestFixedLengthInputFormat.java From hadoop with Apache License 2.0
/**
 * Test with no record length set.
 */
@Test(timeout = 5000)
public void testNoRecordLength() throws Exception {
  localFs.delete(workDir, true);
  Path file = new Path(workDir, new String("testFormat.txt"));
  createFile(file, null, 10, 10);
  // Create the job and do not set fixed record length
  Job job = Job.getInstance(defaultConf);
  FileInputFormat.setInputPaths(job, workDir);
  FixedLengthInputFormat format = new FixedLengthInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  boolean exceptionThrown = false;
  for (InputSplit split : splits) {
    try {
      TaskAttemptContext context =
          MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
      RecordReader<LongWritable, BytesWritable> reader =
          format.createRecordReader(split, context);
      MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable> mcontext =
          new MapContextImpl<LongWritable, BytesWritable, LongWritable, BytesWritable>(
              job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
              MapReduceTestUtil.createDummyReporter(), split);
      reader.initialize(split, mcontext);
    } catch (IOException ioe) {
      exceptionThrown = true;
      LOG.info("Exception message:" + ioe.getMessage());
    }
  }
  assertTrue("Exception for not setting record length:", exceptionThrown);
}
Example #12
Source File: RowInputFormat.java From gemfirexd-oss with Apache License 2.0
@Override
public RecordReader<Key, Row> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  RowRecordReader reader = new RowRecordReader();
  reader.initialize(split, context);
  return reader;
}
Example #13
Source File: ParquetLoader.java From parquet-mr with Apache License 2.0
@SuppressWarnings("unchecked") @Override public void prepareToRead(@SuppressWarnings("rawtypes") RecordReader reader, PigSplit split) throws IOException { LOG.debug("LoadFunc.prepareToRead({}, {})", reader, split); this.reader = reader; }
Example #14
Source File: TestCombineFileInputFormat.java From hadoop with Apache License 2.0
@Test
public void testRecordReaderInit() throws InterruptedException, IOException {
  // Test that we properly initialize the child recordreader when
  // CombineFileInputFormat and CombineFileRecordReader are used.
  TaskAttemptID taskId = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
  Configuration conf1 = new Configuration();
  conf1.set(DUMMY_KEY, "STATE1");
  TaskAttemptContext context1 = new TaskAttemptContextImpl(conf1, taskId);

  // This will create a CombineFileRecordReader that itself contains a
  // DummyRecordReader.
  InputFormat inputFormat = new ChildRRInputFormat();

  Path[] files = { new Path("file1") };
  long[] lengths = { 1 };
  CombineFileSplit split = new CombineFileSplit(files, lengths);

  RecordReader rr = inputFormat.createRecordReader(split, context1);
  assertTrue("Unexpected RR type!", rr instanceof CombineFileRecordReader);

  // Verify that the initial configuration is the one being used.
  // Right after construction the dummy key should have value "STATE1"
  assertEquals("Invalid initial dummy key value", "STATE1", rr.getCurrentKey().toString());

  // Switch the active context for the RecordReader...
  Configuration conf2 = new Configuration();
  conf2.set(DUMMY_KEY, "STATE2");
  TaskAttemptContext context2 = new TaskAttemptContextImpl(conf2, taskId);
  rr.initialize(split, context2);

  // And verify that the new context is updated into the child record reader.
  assertEquals("Invalid secondary dummy key value", "STATE2", rr.getCurrentKey().toString());
}
Example #15
Source File: MneMapreducePersonDataTest.java From mnemonic with Apache License 2.0
@Test(enabled = true, dependsOnMethods = { "testWritePersonData" })
public void testReadPersonData() throws Exception {
  long sumage = 0L;
  long reccnt = 0L;
  File folder = new File(m_workdir.toString());
  File[] listfiles = folder.listFiles();
  for (int idx = 0; idx < listfiles.length; ++idx) {
    if (listfiles[idx].isFile()
        && listfiles[idx].getName().startsWith(MneConfigHelper.getBaseOutputName(m_conf, null))
        && listfiles[idx].getName().endsWith(MneConfigHelper.DEFAULT_FILE_EXTENSION)) {
      System.out.println(String.format("Verifying : %s", listfiles[idx].getName()));
      FileSplit split = new FileSplit(
          new Path(m_workdir, listfiles[idx].getName()), 0, 0L, new String[0]);
      InputFormat<NullWritable, MneDurableInputValue<Person<Long>>> inputFormat =
          new MneInputFormat<MneDurableInputValue<Person<Long>>, Person<Long>>();
      RecordReader<NullWritable, MneDurableInputValue<Person<Long>>> reader =
          inputFormat.createRecordReader(split, m_tacontext);
      MneDurableInputValue<Person<Long>> personval = null;
      while (reader.nextKeyValue()) {
        personval = reader.getCurrentValue();
        AssertJUnit.assertTrue(personval.getValue().getAge() < 51);
        sumage += personval.getValue().getAge();
        ++reccnt;
      }
      reader.close();
    }
  }
  AssertJUnit.assertEquals(m_reccnt, reccnt);
  AssertJUnit.assertEquals(m_sumage, sumage);
  System.out.println(String.format("The checksum of ages is %d", sumage));
}
Example #16
Source File: AllLoader.java From spork with Apache License 2.0
@Override
public RecordReader<Writable, Writable> createRecordReader(InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
  // this method plugs the AllReader into the system, and the
  // AllReader will when called select the correct LoadFunc
  return new AllReader(udfSignature);
}
Example #17
Source File: AvroInputFormat.java From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
public RecordReader<AvroWrapper<T>, NullWritable> createRecordReader(
    InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  context.setStatus(split.toString());
  return new AvroRecordReader<T>();
}
Example #18
Source File: TestCombineFileInputFormat.java From hadoop with Apache License 2.0
@SuppressWarnings("unchecked") @Override public RecordReader<Text,Text> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException { return new CombineFileRecordReader((CombineFileSplit) split, context, (Class) DummyRecordReader.class); }
Example #19
Source File: TestInputSampler.java From hadoop with Apache License 2.0
@Override
public org.apache.hadoop.mapred.RecordReader<IntWritable, NullWritable> getRecordReader(
    final org.apache.hadoop.mapred.InputSplit split, JobConf job, Reporter reporter)
    throws IOException {
  return new org.apache.hadoop.mapred.RecordReader<IntWritable, NullWritable>() {
    private final IntWritable i =
        new IntWritable(((MapredSequentialSplit) split).getInit());
    private int maxVal = i.get() + maxDepth + 1;

    @Override
    public boolean next(IntWritable key, NullWritable value) throws IOException {
      i.set(i.get() + 1);
      return i.get() < maxVal;
    }

    @Override
    public IntWritable createKey() {
      return new IntWritable(i.get());
    }

    @Override
    public NullWritable createValue() {
      return NullWritable.get();
    }

    @Override
    public long getPos() throws IOException {
      return 0;
    }

    @Override
    public void close() throws IOException {
    }

    @Override
    public float getProgress() throws IOException {
      return 0;
    }
  };
}
Example #20
Source File: TestCombineFileInputFormat.java From big-c with Apache License 2.0
@SuppressWarnings("unchecked") @Override public RecordReader<Text,Text> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException { return new CombineFileRecordReader((CombineFileSplit) split, context, (Class) DummyRecordReader.class); }
Example #21
Source File: CompositeInputFormat.java From hadoop with Apache License 2.0
/**
 * Construct a CompositeRecordReader for the children of this InputFormat
 * as defined in the init expression.
 * The outermost join need only be composable, not necessarily a composite.
 * Mandating TupleWritable isn't strictly correct.
 */
@SuppressWarnings("unchecked") // child types unknown
public RecordReader<K, TupleWritable> createRecordReader(InputSplit split,
    TaskAttemptContext taskContext) throws IOException, InterruptedException {
  setFormat(taskContext.getConfiguration());
  return root.createRecordReader(split, taskContext);
}
Example #22
Source File: AvroMultipleInputsKeyInputFormat.java From datafu with Apache License 2.0
/** {@inheritDoc} */
@Override
public RecordReader<AvroKey<T>, NullWritable> createRecordReader(
    InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  Schema readerSchema =
      AvroMultipleInputsUtil.getInputKeySchemaForSplit(context.getConfiguration(), split);
  if (readerSchema == null) {
    throw new RuntimeException("Could not determine input schema");
  }
  return new AvroKeyRecordReader<T>(readerSchema);
}
Example #23
Source File: JsonFileInputFormat.java From jumbune with GNU Lesser General Public License v3.0
@Override
public RecordReader<LongWritable, Text> createRecordReader(InputSplit split,
    TaskAttemptContext context) {
  RecordReader<LongWritable, Text> recordReader = null;
  try {
    recordReader = new JsonFileRecordReader(split, context);
  } catch (IOException ioe) {
    LOGGER.error(ioe);
  }
  return recordReader;
}
Example #24
Source File: EmoInputFormat.java From emodb with Apache License 2.0
@Override
public RecordReader<Text, Row> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException {
  FileSplit fileSplit = (FileSplit) split;
  Path path = fileSplit.getPath();
  return new EmoRecordReader(BaseInputFormat.createRecordReader(context.getConfiguration(), path));
}
Example #25
Source File: FixedLengthInputFormat.java From big-c with Apache License 2.0
@Override
public RecordReader<LongWritable, BytesWritable> createRecordReader(InputSplit split,
    TaskAttemptContext context) throws IOException, InterruptedException {
  int recordLength = getRecordLength(context.getConfiguration());
  if (recordLength <= 0) {
    throw new IOException("Fixed record length " + recordLength
        + " is invalid. It should be set to a value greater than zero");
  }
  return new FixedLengthRecordReader(recordLength);
}
Example #26
Source File: PhoenixHBaseLoader.java From phoenix with Apache License 2.0
@SuppressWarnings("unchecked") @Override public void prepareToRead(RecordReader reader, PigSplit split) throws IOException { this.reader = reader; final String resourceSchemaAsStr = getValueFromUDFContext(this.contextSignature,RESOURCE_SCHEMA_SIGNATURE); if (resourceSchemaAsStr == null) { throw new IOException("Could not find schema in UDF context"); } schema = (ResourceSchema)ObjectSerializer.deserialize(resourceSchemaAsStr); }
Example #27
Source File: PigAvroInputFormatAdaptor.java From Cubert with Apache License 2.0
@Override
public RecordReader<NullWritable, Writable> createRecordReader(InputSplit split,
    TaskAttemptContext context) throws IOException, InterruptedException {
  return getDelegate(context.getConfiguration()).createRecordReader(split, context);
}
Example #28
Source File: FileSystemViewKeyInputFormat.java From kite with Apache License 2.0
@Override
public RecordReader<E, Void> createRecordReader(InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
  RecordReader<E, Void> unfilteredRecordReader =
      createUnfilteredRecordReader(inputSplit, taskAttemptContext);
  if (view != null) {
    // use the constraints to filter out entities from the reader
    return new FilteredRecordReader<E>(unfilteredRecordReader,
        ((AbstractRefinableView) view).getConstraints(), view.getAccessor());
  }
  return unfilteredRecordReader;
}
Example #29
Source File: CombineAvroKeyInputFormat.java From incubator-pinot with Apache License 2.0
@Override
public RecordReader<AvroKey<T>, NullWritable> createRecordReader(InputSplit split,
    TaskAttemptContext context) throws IOException {
  Class cls = AvroKeyRecordReaderWrapper.class;
  return new CombineFileRecordReader<>((CombineFileSplit) split, context,
      (Class<? extends RecordReader<AvroKey<T>, NullWritable>>) cls);
}
Example #30
Source File: EthereumFormatHadoopTest.java From hadoopcryptoledger with Apache License 2.0
@Test
public void readEthereumBlockInputFormatBlock0to10() throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "eth0to10.bin";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, file);
  EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for block 0..10");
  RecordReader<BytesWritable, EthereumBlock> reader = format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  BytesWritable key = new BytesWritable();
  EthereumBlock block = new EthereumBlock();
  int count = 0;
  while (count < 11) {
    if (reader.nextKeyValue()) {
      count++;
    }
  }
  assertEquals(11, count, "Block 0..10 contains 11 blocks");
  assertFalse(reader.nextKeyValue(), "No further blocks in block 0..10");
  reader.close();
}