org.apache.hadoop.hive.ql.plan.MapredWork Java Examples
The following examples show how to use
org.apache.hadoop.hive.ql.plan.MapredWork.
Example #1
Source File: TestHoodieCombineHiveInputFormat.java, from Apache Hudi (Apache License 2.0)
@Test
@Disabled
public void testHoodieRealtimeCombineHoodieInputFormat() throws Exception {
  Configuration conf = new Configuration();
  // Initial commit: create 3 parquet files with 1000 records each
  Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getEvolvedSchema());
  HoodieTestUtils.init(hadoopConf, tempDir.toAbsolutePath().toString(), HoodieTableType.MERGE_ON_READ);
  String commitTime = "100";
  final int numRecords = 1000;
  File partitionDir = InputFormatTestUtil.prepareParquetTable(tempDir, schema, 3, numRecords, commitTime);
  InputFormatTestUtil.commit(tempDir, commitTime);

  // Write 1000 update records to log file 0
  String newCommitTime = "101";
  HoodieLogFormat.Writer writer = InputFormatTestUtil.writeDataBlockToLogFile(partitionDir, fs, schema, "fileid0",
      commitTime, newCommitTime, numRecords, numRecords, 0);
  writer.close();
  // Write 1000 update records to log file 1
  writer = InputFormatTestUtil.writeDataBlockToLogFile(partitionDir, fs, schema, "fileid1",
      commitTime, newCommitTime, numRecords, numRecords, 0);
  writer.close();
  // Write 1000 update records to log file 2
  writer = InputFormatTestUtil.writeDataBlockToLogFile(partitionDir, fs, schema, "fileid2",
      commitTime, newCommitTime, numRecords, numRecords, 0);
  writer.close();

  TableDesc tblDesc = Utilities.defaultTd;
  // Set the input format
  tblDesc.setInputFileFormatClass(HoodieCombineHiveInputFormat.class);
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path(tempDir.toAbsolutePath().toString()), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Path mapWorkPath = new Path(tempDir.toAbsolutePath().toString());
  Utilities.setMapRedWork(conf, mrwork, mapWorkPath);
  jobConf = new JobConf(conf);
  // Add the input paths
  FileInputFormat.setInputPaths(jobConf, partitionDir.getPath());
  jobConf.set(HAS_MAP_WORK, "true");
  // The following config tells Hive to choose ExecMapper to read the MAP_WORK
  jobConf.set(MAPRED_MAPPER_CLASS, ExecMapper.class.getName());
  // Setting the split size to 3 so the 3 file groups are combined into one split
  jobConf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.SPLIT_MAXSIZE, "3");

  HoodieCombineHiveInputFormat combineHiveInputFormat = new HoodieCombineHiveInputFormat();
  String tripsHiveColumnTypes = "double,string,string,string,double,double,double,double,double";
  InputFormatTestUtil.setPropsForInputFormat(jobConf, schema, tripsHiveColumnTypes);
  InputSplit[] splits = combineHiveInputFormat.getSplits(jobConf, 1);
  // Since SPLIT_MAXSIZE is 3, we should get only 1 split covering all 3 file groups
  assertEquals(1, splits.length);
  RecordReader<NullWritable, ArrayWritable> recordReader =
      combineHiveInputFormat.getRecordReader(splits[0], jobConf, null);
  NullWritable nullWritable = recordReader.createKey();
  ArrayWritable arrayWritable = recordReader.createValue();
  int counter = 0;
  while (recordReader.next(nullWritable, arrayWritable)) {
    // Read every record in the combined split
    counter++;
  }
  // file0, file1 and file2 each contribute 1000 records, so 3000 records total
  assertEquals(3000, counter);
}
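The MapredWork-specific portion of the example boils down to a small pattern: build a MapredWork, register the path-to-partition mapping on its MapWork, and serialize the plan into the job configuration with Utilities.setMapRedWork so that Hive's map-side components can recover it at runtime. The sketch below isolates that pattern using the same Hive calls that appear in the test above; the class name MapredWorkExample, the method buildJobConf, and the tableLocation parameter are illustrative placeholders, not part of the Hudi test, and the exact map types accepted by setPathToPartitionInfo may vary across Hive versions.

import java.util.LinkedHashMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.mapred.JobConf;

public class MapredWorkExample {

  // Builds a JobConf that carries a serialized MapredWork for a single-partition table.
  public static JobConf buildJobConf(String tableLocation) {
    Configuration conf = new Configuration();

    // Describe the table and its partition; Utilities.defaultTd is the same
    // stock TableDesc used in the test above.
    TableDesc tblDesc = Utilities.defaultTd;
    PartitionDesc partDesc = new PartitionDesc(tblDesc, null);

    // Map each input path to its partition descriptor.
    LinkedHashMap<Path, PartitionDesc> pathToPartitionInfo = new LinkedHashMap<>();
    pathToPartitionInfo.put(new Path(tableLocation), partDesc);

    // MapredWork wraps the map-side plan (MapWork); attach the partition info to it.
    MapredWork mrwork = new MapredWork();
    mrwork.getMapWork().setPathToPartitionInfo(pathToPartitionInfo);

    // Serialize the plan into the configuration so ExecMapper and Hive's
    // input formats can deserialize it on the task side.
    Utilities.setMapRedWork(conf, mrwork, new Path(tableLocation));

    return new JobConf(conf);
  }
}

Serializing the plan into the configuration this way is what lets HoodieCombineHiveInputFormat and ExecMapper in the test above recover the partition metadata from the JobConf alone when splits are computed and read.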