org.apache.hadoop.hive.ql.plan.PartitionDesc Java Examples
The following examples show how to use org.apache.hadoop.hive.ql.plan.PartitionDesc.
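Most of the examples share one pattern: resolve the PartitionDesc that the Hive map-side plan (MapWork) associates with an input path, then read the partition's input format class or table properties from it. The sketch below is an illustrative summary of that pattern, not code from any of the projects that follow; the class name PartitionDescLookup and the method inputFormatFor are hypothetical, and it assumes a Hive version whose pathToPartitionInfo map is keyed by Path (older releases key it by String, as several examples below show).

import java.util.Map;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.mapred.JobConf;

public class PartitionDescLookup {

  /**
   * Illustrative helper: resolve the PartitionDesc registered for the given input path
   * and return the name of the InputFormat class Hive will use to read that partition,
   * or null if the path is not covered by the current map-side plan.
   */
  public static String inputFormatFor(JobConf job, Path inputPath) {
    // Deserialize the map-side plan that Hive stored in the job configuration.
    MapWork mapWork = Utilities.getMapWork(job);
    if (mapWork == null) {
      return null;
    }
    // Assumes a Path-keyed map; older Hive versions key this map by String.
    Map<Path, PartitionDesc> pathToPartitionInfo = mapWork.getPathToPartitionInfo();
    PartitionDesc part = pathToPartitionInfo.get(inputPath);
    if (part == null || part.getTableDesc() == null) {
      return null;
    }
    return part.getInputFileFormatClass().getName();
  }
}

Examples #1 through #3 use this lookup to choose an input format for combined splits; Examples #4 through #7 build a path-to-PartitionDesc map and use the resolved TableDesc to copy table properties into a cloned JobConf; Examples #8 and #11 read custom table properties from it.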
Example #1
Source File: HoodieCombineHiveInputFormat.java From hudi with Apache License 2.0
public CombineHiveInputSplit(JobConf job, CombineFileSplit inputSplitShim,
    Map<Path, PartitionDesc> pathToPartitionInfo) throws IOException {
  this.inputSplitShim = inputSplitShim;
  this.pathToPartitionInfo = pathToPartitionInfo;

  if (job != null) {
    if (this.pathToPartitionInfo == null) {
      this.pathToPartitionInfo = Utilities.getMapWork(job).getPathToPartitionInfo();
    }

    // extract all the inputFormatClass names for each chunk in the
    // CombinedSplit.
    Path[] ipaths = inputSplitShim.getPaths();
    if (ipaths.length > 0) {
      PartitionDesc part = getPartitionFromPath(this.pathToPartitionInfo, ipaths[0],
          IOPrepareCache.get().getPartitionDescMap());
      inputFormatClassName = part.getInputFileFormatClass().getName();
    }
  }
}
Example #2
Source File: HoodieCombineHiveInputFormat.java From hudi with Apache License 2.0
/**
 * Writable interface.
 */
@Override
public void write(DataOutput out) throws IOException {
  if (inputFormatClassName == null) {
    if (pathToPartitionInfo == null) {
      pathToPartitionInfo = Utilities.getMapWork(getJob()).getPathToPartitionInfo();
    }

    // extract all the inputFormatClass names for each chunk in the
    // CombinedSplit.
    PartitionDesc part = getPartitionFromPath(pathToPartitionInfo, inputSplitShim.getPath(0),
        IOPrepareCache.get().getPartitionDescMap());

    // create a new InputFormat instance if this is the first time to see
    // this class
    inputFormatClassName = part.getInputFileFormatClass().getName();
  }
  Text.writeString(out, inputFormatClassName);
  if (HoodieParquetRealtimeInputFormat.class.getName().equals(inputFormatClassName)) {
    // Write Shim Class Name
    Text.writeString(out, inputSplitShim.getClass().getName());
  }
  inputSplitShim.write(out);
}
Example #3
Source File: HoodieCombineHiveInputFormat.java From hudi with Apache License 2.0
@Override
public Set<Integer> call() throws Exception {
  Set<Integer> nonCombinablePathIndices = new HashSet<Integer>();
  for (int i = 0; i < length; i++) {
    PartitionDesc part = getPartitionFromPath(pathToPartitionInfo, paths[i + start],
        IOPrepareCache.get().allocatePartitionDescMap());
    // Use HiveInputFormat if any of the paths is not splittable
    Class<? extends InputFormat> inputFormatClass = part.getInputFileFormatClass();
    InputFormat<WritableComparable, Writable> inputFormat = getInputFormatFromCache(inputFormatClass, conf);
    if (inputFormat instanceof AvoidSplitCombination
        && ((AvoidSplitCombination) inputFormat).shouldSkipCombine(paths[i + start], conf)) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("The path [" + paths[i + start] + "] is being parked for HiveInputFormat.getSplits");
      }
      nonCombinablePathIndices.add(i + start);
    }
  }
  return nonCombinablePathIndices;
}
Example #4
Source File: Hive012Binding.java From parquet-mr with Apache License 2.0
/**
 * Initialize the mapWork variable in order to get all the partition and start to update the jobconf
 *
 * @param job
 */
private void init(final JobConf job) {
  final String plan = HiveConf.getVar(job, HiveConf.ConfVars.PLAN);
  if (mapWork == null && plan != null && plan.length() > 0) {
    mapWork = Utilities.getMapWork(job);
    pathToPartitionInfo.clear();
    for (final Map.Entry<String, PartitionDesc> entry : mapWork.getPathToPartitionInfo().entrySet()) {
      pathToPartitionInfo.put(new Path(entry.getKey()).toUri().getPath().toString(), entry.getValue());
    }
  }
}
Example #5
Source File: Hive012Binding.java From parquet-mr with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public JobConf pushProjectionsAndFilters(JobConf jobConf, Path path) throws IOException {
  init(jobConf);
  final JobConf cloneJobConf = new JobConf(jobConf);
  final PartitionDesc part = pathToPartitionInfo.get(path.toString());

  if ((part != null) && (part.getTableDesc() != null)) {
    Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf);
  }
  pushProjectionsAndFilters(cloneJobConf, path.toString(), path.toUri().toString());
  return cloneJobConf;
}
Example #6
Source File: Hive010Binding.java From parquet-mr with Apache License 2.0
/**
 * Initialize the mrwork variable in order to get all the partition and start to update the jobconf
 *
 * @param job
 */
private void init(final JobConf job) {
  final String plan = HiveConf.getVar(job, HiveConf.ConfVars.PLAN);
  if (mrwork == null && plan != null && plan.length() > 0) {
    mrwork = Utilities.getMapRedWork(job);
    pathToPartitionInfo.clear();
    for (final Map.Entry<String, PartitionDesc> entry : mrwork.getPathToPartitionInfo().entrySet()) {
      pathToPartitionInfo.put(new Path(entry.getKey()).toUri().getPath().toString(), entry.getValue());
    }
  }
}
Example #7
Source File: Hive010Binding.java From parquet-mr with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public JobConf pushProjectionsAndFilters(JobConf jobConf, Path path) throws IOException {
  init(jobConf);
  final JobConf cloneJobConf = new JobConf(jobConf);
  final PartitionDesc part = pathToPartitionInfo.get(path.toString());

  if ((part != null) && (part.getTableDesc() != null)) {
    Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf);
  }
  pushProjectionsAndFilters(cloneJobConf, path.toString(), path.toUri().toString());
  return cloneJobConf;
}
Example #8
Source File: HiveReaderSetting.java From multiple-dimension-spread with Apache License 2.0
public HiveReaderSetting( final FileSplit split, final JobConf job ){
  config = new Configuration();

  disableSkipBlock = job.getBoolean( "mds.disable.block.skip" , false );
  disableFilterPushdown = job.getBoolean( "mds.disable.filter.pushdown" , false );

  Set<String> pathNameSet = createPathSet( split.getPath() );
  List<ExprNodeGenericFuncDesc> filterExprs = new ArrayList<ExprNodeGenericFuncDesc>();
  String filterExprSerialized = job.get( TableScanDesc.FILTER_EXPR_CONF_STR );
  if( filterExprSerialized != null ){
    filterExprs.add( Utilities.deserializeExpression( filterExprSerialized ) );
  }

  MapWork mapWork;
  try{
    mapWork = Utilities.getMapWork( job );
  }catch( Exception e ){
    mapWork = null;
  }

  if( mapWork == null ){
    node = createExpressionNode( filterExprs );
    isVectorModeFlag = false;
    return;
  }

  node = createExpressionNode( filterExprs );

  for( Map.Entry<String,PartitionDesc> pathsAndParts : mapWork.getPathToPartitionInfo().entrySet() ){
    if( ! pathNameSet.contains( pathsAndParts.getKey() ) ){
      continue;
    }
    Properties props = pathsAndParts.getValue().getTableDesc().getProperties();
    if( props.containsKey( "mds.expand" ) ){
      config.set( "spread.reader.expand.column" , props.getProperty( "mds.expand" ) );
    }
    if( props.containsKey( "mds.flatten" ) ){
      config.set( "spread.reader.flatten.column" , props.getProperty( "mds.flatten" ) );
    }
  }

  config.set( "spread.reader.read.column.names" , createReadColumnNames(
      job.get( ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR , null ) ) );

  // Next Hive version:
  // Utilities.getUseVectorizedInputFileFormat(job)
  isVectorModeFlag = Utilities.isVectorMode( job );
}
Example #9
Source File: HoodieCombineRealtimeHiveSplit.java From hudi with Apache License 2.0
public HoodieCombineRealtimeHiveSplit(JobConf jobConf, CombineFileSplit combineFileSplit,
    Map<Path, PartitionDesc> map) throws IOException {
  super(jobConf, combineFileSplit, map);
}
Example #10
Source File: TestHoodieCombineHiveInputFormat.java From hudi with Apache License 2.0
@Test
@Disabled
public void testHoodieRealtimeCombineHoodieInputFormat() throws Exception {
  Configuration conf = new Configuration();
  // initial commit
  Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getEvolvedSchema());
  HoodieTestUtils.init(hadoopConf, tempDir.toAbsolutePath().toString(), HoodieTableType.MERGE_ON_READ);
  String commitTime = "100";
  final int numRecords = 1000;
  // Create 3 parquet files with 1000 records each
  File partitionDir = InputFormatTestUtil.prepareParquetTable(tempDir, schema, 3, numRecords, commitTime);
  InputFormatTestUtil.commit(tempDir, commitTime);

  // insert 1000 update records to log file 0
  String newCommitTime = "101";
  HoodieLogFormat.Writer writer =
      InputFormatTestUtil.writeDataBlockToLogFile(partitionDir, fs, schema, "fileid0", commitTime, newCommitTime,
          numRecords, numRecords, 0);
  writer.close();
  // insert 1000 update records to log file 1
  writer = InputFormatTestUtil.writeDataBlockToLogFile(partitionDir, fs, schema, "fileid1", commitTime, newCommitTime,
      numRecords, numRecords, 0);
  writer.close();
  // insert 1000 update records to log file 2
  writer = InputFormatTestUtil.writeDataBlockToLogFile(partitionDir, fs, schema, "fileid2", commitTime, newCommitTime,
      numRecords, numRecords, 0);
  writer.close();

  TableDesc tblDesc = Utilities.defaultTd;
  // Set the input format
  tblDesc.setInputFileFormatClass(HoodieCombineHiveInputFormat.class);
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path(tempDir.toAbsolutePath().toString()), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Path mapWorkPath = new Path(tempDir.toAbsolutePath().toString());
  Utilities.setMapRedWork(conf, mrwork, mapWorkPath);
  jobConf = new JobConf(conf);
  // Add the paths
  FileInputFormat.setInputPaths(jobConf, partitionDir.getPath());
  jobConf.set(HAS_MAP_WORK, "true");
  // The following config tells Hive to choose ExecMapper to read the MAP_WORK
  jobConf.set(MAPRED_MAPPER_CLASS, ExecMapper.class.getName());
  // setting the split size to be 3 to create one split for 3 file groups
  jobConf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.SPLIT_MAXSIZE, "3");

  HoodieCombineHiveInputFormat combineHiveInputFormat = new HoodieCombineHiveInputFormat();
  String tripsHiveColumnTypes = "double,string,string,string,double,double,double,double,double";
  InputFormatTestUtil.setPropsForInputFormat(jobConf, schema, tripsHiveColumnTypes);
  InputSplit[] splits = combineHiveInputFormat.getSplits(jobConf, 1);
  // Since the SPLIT_SIZE is 3, we should create only 1 split with all 3 file groups
  assertEquals(1, splits.length);
  RecordReader<NullWritable, ArrayWritable> recordReader =
      combineHiveInputFormat.getRecordReader(splits[0], jobConf, null);
  NullWritable nullWritable = recordReader.createKey();
  ArrayWritable arrayWritable = recordReader.createValue();
  int counter = 0;
  while (recordReader.next(nullWritable, arrayWritable)) {
    // read over all the splits
    counter++;
  }
  // should read out 3 splits, each for file0, file1, file2 containing 1000 records each
  assertEquals(3000, counter);
}
Example #11
Source File: CopybookRecordReader.java From CopybookInputFormat with Apache License 2.0
public CopybookRecordReader(FileSplit genericSplit, JobConf job) throws IOException {
  try {
    String cblPath = job.get(Const.COPYBOOK_INPUTFORMAT_CBL_HDFS_PATH_CONF);

    if (cblPath == null) {
      if (job != null) {
        MapWork mrwork = Utilities.getMapWork(job);

        if (mrwork == null) {
          System.out.println("When running a client side hive job you have to set \"copybook.inputformat.cbl.hdfs.path\" before executing the query.");
          System.out.println("When running a MR job we can get this from the hive TBLProperties");
        }
        Map<String, PartitionDesc> map = mrwork.getPathToPartitionInfo();
        for (Map.Entry<String, PartitionDesc> pathsAndParts : map.entrySet()) {
          System.out.println("Hey");
          Properties props = pathsAndParts.getValue().getProperties();
          cblPath = props.getProperty(Const.COPYBOOK_INPUTFORMAT_CBL_HDFS_PATH_CONF);
          break;
        }
      }
    }

    FileSystem fs = FileSystem.get(job);
    BufferedInputStream inputStream = new BufferedInputStream(fs.open(new Path(cblPath)));
    CobolCopybookLoader copybookInt = new CobolCopybookLoader();
    externalRecord = copybookInt.loadCopyBook(inputStream, "RR", CopybookLoader.SPLIT_NONE, 0,
        "cp037", Convert.FMT_MAINFRAME, 0, null);

    int fileStructure = Constants.IO_FIXED_LENGTH;

    for (ExternalField field : externalRecord.getRecordFields()) {
      recordByteLength += field.getLen();
    }

    // jump to the point in the split that the first whole record of split
    // starts at
    FileSplit split = (FileSplit) genericSplit;

    start = split.getStart();
    end = start + split.getLength();

    final Path file = split.getPath();

    BufferedInputStream fileIn = new BufferedInputStream(fs.open(split.getPath()));

    if (start != 0) {
      pos = start - (start % recordByteLength) + recordByteLength;
      fileIn.skip(pos);
    }

    ret = LineIOProvider.getInstance().getLineReader(fileStructure,
        LineIOProvider.getInstance().getLineProvider(fileStructure));

    ret.open(fileIn, externalRecord);
  } catch (Exception e) {
    e.printStackTrace();
  }
}