Java Code Examples for org.apache.hadoop.mapred.FileInputFormat#getSplits()
The following examples show how to use org.apache.hadoop.mapred.FileInputFormat#getSplits(). Both examples come from open-source projects; each lists its original source file and license.
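Before the examples, here is a minimal sketch of calling getSplits() directly through the old mapred API. The TextInputFormat choice, the /tmp/input path, and the split-count hint are illustrative assumptions, not taken from the examples below.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class GetSplitsSketch {
  public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf();
    // Placeholder input path; point this at real data
    FileInputFormat.setInputPaths(conf, new Path("/tmp/input"));

    // TextInputFormat is one concrete FileInputFormat; configure() lets it
    // pick up job settings before splits are computed
    TextInputFormat inputFormat = new TextInputFormat();
    inputFormat.configure(conf);

    // The second argument is only a hint for the desired number of splits;
    // the actual count depends on file sizes and block boundaries
    InputSplit[] splits = inputFormat.getSplits(conf, 4);
    for (InputSplit split : splits) {
      System.out.println(split);
    }
  }
}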
Example 1
Source File: CartesianCommentComparison.java, from hadoop-map-reduce-patterns (Apache License 2.0)
private InputSplit[] getInputSplits(JobConf conf, String inputFormatClass,
    String inputPath, int numSplits) throws ClassNotFoundException, IOException {
  // Create a new instance of the input format
  FileInputFormat inputFormat = (FileInputFormat) ReflectionUtils.newInstance(
      Class.forName(inputFormatClass), conf);
  // Set the input path for the left data set
  inputFormat.setInputPaths(conf, inputPath);
  // Get the left input splits
  return inputFormat.getSplits(conf, numSplits);
}
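Instantiating the format through ReflectionUtils keeps this helper generic over any FileInputFormat subclass, so the surrounding Cartesian-product pattern can build splits for its left and right data sets independently. A hypothetical call site (the format class name, path, and split hint below are placeholders, not from the original project):

// Hypothetical invocation of the helper above
InputSplit[] leftSplits = getInputSplits(conf,
    "org.apache.hadoop.mapred.TextInputFormat", "/data/comments/left", 10);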
Example 2
Source File: OldApiHadoopFileInputSource.java, from incubator-gobblin (Apache License 2.0)
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  JobConf jobConf = new JobConf(new Configuration());
  for (String key : state.getPropertyNames()) {
    jobConf.set(key, state.getProp(key));
  }

  if (state.contains(HadoopFileInputSource.FILE_INPUT_PATHS_KEY)) {
    for (String inputPath : state.getPropAsList(HadoopFileInputSource.FILE_INPUT_PATHS_KEY)) {
      FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    }
  }

  try {
    FileInputFormat<K, V> fileInputFormat = getFileInputFormat(state, jobConf);
    InputSplit[] fileSplits = fileInputFormat.getSplits(jobConf, state.getPropAsInt(
        HadoopFileInputSource.FILE_SPLITS_DESIRED_KEY,
        HadoopFileInputSource.DEFAULT_FILE_SPLITS_DESIRED));
    if (fileSplits == null || fileSplits.length == 0) {
      return ImmutableList.of();
    }

    Extract.TableType tableType = state.contains(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY)
        ? Extract.TableType.valueOf(state.getProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY).toUpperCase())
        : null;
    String tableNamespace = state.getProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY);
    String tableName = state.getProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY);

    List<WorkUnit> workUnits = Lists.newArrayListWithCapacity(fileSplits.length);
    for (InputSplit inputSplit : fileSplits) {
      // Create one WorkUnit per InputSplit
      FileSplit fileSplit = (FileSplit) inputSplit;
      Extract extract = createExtract(tableType, tableNamespace, tableName);
      WorkUnit workUnit = WorkUnit.create(extract);
      workUnit.setProp(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY,
          HadoopUtils.serializeToString(fileSplit));
      workUnit.setProp(HadoopFileInputSource.FILE_SPLIT_PATH_KEY, fileSplit.getPath().toString());
      workUnits.add(workUnit);
    }
    return workUnits;
  } catch (IOException ioe) {
    throw new RuntimeException("Failed to get workunits", ioe);
  }
}
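Each WorkUnit carries the serialized FileSplit, so a downstream extractor can rebuild the exact split without calling getSplits() again. A sketch of that reverse step, assuming Gobblin's HadoopUtils exposes a string deserializer matching serializeToString() (the method name is an assumption; verify it against the Gobblin version in use):

// Assumed counterpart to HadoopUtils.serializeToString()
FileSplit fileSplit = HadoopUtils.deserializeFromString(FileSplit.class,
    workUnit.getProp(HadoopFileInputSource.FILE_SPLIT_BYTES_STRING_KEY));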