Java Code Examples for org.apache.hadoop.mapred.InputSplit#getLength()
The following examples show how to use org.apache.hadoop.mapred.InputSplit#getLength().
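Before the project-specific examples, here is a minimal, self-contained sketch of the call in isolation. It sums the byte lengths reported by getLength() for the splits produced by a TextInputFormat; the input path and the split-count hint are assumptions chosen purely for illustration.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class SplitLengthExample {

    public static void main(String[] args) throws IOException {
        JobConf jobConf = new JobConf();

        // Hypothetical input path, used only for illustration.
        FileInputFormat.setInputPaths(jobConf, new Path("/tmp/input"));

        TextInputFormat inputFormat = new TextInputFormat();
        inputFormat.configure(jobConf);

        // The second argument is only a hint for the desired number of splits.
        InputSplit[] splits = inputFormat.getSplits(jobConf, 1);

        long totalBytes = 0;
        for (InputSplit split : splits) {
            // getLength() reports the number of bytes covered by the split.
            totalBytes += split.getLength();
        }
        System.out.println("Total input size: " + totalBytes + " bytes");
    }
}

The examples below follow the same pattern: getLength() is typically used to accumulate total input size, to size wrapper splits, or to compute the end offset of a file region.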
Example 1
Source File: HdfsDataFragmenter.java, from the pxf project (Apache License 2.0)

protected List<InputSplit> getSplits(Path path) throws IOException {
    PxfInputFormat pxfInputFormat = new PxfInputFormat();
    PxfInputFormat.setInputPaths(jobConf, path);
    InputSplit[] splits = pxfInputFormat.getSplits(jobConf, 1);
    List<InputSplit> result = new ArrayList<>();

    /*
     * HD-2547: If the file is empty, an empty split is returned: no
     * locations and no length.
     */
    if (splits != null) {
        for (InputSplit split : splits) {
            if (split.getLength() > 0) {
                result.add(split);
            }
        }
    }

    return result;
}
Example 2
Source File: HdfsDataFragmenter.java, from the pxf project (Apache License 2.0)

@Override
public FragmentStats getFragmentStats() throws Exception {
    String absoluteDataPath = hcfsType.getDataUri(jobConf, context);
    List<InputSplit> splits = getSplits(new Path(absoluteDataPath));

    if (splits.isEmpty()) {
        return new FragmentStats(0, 0, 0);
    }

    long totalSize = 0;
    for (InputSplit split : splits) {
        totalSize += split.getLength();
    }
    InputSplit firstSplit = splits.get(0);
    return new FragmentStats(splits.size(), firstSplit.getLength(), totalSize);
}
Example 3
Source File: CompositeInputSplit.java, from the hadoop project (Apache License 2.0)

/**
 * Add an InputSplit to this collection.
 * @throws IOException If capacity was not specified during construction
 *                     or if capacity has been reached.
 */
public void add(InputSplit s) throws IOException {
    if (null == splits) {
        throw new IOException("Uninitialized InputSplit");
    }
    if (fill == splits.length) {
        throw new IOException("Too many splits");
    }
    splits[fill++] = s;
    totsize += s.getLength();
}
Example 4
Source File: CompositeInputSplit.java, from the big-c project (Apache License 2.0)

/**
 * Add an InputSplit to this collection.
 * @throws IOException If capacity was not specified during construction
 *                     or if capacity has been reached.
 */
public void add(InputSplit s) throws IOException {
    if (null == splits) {
        throw new IOException("Uninitialized InputSplit");
    }
    if (fill == splits.length) {
        throw new IOException("Too many splits");
    }
    splits[fill++] = s;
    totsize += s.getLength();
}
Example 5
Source File: DeprecatedParquetInputFormat.java, from the parquet-mr project (Apache License 2.0)

public RecordReaderWrapper(
        InputSplit oldSplit, JobConf oldJobConf, Reporter reporter) throws IOException {
    splitLen = oldSplit.getLength();

    try {
        realReader = new ParquetRecordReader<V>(
                ParquetInputFormat.<V>getReadSupportInstance(oldJobConf),
                ParquetInputFormat.getFilter(oldJobConf));

        if (oldSplit instanceof ParquetInputSplitWrapper) {
            realReader.initialize(((ParquetInputSplitWrapper) oldSplit).realSplit, oldJobConf, reporter);
        } else if (oldSplit instanceof FileSplit) {
            realReader.initialize((FileSplit) oldSplit, oldJobConf, reporter);
        } else {
            throw new IllegalArgumentException(
                    "Invalid split (not a FileSplit or ParquetInputSplitWrapper): " + oldSplit);
        }

        // read once to gain access to key and value objects
        if (realReader.nextKeyValue()) {
            firstRecord = true;
            valueContainer = new Container<V>();
            valueContainer.set(realReader.getCurrentValue());
        } else {
            eof = true;
        }
    } catch (InterruptedException e) {
        Thread.interrupted();
        throw new IOException(e);
    }
}
Example 6
Source File: ParquetAsTextInputFormat.java, from the iow-hadoop-streaming project (Apache License 2.0)

public TextRecordReaderWrapper(ParquetInputFormat<SimpleGroup> newInputFormat, InputSplit oldSplit,
        JobConf oldJobConf, Reporter reporter) throws IOException {
    splitLen = oldSplit.getLength();

    try {
        ReadSupport<SimpleGroup> rs = ParquetInputFormat.getReadSupportInstance(oldJobConf);
        realReader = new ParquetRecordReader<>(rs);
        realReader.initialize(((StreamingParquetInputSplitWrapper) oldSplit).realSplit, oldJobConf, reporter);

        oldJobConf.set("map.input.file",
                ((StreamingParquetInputSplitWrapper) oldSplit).realSplit.getPath().toString());
        oldJobConf.set("mapreduce.map.input.file",
                ((StreamingParquetInputSplitWrapper) oldSplit).realSplit.getPath().toString());

        // read once to gain access to key and value objects
        if (realReader.nextKeyValue()) {
            firstRecord = true;
            valueContainer = new Container<>();
            SimpleGroup v = realReader.getCurrentValue();
            valueContainer.set(v);
            ls = groupToStrings(v);
        } else {
            eof = true;
        }
    } catch (InterruptedException e) {
        Thread.interrupted();
        throw new IOException(e);
    }
}
Example 7
Source File: CompositeInputSplit.java, from the RDFS project (Apache License 2.0)

/**
 * Add an InputSplit to this collection.
 * @throws IOException If capacity was not specified during construction
 *                     or if capacity has been reached.
 */
public void add(InputSplit s) throws IOException {
    if (null == splits) {
        throw new IOException("Uninitialized InputSplit");
    }
    if (fill == splits.length) {
        throw new IOException("Too many splits");
    }
    splits[fill++] = s;
    totsize += s.getLength();
}
Example 8
Source File: TezGroupedSplit.java, from the incubator-tez project (Apache License 2.0)

public void addSplit(InputSplit split) {
    wrappedSplits.add(split);
    try {
        length += split.getLength();
    } catch (Exception e) {
        throw new TezUncheckedException(e);
    }
}
Example 9
Source File: SplittableXmlInputFormat.java, from the Hive-XML-SerDe project (Apache License 2.0)

@Override
public RecordReader<LongWritable, Text> getRecordReader(InputSplit inputSplit, JobConf job, Reporter reporter)
        throws IOException {
    InputStream inputStream = null;
    try {
        inputStream = getInputStream(job, (FileSplit) inputSplit);
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }
    long start = ((FileSplit) inputSplit).getStart();
    long end = start + inputSplit.getLength();

    return new HiveXmlRecordReader(job, inputStream, start, end);
}
Example 10
Source File: TezGroupedSplit.java, from the tez project (Apache License 2.0)

public void addSplit(InputSplit split) {
    wrappedSplits.add(split);
    try {
        length += split.getLength();
    } catch (Exception e) {
        throw new TezUncheckedException(e);
    }
}
Example 11
Source File: CompositeInputSplit.java, from the hadoop-gpu project (Apache License 2.0)

/**
 * Add an InputSplit to this collection.
 * @throws IOException If capacity was not specified during construction
 *                     or if capacity has been reached.
 */
public void add(InputSplit s) throws IOException {
    if (null == splits) {
        throw new IOException("Uninitialized InputSplit");
    }
    if (fill == splits.length) {
        throw new IOException("Too many splits");
    }
    splits[fill++] = s;
    totsize += s.getLength();
}