Java Code Examples for org.apache.hadoop.mapred.InputSplit#getLength()
The following examples show how to use org.apache.hadoop.mapred.InputSplit#getLength().
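Before the project-specific examples, here is a minimal, self-contained sketch of the call in isolation. It sums the byte lengths reported by getLength() for the splits produced by a TextInputFormat; the input path and the split-count hint are assumptions chosen purely for illustration.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class SplitLengthExample {

    public static void main(String[] args) throws IOException {
        JobConf jobConf = new JobConf();

        // Hypothetical input path, used only for illustration.
        FileInputFormat.setInputPaths(jobConf, new Path("/tmp/input"));

        TextInputFormat inputFormat = new TextInputFormat();
        inputFormat.configure(jobConf);

        // The second argument is only a hint for the desired number of splits.
        InputSplit[] splits = inputFormat.getSplits(jobConf, 1);

        long totalBytes = 0;
        for (InputSplit split : splits) {
            // getLength() reports the number of bytes covered by the split.
            totalBytes += split.getLength();
        }
        System.out.println("Total input size: " + totalBytes + " bytes");
    }
}

The examples below follow the same pattern: getLength() is typically used to accumulate total input size, to size wrapper splits, or to compute the end offset of a file region.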
Example 1
Source File: HdfsDataFragmenter.java, from the pxf project (Apache License 2.0)

protected List<InputSplit> getSplits(Path path) throws IOException {
    PxfInputFormat pxfInputFormat = new PxfInputFormat();
    PxfInputFormat.setInputPaths(jobConf, path);
    InputSplit[] splits = pxfInputFormat.getSplits(jobConf, 1);
    List<InputSplit> result = new ArrayList<>();

    /*
     * HD-2547: If the file is empty, an empty split is returned: no
     * locations and no length.
     */
    if (splits != null) {
        for (InputSplit split : splits) {
            if (split.getLength() > 0) {
                result.add(split);
            }
        }
    }

    return result;
}
Example 2
Source File: HdfsDataFragmenter.java, from the pxf project (Apache License 2.0)

@Override
public FragmentStats getFragmentStats() throws Exception {
    String absoluteDataPath = hcfsType.getDataUri(jobConf, context);
    List<InputSplit> splits = getSplits(new Path(absoluteDataPath));

    if (splits.isEmpty()) {
        return new FragmentStats(0, 0, 0);
    }

    long totalSize = 0;
    for (InputSplit split : splits) {
        totalSize += split.getLength();
    }
    InputSplit firstSplit = splits.get(0);
    return new FragmentStats(splits.size(), firstSplit.getLength(), totalSize);
}
Example 3
Source File: CompositeInputSplit.java, from the hadoop project (Apache License 2.0)

/**
 * Add an InputSplit to this collection.
 * @throws IOException If capacity was not specified during construction
 *                     or if capacity has been reached.
 */
public void add(InputSplit s) throws IOException {
    if (null == splits) {
        throw new IOException("Uninitialized InputSplit");
    }
    if (fill == splits.length) {
        throw new IOException("Too many splits");
    }
    splits[fill++] = s;
    totsize += s.getLength();
}
Example 4
Source File: CompositeInputSplit.java, from the big-c project (Apache License 2.0)

/**
 * Add an InputSplit to this collection.
 * @throws IOException If capacity was not specified during construction
 *                     or if capacity has been reached.
 */
public void add(InputSplit s) throws IOException {
    if (null == splits) {
        throw new IOException("Uninitialized InputSplit");
    }
    if (fill == splits.length) {
        throw new IOException("Too many splits");
    }
    splits[fill++] = s;
    totsize += s.getLength();
}
Example 5
Source File: DeprecatedParquetInputFormat.java, from the parquet-mr project (Apache License 2.0)

public RecordReaderWrapper(
        InputSplit oldSplit, JobConf oldJobConf, Reporter reporter) throws IOException {
    splitLen = oldSplit.getLength();

    try {
        realReader = new ParquetRecordReader<V>(
                ParquetInputFormat.<V>getReadSupportInstance(oldJobConf),
                ParquetInputFormat.getFilter(oldJobConf));

        if (oldSplit instanceof ParquetInputSplitWrapper) {
            realReader.initialize(((ParquetInputSplitWrapper) oldSplit).realSplit, oldJobConf, reporter);
        } else if (oldSplit instanceof FileSplit) {
            realReader.initialize((FileSplit) oldSplit, oldJobConf, reporter);
        } else {
            throw new IllegalArgumentException(
                    "Invalid split (not a FileSplit or ParquetInputSplitWrapper): " + oldSplit);
        }

        // read once to gain access to key and value objects
        if (realReader.nextKeyValue()) {
            firstRecord = true;
            valueContainer = new Container<V>();
            valueContainer.set(realReader.getCurrentValue());
        } else {
            eof = true;
        }
    } catch (InterruptedException e) {
        Thread.interrupted();
        throw new IOException(e);
    }
}
Example 6
Source File: ParquetAsTextInputFormat.java, from the iow-hadoop-streaming project (Apache License 2.0)

public TextRecordReaderWrapper(ParquetInputFormat<SimpleGroup> newInputFormat, InputSplit oldSplit,
        JobConf oldJobConf, Reporter reporter) throws IOException {
    splitLen = oldSplit.getLength();

    try {
        ReadSupport<SimpleGroup> rs = ParquetInputFormat.getReadSupportInstance(oldJobConf);
        realReader = new ParquetRecordReader<>(rs);
        realReader.initialize(((StreamingParquetInputSplitWrapper) oldSplit).realSplit, oldJobConf, reporter);

        oldJobConf.set("map.input.file",
                ((StreamingParquetInputSplitWrapper) oldSplit).realSplit.getPath().toString());
        oldJobConf.set("mapreduce.map.input.file",
                ((StreamingParquetInputSplitWrapper) oldSplit).realSplit.getPath().toString());

        // read once to gain access to key and value objects
        if (realReader.nextKeyValue()) {
            firstRecord = true;
            valueContainer = new Container<>();
            SimpleGroup v = realReader.getCurrentValue();
            valueContainer.set(v);
            ls = groupToStrings(v);
        } else {
            eof = true;
        }
    } catch (InterruptedException e) {
        Thread.interrupted();
        throw new IOException(e);
    }
}
Example 7
Source File: CompositeInputSplit.java, from the RDFS project (Apache License 2.0)

/**
 * Add an InputSplit to this collection.
 * @throws IOException If capacity was not specified during construction
 *                     or if capacity has been reached.
 */
public void add(InputSplit s) throws IOException {
    if (null == splits) {
        throw new IOException("Uninitialized InputSplit");
    }
    if (fill == splits.length) {
        throw new IOException("Too many splits");
    }
    splits[fill++] = s;
    totsize += s.getLength();
}
Example 8
Source File: TezGroupedSplit.java, from the incubator-tez project (Apache License 2.0)

public void addSplit(InputSplit split) {
    wrappedSplits.add(split);
    try {
        length += split.getLength();
    } catch (Exception e) {
        throw new TezUncheckedException(e);
    }
}
Example 9
Source File: SplittableXmlInputFormat.java, from the Hive-XML-SerDe project (Apache License 2.0)

@Override
public RecordReader<LongWritable, Text> getRecordReader(InputSplit inputSplit, JobConf job, Reporter reporter)
        throws IOException {
    InputStream inputStream = null;
    try {
        inputStream = getInputStream(job, (FileSplit) inputSplit);
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }
    long start = ((FileSplit) inputSplit).getStart();
    long end = start + inputSplit.getLength();

    return new HiveXmlRecordReader(job, inputStream, start, end);
}
Example 10
Source File: TezGroupedSplit.java, from the tez project (Apache License 2.0)

public void addSplit(InputSplit split) {
    wrappedSplits.add(split);
    try {
        length += split.getLength();
    } catch (Exception e) {
        throw new TezUncheckedException(e);
    }
}
Example 11
Source File: CompositeInputSplit.java, from the hadoop-gpu project (Apache License 2.0)

/**
 * Add an InputSplit to this collection.
 * @throws IOException If capacity was not specified during construction
 *                     or if capacity has been reached.
 */
public void add(InputSplit s) throws IOException {
    if (null == splits) {
        throw new IOException("Uninitialized InputSplit");
    }
    if (fill == splits.length) {
        throw new IOException("Too many splits");
    }
    splits[fill++] = s;
    totsize += s.getLength();
}