org.apache.spark.util.SizeEstimator Java Examples

The following examples show how to use org.apache.spark.util.SizeEstimator. They are drawn from open-source projects: Apache Nemo (incubating), Apache Hudi, and Apache Beam.
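Each example below calls the same entry point, SizeEstimator.estimate(Object), which reflectively walks an object graph and returns an estimate of its JVM heap footprint in bytes. Before the project code, here is a minimal, self-contained sketch; the class name SizeEstimatorDemo and the sample data are made up for illustration:

import java.util.ArrayList;
import java.util.List;

import org.apache.spark.util.SizeEstimator;

public class SizeEstimatorDemo {
  public static void main(String[] args) {
    final List<String> rows = new ArrayList<>();
    for (int i = 0; i < 10_000; i++) {
      rows.add("row-" + i);
    }
    // estimate(...) returns the approximate number of bytes that the list and
    // everything reachable from it (the backing array and the String objects)
    // occupy on the JVM heap.
    final long bytes = SizeEstimator.estimate(rows);
    System.out.println("Estimated size: " + bytes + " B");
  }
}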
Example #1
Source File: SparkTextFileBoundedSourceVertex.java    From incubator-nemo with Apache License 2.0
/**
 * Constructor.
 *
 * @param sparkContext  the spark context.
 * @param inputPath     the path of the target text file.
 * @param numPartitions the number of partitions.
 */
public SparkTextFileBoundedSourceVertex(final SparkContext sparkContext,
                                        final String inputPath,
                                        final int numPartitions) {
  this.readables = new ArrayList<>();
  // Build the RDD once and reuse it for both partition discovery and the size
  // estimate, instead of constructing it a second time below.
  final RDD<String> textRdd = sparkContext.textFile(inputPath, numPartitions);
  final Partition[] partitions = textRdd.getPartitions();
  for (int i = 0; i < partitions.length; i++) {
    readables.add(new SparkTextFileBoundedSourceReadable(
      partitions[i],
      sparkContext.getConf(),
      i,
      inputPath,
      numPartitions));
  }
  this.estimatedSizeBytes = SizeEstimator.estimate(textRdd);
}
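
Note that the estimate here covers the driver-side RDD object graph (including the partition metadata materialized by getPartitions), not the contents of the text file: textFile is lazy, so no data has been read when estimate runs.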
 
Example #2
Source File: HoodieAppendHandle.java    From hudi with Apache License 2.0
/**
 * Checks whether the number of buffered records has reached the flush threshold and,
 * if so, flushes them to disk.
 */
private void flushToDiskIfRequired(HoodieRecord record) {
  // Flush a new block once enough records have accumulated to fill the target block size
  if (numberOfRecords >= (int) (maxBlockSize / averageRecordSize)) {
    // Refresh averageRecordSize before writing the new block: blend the previous
    // estimate with the estimated size of the current record
    LOG.info("AvgRecordSize => " + averageRecordSize);
    averageRecordSize = (averageRecordSize + SizeEstimator.estimate(record)) / 2;
    doAppend(header);
    estimatedNumberOfBytesWritten += averageRecordSize * numberOfRecords;
    numberOfRecords = 0;
  }
}
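
As a hypothetical illustration of the threshold: if maxBlockSize were 256 MB and averageRecordSize 1 KB, the condition would trigger a flush roughly every 262,144 records (268,435,456 / 1,024), and each flush re-centers averageRecordSize on the latest record.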
 
Example #3
Source File: CachedSideInputReader.java    From beam with Apache License 2.0
@Nullable
@Override
public <T> T get(PCollectionView<T> view, BoundedWindow window) {
  @SuppressWarnings("unchecked")
  final Cache<Key<T>, Value<T>> materializedCasted =
      (Cache) SideInputStorage.getMaterializedSideInputs();

  Key<T> sideInputKey = new Key<>(view, window);

  try {
    Value<T> cachedResult =
        materializedCasted.get(
            sideInputKey,
            () -> {
              final T result = delegate.get(view, window);
              LOG.debug(
                  "Caching de-serialized side input for {} of size [{}B] in memory.",
                  sideInputKey,
                  SizeEstimator.estimate(result));

              return new Value<>(result);
            });
    return cachedResult.getValue();
  } catch (ExecutionException e) {
    throw new RuntimeException(e.getCause());
  }
}
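
Here SizeEstimator.estimate is used only to log, at debug level, the in-memory size of the freshly materialized side input. Because it runs inside the cache loader, the potentially expensive reflective walk happens only on cache misses; cache hits skip it entirely.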
 
Example #4
Source File: HoodieAppendHandle.java    From hudi with Apache License 2.0
private void init(HoodieRecord record) {
  if (doInit) {
    // extract some information from the first record
    SliceView rtView = hoodieTable.getSliceView();
    Option<FileSlice> fileSlice = rtView.getLatestFileSlice(partitionPath, fileId);
    // Set the base commit time as the current instantTime for new inserts into log files
    String baseInstantTime = instantTime;
    if (fileSlice.isPresent()) {
      baseInstantTime = fileSlice.get().getBaseInstantTime();
    } else {
      // There is no base data file, so start appending to a new log file
      fileSlice = Option.of(new FileSlice(partitionPath, baseInstantTime, this.fileId));
      LOG.info("New InsertHandle for partition: " + partitionPath);
    }
    writeStatus.getStat().setPrevCommit(baseInstantTime);
    writeStatus.setFileId(fileId);
    writeStatus.setPartitionPath(partitionPath);
    writeStatus.getStat().setPartitionPath(partitionPath);
    writeStatus.getStat().setFileId(fileId);
    // Seed the running record-size average with an estimate of the first record
    averageRecordSize = SizeEstimator.estimate(record);
    try {
      // Save the Hoodie partition metadata in the partition path
      HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, baseInstantTime,
          new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath));
      partitionMetadata.trySave(getPartitionId());
      this.writer = createLogWriter(fileSlice, baseInstantTime);
      this.currentLogFile = writer.getLogFile();
      ((HoodieDeltaWriteStat) writeStatus.getStat()).setLogVersion(currentLogFile.getLogVersion());
      ((HoodieDeltaWriteStat) writeStatus.getStat()).setLogOffset(writer.getCurrentSize());
    } catch (Exception e) {
      LOG.error("Error in update task at commit " + instantTime, e);
      writeStatus.setGlobalError(e);
      throw new HoodieUpsertException("Failed to initialize HoodieAppendHandle for FileId: " + fileId + " on commit "
          + instantTime + " on HDFS path " + hoodieTable.getMetaClient().getBasePath() + partitionPath, e);
    }
    Path path = partitionPath.length() == 0 ? new Path(writer.getLogFile().getFileName())
        : new Path(partitionPath, writer.getLogFile().getFileName());
    writeStatus.getStat().setPath(path.toString());
    doInit = false;
  }
}
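
SizeEstimator.estimate performs a reflective walk of the record's object graph, which can be costly, so the handle samples it once here at initialization and then, as shown in Example #2, refreshes it only at block boundaries rather than per record.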
 
Example #5
Source File: SideInputBroadcast.java    From beam with Apache License 2.0
public long getBroadcastSizeEstimate() {
  // Estimated in-memory size, in bytes, of the payload to be broadcast.
  return SizeEstimator.estimate(bytes);
}
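
Since bytes here is presumably the serialized payload held as a byte array, the estimate is dominated by the array's length plus a small object-header overhead; a flat primitive array gives SizeEstimator little object graph to traverse.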