org.apache.spark.util.SizeEstimator Java Examples
The following examples show how to use org.apache.spark.util.SizeEstimator.
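Before the project examples, here is a minimal, self-contained sketch of the basic call pattern: SizeEstimator.estimate(Object) returns the estimated in-memory footprint of an object graph in bytes. The demo class and the list it measures are made up for illustration; they do not come from any of the projects below.

import java.util.ArrayList;
import java.util.List;

import org.apache.spark.util.SizeEstimator;

public class SizeEstimatorDemo {  // hypothetical demo class, for illustration only
  public static void main(String[] args) {
    // Build an arbitrary object graph to measure.
    List<String> values = new ArrayList<>();
    for (int i = 0; i < 1000; i++) {
      values.add("value-" + i);
    }
    // Estimate the deep heap size of the list (object headers, references, string contents) in bytes.
    long estimatedBytes = SizeEstimator.estimate(values);
    System.out.println("Estimated size: " + estimatedBytes + " B");
  }
}

The estimate walks the object graph reachable from the argument, so it approximates heap usage rather than serialized size.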
Example #1
Source File: SparkTextFileBoundedSourceVertex.java (from incubator-nemo, Apache License 2.0)
/**
 * Constructor.
 *
 * @param sparkContext  the spark context.
 * @param inputPath     the path of the target text file.
 * @param numPartitions the number of partitions.
 */
public SparkTextFileBoundedSourceVertex(final SparkContext sparkContext,
                                        final String inputPath,
                                        final int numPartitions) {
  this.readables = new ArrayList<>();
  final Partition[] partitions = sparkContext.textFile(inputPath, numPartitions).getPartitions();
  for (int i = 0; i < partitions.length; i++) {
    readables.add(new SparkTextFileBoundedSourceReadable(
      partitions[i], sparkContext.getConf(), i, inputPath, numPartitions));
  }
  this.estimatedSizeBytes = SizeEstimator.estimate(sparkContext.textFile(inputPath, numPartitions));
}
Example #2
Source File: HoodieAppendHandle.java (from hudi, Apache License 2.0)
/**
 * Checks if the number of records have reached the set threshold and then flushes the records to disk.
 */
private void flushToDiskIfRequired(HoodieRecord record) {
  // Append if max number of records reached to achieve block size
  if (numberOfRecords >= (int) (maxBlockSize / averageRecordSize)) {
    // Recompute averageRecordSize before writing a new block and update existing value with
    // avg of new and old
    LOG.info("AvgRecordSize => " + averageRecordSize);
    averageRecordSize = (averageRecordSize + SizeEstimator.estimate(record)) / 2;
    doAppend(header);
    estimatedNumberOfBytesWritten += averageRecordSize * numberOfRecords;
    numberOfRecords = 0;
  }
}
Example #3
Source File: CachedSideInputReader.java (from beam, Apache License 2.0)
@Nullable
@Override
public <T> T get(PCollectionView<T> view, BoundedWindow window) {
  @SuppressWarnings("unchecked")
  final Cache<Key<T>, Value<T>> materializedCasted =
      (Cache) SideInputStorage.getMaterializedSideInputs();
  Key<T> sideInputKey = new Key<>(view, window);
  try {
    Value<T> cachedResult =
        materializedCasted.get(
            sideInputKey,
            () -> {
              final T result = delegate.get(view, window);
              LOG.debug(
                  "Caching de-serialized side input for {} of size [{}B] in memory.",
                  sideInputKey,
                  SizeEstimator.estimate(result));
              return new Value<>(result);
            });
    return cachedResult.getValue();
  } catch (ExecutionException e) {
    throw new RuntimeException(e.getCause());
  }
}
Example #4
Source File: HoodieAppendHandle.java (from hudi, Apache License 2.0)
private void init(HoodieRecord record) {
  if (doInit) {
    // extract some information from the first record
    SliceView rtView = hoodieTable.getSliceView();
    Option<FileSlice> fileSlice = rtView.getLatestFileSlice(partitionPath, fileId);
    // Set the base commit time as the current instantTime for new inserts into log files
    String baseInstantTime = instantTime;
    if (fileSlice.isPresent()) {
      baseInstantTime = fileSlice.get().getBaseInstantTime();
    } else {
      // This means there is no base data file, start appending to a new log file
      fileSlice = Option.of(new FileSlice(partitionPath, baseInstantTime, this.fileId));
      LOG.info("New InsertHandle for partition :" + partitionPath);
    }
    writeStatus.getStat().setPrevCommit(baseInstantTime);
    writeStatus.setFileId(fileId);
    writeStatus.setPartitionPath(partitionPath);
    writeStatus.getStat().setPartitionPath(partitionPath);
    writeStatus.getStat().setFileId(fileId);
    averageRecordSize = SizeEstimator.estimate(record);
    try {
      // save hoodie partition meta in the partition path
      HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, baseInstantTime,
          new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath));
      partitionMetadata.trySave(getPartitionId());
      this.writer = createLogWriter(fileSlice, baseInstantTime);
      this.currentLogFile = writer.getLogFile();
      ((HoodieDeltaWriteStat) writeStatus.getStat()).setLogVersion(currentLogFile.getLogVersion());
      ((HoodieDeltaWriteStat) writeStatus.getStat()).setLogOffset(writer.getCurrentSize());
    } catch (Exception e) {
      LOG.error("Error in update task at commit " + instantTime, e);
      writeStatus.setGlobalError(e);
      throw new HoodieUpsertException("Failed to initialize HoodieAppendHandle for FileId: " + fileId
          + " on commit " + instantTime + " on HDFS path " + hoodieTable.getMetaClient().getBasePath()
          + partitionPath, e);
    }
    Path path = partitionPath.length() == 0
        ? new Path(writer.getLogFile().getFileName())
        : new Path(partitionPath, writer.getLogFile().getFileName());
    writeStatus.getStat().setPath(path.toString());
    doInit = false;
  }
}
Example #5
Source File: SideInputBroadcast.java (from beam, Apache License 2.0)
public long getBroadcastSizeEstimate() {
  return SizeEstimator.estimate(bytes);
}