Java Code Examples for org.apache.spark.storage.StorageLevel#NONE
The following examples show how to use org.apache.spark.storage.StorageLevel#NONE.
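All of the examples below rely on the same idiom: an RDD or Dataset that has never been persisted reports StorageLevel.NONE(), so comparing against it answers "is this data already cached?". The predefined storage levels are singletons, which is why the projects below compare them with == and !=. A minimal, self-contained sketch of the idiom (class name, master, and input data are illustrative, not taken from any of the projects below):

import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.storage.StorageLevel;

public class PersistIfNeeded {
  public static void main(String[] args) {
    JavaSparkContext sc = new JavaSparkContext(
        new SparkConf().setAppName("PersistIfNeeded").setMaster("local[*]"));
    JavaRDD<Integer> numbers = sc.parallelize(Arrays.asList(1, 2, 3));

    // An unpersisted RDD reports the StorageLevel.NONE() singleton,
    // so a reference comparison is safe here.
    if (numbers.getStorageLevel() == StorageLevel.NONE()) {
      numbers.persist(StorageLevel.MEMORY_AND_DISK()); // level choice is illustrative
    }
    sc.stop();
  }
}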
Example 1
Source File: BaseCommitActionExecutor.java From hudi with Apache License 2.0
public HoodieWriteMetadata execute(JavaRDD<HoodieRecord<T>> inputRecordsRDD) {
  HoodieWriteMetadata result = new HoodieWriteMetadata();
  // Cache the tagged records, so we don't end up computing both
  // TODO: Consistent contract in HoodieWriteClient regarding preppedRecord storage level handling
  if (inputRecordsRDD.getStorageLevel() == StorageLevel.NONE()) {
    inputRecordsRDD.persist(StorageLevel.MEMORY_AND_DISK_SER());
  } else {
    LOG.info("RDD PreppedRecords was persisted at: " + inputRecordsRDD.getStorageLevel());
  }

  WorkloadProfile profile = null;
  if (isWorkloadProfileNeeded()) {
    profile = new WorkloadProfile(inputRecordsRDD);
    LOG.info("Workload profile :" + profile);
    saveWorkloadProfileMetadataToInflight(profile, instantTime);
  }

  // partition using the insert partitioner
  final Partitioner partitioner = getPartitioner(profile);
  JavaRDD<HoodieRecord<T>> partitionedRecords = partition(inputRecordsRDD, partitioner);
  JavaRDD<WriteStatus> writeStatusRDD = partitionedRecords.mapPartitionsWithIndex((partition, recordItr) -> {
    if (WriteOperationType.isChangingRecords(operationType)) {
      return handleUpsertPartition(instantTime, partition, recordItr, partitioner);
    } else {
      return handleInsertPartition(instantTime, partition, recordItr, partitioner);
    }
  }, true).flatMap(List::iterator);
  updateIndexAndCommitIfNeeded(writeStatusRDD, result);
  return result;
}
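The NONE() guard at the top of this method is not just an optimization: Spark throws an UnsupportedOperationException if you try to change the storage level of an RDD that already has one, so persisting unconditionally could fail whenever the caller has pre-persisted the records at a different level. A self-contained sketch of that behavior (class name and data are illustrative, not part of the Hudi code):

import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.storage.StorageLevel;

public class StorageLevelGuard {
  public static void main(String[] args) {
    JavaSparkContext sc = new JavaSparkContext(
        new SparkConf().setAppName("StorageLevelGuard").setMaster("local[*]"));
    JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3));

    rdd.persist(StorageLevel.MEMORY_ONLY()); // a caller-chosen level
    // Changing the level of an already-persisted RDD would throw
    // UnsupportedOperationException, hence the NONE() check:
    if (rdd.getStorageLevel() == StorageLevel.NONE()) {
      rdd.persist(StorageLevel.MEMORY_AND_DISK_SER()); // skipped in this run
    }
    sc.stop();
  }
}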
Example 2
Source File: DataStep.java From envelope with Apache License 2.0
public boolean isCached() {
  if (data == null) {
    return false;
  }

  return data.storageLevel() != StorageLevel.NONE();
}
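Here `data` is a Dataset, which exposes its storage level through storageLevel() rather than the RDD's getStorageLevel(); the same NONE() comparison doubles as an "is cached?" test. A minimal standalone sketch of that check (session setup and data are illustrative):

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.storage.StorageLevel;

public class IsCachedCheck {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .appName("IsCachedCheck").master("local[*]").getOrCreate();
    Dataset<Row> df = spark.range(100).toDF("id");

    // Before caching, the Dataset reports StorageLevel.NONE().
    System.out.println(df.storageLevel() != StorageLevel.NONE()); // false
    df.cache();
    System.out.println(df.storageLevel() != StorageLevel.NONE()); // true
    spark.stop();
  }
}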
Example 3
Source File: GrepCaching.java From flink-perf with Apache License 2.0
public static void main(String[] args) {
  String master = args[0];
  String inFile = args[1];
  String outFile = args[2];
  String storageLevel = args[3];

  String[] patterns = new String[args.length - 4];
  System.arraycopy(args, 4, patterns, 0, args.length - 4);
  System.err.println("Starting spark with master=" + master + " in=" + inFile);
  System.err.println("Using patterns: " + Arrays.toString(patterns));

  SparkConf conf = new SparkConf().setAppName("Grep job").setMaster(master)
      .set("spark.hadoop.validateOutputSpecs", "false");
  JavaSparkContext sc = new JavaSparkContext(conf);

  // Map the storage level name passed on the command line to a StorageLevel.
  StorageLevel sl;
  switch (storageLevel) {
    case "MEMORY_ONLY":
      sl = StorageLevel.MEMORY_ONLY();
      break;
    case "MEMORY_AND_DISK":
      sl = StorageLevel.MEMORY_AND_DISK();
      break;
    case "MEMORY_ONLY_SER":
      sl = StorageLevel.MEMORY_ONLY_SER();
      break;
    case "MEMORY_AND_DISK_SER":
      sl = StorageLevel.MEMORY_AND_DISK_SER();
      break;
    case "NONE":
      sl = StorageLevel.NONE();
      break;
    default:
      throw new RuntimeException("Unknown storage level " + storageLevel);
  }

  // Run one filter job per pattern over the shared, persisted input.
  JavaRDD<String> file = sc.textFile(inFile).persist(sl);
  for (int p = 0; p < patterns.length; p++) {
    final String pattern = patterns[p];
    JavaRDD<String> res = file.filter(new Function<String, Boolean>() {
      private static final long serialVersionUID = 1L;
      Pattern p = Pattern.compile(pattern);

      @Override
      public Boolean call(String value) throws Exception {
        if (value == null || value.length() == 0) {
          return false;
        }
        final Matcher m = p.matcher(value);
        return m.find();
      }
    });
    res.saveAsTextFile(outFile + "_" + pattern);
  }
}
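The hand-rolled switch above covers only five level names. Spark itself ships StorageLevel.fromString, which resolves any of the predefined names (including "NONE" and the replicated "_2" variants) and throws IllegalArgumentException for unknown ones, so the same mapping can be a one-liner. A short sketch (the class name and argument handling are illustrative):

import org.apache.spark.storage.StorageLevel;

public class LevelFromName {
  public static void main(String[] args) {
    // Accepts the predefined names, e.g. "NONE", "MEMORY_ONLY",
    // "MEMORY_AND_DISK_SER", "DISK_ONLY", "OFF_HEAP", "MEMORY_ONLY_2", ...
    StorageLevel sl = StorageLevel.fromString(args.length > 0 ? args[0] : "NONE");
    System.out.println(sl.description());
  }
}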
Example 4
Source File: SparkExecutionContext.java From systemds with Apache License 2.0
/**
 * This call removes an rdd variable from executor memory and disk if required.
 * Hence, it is intended to be used on rmvar only. Depending on the
 * ASYNCHRONOUS_VAR_DESTROY configuration, this is asynchronous or not.
 *
 * @param rvar rdd variable to remove
 */
public static void cleanupRDDVariable(JavaPairRDD<?,?> rvar) {
  if( rvar.getStorageLevel() != StorageLevel.NONE() ) {
    rvar.unpersist( !ASYNCHRONOUS_VAR_DESTROY );
  }
}
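unpersist(boolean) takes a blocking flag, so passing !ASYNCHRONOUS_VAR_DESTROY makes the cleanup block exactly when asynchronous destruction is disabled, while the NONE() guard skips the call entirely for data that was never cached. A self-contained sketch of the same pattern (the flag constant, class name, and data are illustrative stand-ins, not the SystemDS configuration):

import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.storage.StorageLevel;
import scala.Tuple2;

public class CleanupSketch {
  // Illustrative stand-in for the SystemDS configuration flag.
  private static final boolean ASYNCHRONOUS_VAR_DESTROY = true;

  public static void main(String[] args) {
    JavaSparkContext sc = new JavaSparkContext(
        new SparkConf().setAppName("CleanupSketch").setMaster("local[*]"));
    JavaPairRDD<Integer, String> rvar = sc.parallelizePairs(
        Arrays.asList(new Tuple2<>(1, "a"), new Tuple2<>(2, "b")));

    rvar.persist(StorageLevel.MEMORY_ONLY());
    rvar.count(); // materialize the cache

    // Only unpersist data that is actually cached; block iff async destroy is off.
    if (rvar.getStorageLevel() != StorageLevel.NONE()) {
      rvar.unpersist(!ASYNCHRONOUS_VAR_DESTROY);
    }
    sc.stop();
  }
}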