Java Code Examples for org.apache.spark.SparkConf#getInt()
The following examples show how to use org.apache.spark.SparkConf#getInt().
Example 1
Source File: From systemds with Apache License 2.0 | 6 votes |
private void analyzeSparkParallelismConfiguation(SparkConf conf) { //ensure allocated spark conf SparkConf sconf = (conf == null) ? createSystemDSSparkConf() : conf; int numExecutors = sconf.getInt("spark.executor.instances", -1); int numCoresPerExec = sconf.getInt("spark.executor.cores", -1); int defaultPar = sconf.getInt("spark.default.parallelism", -1); if( numExecutors > 1 && (defaultPar > 1 || numCoresPerExec > 1) ) { _numExecutors = numExecutors; _defaultPar = (defaultPar>1) ? defaultPar : numExecutors * numCoresPerExec; _confOnly &= true; } else { //get default parallelism (total number of executors and cores) //note: spark context provides this information while conf does not //(for num executors we need to correct for driver and local mode) @SuppressWarnings("resource") JavaSparkContext jsc = getSparkContextStatic(); _numExecutors = Math.max( - 1, 1); _defaultPar = jsc.defaultParallelism(); _confOnly &= false; //implies env info refresh w/ spark context } }
Example 2
Source File: From systemds with Apache License 2.0 | 6 votes |
Example 3
Source File: From deeplearning4j with Apache License 2.0 | 6 votes |
public static <T> T assignVar(String variableName, SparkConf conf, Class clazz) throws Exception {
Object ret;
if (clazz.equals(Integer.class)) {
ret = conf.getInt(variableName, (Integer) getDefault(variableName));
} else if (clazz.equals(Double.class)) {
ret = conf.getDouble(variableName, (Double) getDefault(variableName));
} else if (clazz.equals(Boolean.class)) {
ret = conf.getBoolean(variableName, (Boolean) getDefault(variableName));
} else if (clazz.equals(String.class)) {
ret = conf.get(variableName, (String) getDefault(variableName));
} else if (clazz.equals(Long.class)) {
ret = conf.getLong(variableName, (Long) getDefault(variableName));
} else {
throw new Exception("Variable Type not supported. Only boolean, int, double and String supported.");
return (T) ret;
Example 4
Source File: From deeplearning4j with Apache License 2.0 | 6 votes |
public void setup(SparkConf conf) { useAdaGrad = conf.getBoolean(ADAGRAD, false); negative = conf.getDouble(NEGATIVE, 5); numWords = conf.getInt(NUM_WORDS, 1); window = conf.getInt(WINDOW, 5); alpha = conf.getDouble(ALPHA, 0.025f); minAlpha = conf.getDouble(MIN_ALPHA, 1e-2f); totalWords = conf.getInt(NUM_WORDS, 1); iterations = conf.getInt(ITERATIONS, 5); vectorLength = conf.getInt(VECTOR_LENGTH, 100); initExpTable(); if (negative > 0 && conf.contains(TABLE)) { ByteArrayInputStream bis = new ByteArrayInputStream(conf.get(TABLE).getBytes()); DataInputStream dis = new DataInputStream(bis); table =; } }
Example 5
Source File: From deeplearning4j with Apache License 2.0 | 6 votes |
public void setup(SparkConf conf) { useAdaGrad = conf.getBoolean(Word2VecVariables.ADAGRAD, false); negative = conf.getDouble(Word2VecVariables.NEGATIVE, 5); numWords = conf.getInt(Word2VecVariables.NUM_WORDS, 1); window = conf.getInt(Word2VecVariables.WINDOW, 5); alpha = conf.getDouble(Word2VecVariables.ALPHA, 0.025f); minAlpha = conf.getDouble(Word2VecVariables.MIN_ALPHA, 1e-2f); totalWords = conf.getInt(Word2VecVariables.NUM_WORDS, 1); vectorLength = conf.getInt(Word2VecVariables.VECTOR_LENGTH, 100); initExpTable(); if (negative > 0 && conf.contains(Word2VecVariables.TABLE)) { ByteArrayInputStream bis = new ByteArrayInputStream(conf.get(Word2VecVariables.TABLE).getBytes()); DataInputStream dis = new DataInputStream(bis); table =; } }
Example 6
Source File: From hudi with Apache License 2.0 | 5 votes |
private void setPutBatchSize(JavaRDD<WriteStatus> writeStatusRDD, HBaseIndexQPSResourceAllocator hBaseIndexQPSResourceAllocator, final JavaSparkContext jsc) { if (config.getHbaseIndexPutBatchSizeAutoCompute()) { SparkConf conf = jsc.getConf(); int maxExecutors = conf.getInt(DEFAULT_SPARK_EXECUTOR_INSTANCES_CONFIG_NAME, 1); if (conf.getBoolean(DEFAULT_SPARK_DYNAMIC_ALLOCATION_ENABLED_CONFIG_NAME, false)) { maxExecutors = Math.max(maxExecutors, conf.getInt(DEFAULT_SPARK_DYNAMIC_ALLOCATION_MAX_EXECUTORS_CONFIG_NAME, 1)); } /* * Each writeStatus represents status information from a write done in one of the IOHandles. If a writeStatus has * any insert, it implies that the corresponding task contacts HBase for doing puts, since we only do puts for * inserts from HBaseIndex. */ final Tuple2<Long, Integer> numPutsParallelismTuple = getHBasePutAccessParallelism(writeStatusRDD); final long numPuts = numPutsParallelismTuple._1; final int hbasePutsParallelism = numPutsParallelismTuple._2; this.numRegionServersForTable = getNumRegionServersAliveForTable(); final float desiredQPSFraction = hBaseIndexQPSResourceAllocator.calculateQPSFractionForPutsTime(numPuts, this.numRegionServersForTable);"Desired QPSFraction :" + desiredQPSFraction);"Number HBase puts :" + numPuts);"Hbase Puts Parallelism :" + hbasePutsParallelism); final float availableQpsFraction = hBaseIndexQPSResourceAllocator.acquireQPSResources(desiredQPSFraction, numPuts);"Allocated QPS Fraction :" + availableQpsFraction); multiPutBatchSize = putBatchSizeCalculator.getBatchSize(numRegionServersForTable, maxQpsPerRegionServer, hbasePutsParallelism, maxExecutors, SLEEP_TIME_MILLISECONDS, availableQpsFraction);"multiPutBatchSize :" + multiPutBatchSize); } }
Example 7
Source File: From sqoop-on-spark with Apache License 2.0 | 5 votes |
public static void execute(JobRequest request, SparkConf conf, JavaSparkContext sc) throws Exception {"Executing sqoop spark job"); long totalTime = System.currentTimeMillis(); SparkPrefixContext driverContext = new SparkPrefixContext(request.getConf(), JobConstants.PREFIX_CONNECTOR_DRIVER_CONTEXT); int defaultExtractors = conf.getInt(DEFAULT_EXTRACTORS, 10); long numExtractors = (driverContext.getLong(JobConstants.JOB_ETL_EXTRACTOR_NUM, defaultExtractors)); int numLoaders = conf.getInt(NUM_LOADERS, 1); List<Partition> sp = getPartitions(request, numExtractors); System.out.println(">>> Partition size:" + sp.size()); JavaRDD<Partition> rdd = sc.parallelize(sp, sp.size()); JavaRDD<List<IntermediateDataFormat<?>>> mapRDD = SqoopExtractFunction( request)); // if max loaders or num loaders is given reparition to adjust the max // loader parallelism if (numLoaders != numExtractors) { JavaRDD<List<IntermediateDataFormat<?>>> reParitionedRDD = mapRDD.repartition(numLoaders); System.out.println(">>> RePartition RDD size:" + reParitionedRDD.partitions().size()); reParitionedRDD.mapPartitions(new SqoopLoadFunction(request)).collect(); } else { System.out.println(">>> Mapped RDD size:" + mapRDD.partitions().size()); mapRDD.mapPartitions(new SqoopLoadFunction(request)).collect(); } System.out.println(">>> TOTAL time ms:" + (System.currentTimeMillis() - totalTime));"Done EL in sqoop spark job, next call destroy apis"); }