Java Code Examples for org.apache.spark.SparkConf#contains()
The following examples show how to use
org.apache.spark.SparkConf#contains().
The examples are drawn from open source projects; the source file, project, and license are noted above each example.
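Before the project examples, the snippet below is a minimal, self-contained sketch of the pattern most of them share: call contains() to check whether a configuration key has already been supplied (for example via spark-submit or spark-defaults.conf) before falling back to a default. The class name and the chosen defaults are illustrative only and do not come from any of the projects listed below.

import org.apache.spark.SparkConf;

public class SparkConfContainsExample {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf();

        // contains() returns true only if the key was set explicitly,
        // e.g. via spark-submit, spark-defaults.conf, or a previous set() call.
        if (!conf.contains("spark.app.name")) {
            conf.setAppName("contains-demo"); // fall back to a default app name
        }
        if (!conf.contains("spark.master")) {
            conf.setMaster("local[*]");       // fall back to local mode
        }

        System.out.println(conf.toDebugString());
    }
}

Keys set through setAppName(), setMaster(), or set() make contains() return true for the corresponding property, so checking first avoids overriding values the user already passed in.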
Example 1
Source File: SparkContextProvider.java From rdf2x with Apache License 2.0
/**
 * Provide a {@link JavaSparkContext} based on default settings
 *
 * @return a {@link JavaSparkContext} based on default settings
 */
public static JavaSparkContext provide() {
    SparkConf config = new SparkConf()
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .registerKryoClasses(getSerializableClasses());

    if (!config.contains("spark.app.name")) {
        config.setAppName("RDF2X");
    }

    if (!config.contains("spark.master")) {
        config.setMaster("local");
    }

    // set serialization registration required if you want to make sure you registered all your classes
    // some spark internal classes will need to be registered as well
    // config.set("spark.kryo.registrationRequired", "true");

    log.info("Getting Spark Context for config: \n{}", config.toDebugString());

    return new JavaSparkContext(config);
}
Example 2
Source File: Word2VecPerformer.java From deeplearning4j with Apache License 2.0
public void setup(SparkConf conf) {
    useAdaGrad = conf.getBoolean(Word2VecVariables.ADAGRAD, false);
    negative = conf.getDouble(Word2VecVariables.NEGATIVE, 5);
    numWords = conf.getInt(Word2VecVariables.NUM_WORDS, 1);
    window = conf.getInt(Word2VecVariables.WINDOW, 5);
    alpha = conf.getDouble(Word2VecVariables.ALPHA, 0.025f);
    minAlpha = conf.getDouble(Word2VecVariables.MIN_ALPHA, 1e-2f);
    totalWords = conf.getInt(Word2VecVariables.NUM_WORDS, 1);
    vectorLength = conf.getInt(Word2VecVariables.VECTOR_LENGTH, 100);
    initExpTable();

    if (negative > 0 && conf.contains(Word2VecVariables.TABLE)) {
        ByteArrayInputStream bis = new ByteArrayInputStream(conf.get(Word2VecVariables.TABLE).getBytes());
        DataInputStream dis = new DataInputStream(bis);
        table = Nd4j.read(dis);
    }
}
Example 3
Source File: Word2VecPerformerVoid.java From deeplearning4j with Apache License 2.0
public void setup(SparkConf conf) {
    useAdaGrad = conf.getBoolean(ADAGRAD, false);
    negative = conf.getDouble(NEGATIVE, 5);
    numWords = conf.getInt(NUM_WORDS, 1);
    window = conf.getInt(WINDOW, 5);
    alpha = conf.getDouble(ALPHA, 0.025f);
    minAlpha = conf.getDouble(MIN_ALPHA, 1e-2f);
    totalWords = conf.getInt(NUM_WORDS, 1);
    iterations = conf.getInt(ITERATIONS, 5);
    vectorLength = conf.getInt(VECTOR_LENGTH, 100);
    initExpTable();

    if (negative > 0 && conf.contains(TABLE)) {
        ByteArrayInputStream bis = new ByteArrayInputStream(conf.get(TABLE).getBytes());
        DataInputStream dis = new DataInputStream(bis);
        table = Nd4j.read(dis);
    }
}
Example 4
Source File: IPySparkInterpreter.java From zeppelin with Apache License 2.0
@Override
public synchronized void open() throws InterpreterException {
    // IPySparkInterpreter may already be opened in PySparkInterpreter when ipython is available.
    if (opened) {
        return;
    }

    PySparkInterpreter pySparkInterpreter =
            getInterpreterInTheSameSessionByClassName(PySparkInterpreter.class, false);
    setProperty("zeppelin.python", pySparkInterpreter.getPythonExec());
    sparkInterpreter = getInterpreterInTheSameSessionByClassName(SparkInterpreter.class);
    setProperty("zeppelin.py4j.useAuth",
            sparkInterpreter.getSparkVersion().isSecretSocketSupported() + "");
    SparkConf conf = sparkInterpreter.getSparkContext().getConf();

    // only set PYTHONPATH in embedded, local or yarn-client mode.
    // yarn-cluster will setup PYTHONPATH automatically.
    if (!conf.contains(SparkStringConstants.SUBMIT_DEPLOY_MODE_PROP_NAME)
            || !conf.get(SparkStringConstants.SUBMIT_DEPLOY_MODE_PROP_NAME).equals("cluster")) {
        setAdditionalPythonPath(PythonUtils.sparkPythonPath());
    }

    setUseBuiltinPy4j(false);
    setAdditionalPythonInitFile("python/zeppelin_ipyspark.py");
    setProperty("zeppelin.py4j.useAuth",
            sparkInterpreter.getSparkVersion().isSecretSocketSupported() + "");
    super.open();
    opened = true;
}
Example 5
Source File: PSRpcFactory.java From systemds with Apache License 2.0
public static SparkPSProxy createSparkPSProxy(SparkConf conf, int port, LongAccumulator aRPC) throws IOException {
    long rpcTimeout = conf.contains("spark.rpc.askTimeout")
            ? conf.getTimeAsMs("spark.rpc.askTimeout")
            : conf.getTimeAsMs("spark.network.timeout", "120s");
    String host = conf.get("spark.driver.host");
    TransportContext context = createTransportContext(conf, new LocalParamServer());
    return new SparkPSProxy(context.createClientFactory().createClient(host, port), rpcTimeout, aRPC);
}
Example 6
Source File: SparkEngineBase.java From beakerx with Apache License 2.0
protected void configureSparkConf(SparkConf sparkConf) {
    if (!sparkConf.contains(SPARK_APP_NAME)) {
        sparkConf.setAppName("beaker_" + UUID.randomUUID().toString());
    }
    if (sparkConf.contains(SPARK_MASTER) && !isLocalSpark(sparkConf)) {
        sparkConf.set(SPARK_REPL_CLASS_OUTPUT_DIR, KernelManager.get().getOutDir());
    }
}
Example 7
Source File: SparkEngineNoUIImpl.java From beakerx with Apache License 2.0
private void configureSparkConfDefaults(SparkConf sparkConf) {
    if (!sparkConf.contains(SPARK_MASTER)) {
        this.conf.getMaster().ifPresent(sparkConf::setMaster);
    }
    if (!sparkConf.contains(SPARK_EXECUTOR_CORES)) {
        this.conf.getExecutorCores().ifPresent(x -> sparkConf.set(SPARK_EXECUTOR_CORES, x));
    }
    if (!sparkConf.contains(SPARK_EXECUTOR_MEMORY)) {
        this.conf.getExecutorMemory().ifPresent(x -> sparkConf.set(SPARK_EXECUTOR_MEMORY, x));
    }
}
Example 8
Source File: SparkExecutionContext.java From systemds with Apache License 2.0
/**
 * Sets up a SystemDS-preferred Spark configuration based on the implicit
 * default configuration (as passed via configurations from outside).
 *
 * @return spark configuration
 */
public static SparkConf createSystemDSSparkConf() {
    SparkConf conf = new SparkConf();

    //always set unlimited result size (required for cp collect)
    conf.set("spark.driver.maxResultSize", "0");

    //always use the fair scheduler (for single jobs, it's equivalent to fifo
    //but for concurrent jobs in parfor it ensures better data locality because
    //round robin assignment mitigates the problem of 'sticky slots')
    if( FAIR_SCHEDULER_MODE ) {
        conf.set("spark.scheduler.mode", "FAIR");
    }

    //increase scheduler delay (usually more robust due to better data locality)
    if( !conf.contains("spark.locality.wait") ) { //default 3s
        conf.set("spark.locality.wait", "5s");
    }

    //increase max message size for robustness
    String sparkVersion = org.apache.spark.package$.MODULE$.SPARK_VERSION();
    String msgSizeConf = (UtilFunctions.compareVersion(sparkVersion, "2.0.0") < 0) ?
            "spark.akka.frameSize" : "spark.rpc.message.maxSize";
    if( !conf.contains(msgSizeConf) ) { //default 128MB
        conf.set(msgSizeConf, "512");
    }

    return conf;
}
Example 9
Source File: IPySparkInterpreter.java From zeppelin with Apache License 2.0
@Override
protected Map<String, String> setupKernelEnv() throws IOException {
    Map<String, String> env = super.setupKernelEnv();
    // set PYSPARK_PYTHON
    SparkConf conf = sparkInterpreter.getSparkContext().getConf();
    if (conf.contains("spark.pyspark.python")) {
        env.put("PYSPARK_PYTHON", conf.get("spark.pyspark.python"));
    }
    return env;
}
Example 10
Source File: SparkContextFactory.java From beam with Apache License 2.0
private static JavaSparkContext createSparkContext(SparkContextOptions contextOptions) {
    if (usesProvidedSparkContext) {
        LOG.info("Using a provided Spark Context");
        JavaSparkContext jsc = contextOptions.getProvidedSparkContext();
        if (jsc == null || jsc.sc().isStopped()) {
            LOG.error("The provided Spark context " + jsc + " was not created or was stopped");
            throw new RuntimeException("The provided Spark context was not created or was stopped");
        }
        return jsc;
    } else {
        LOG.info("Creating a brand new Spark Context.");
        SparkConf conf = new SparkConf();
        if (!conf.contains("spark.master")) {
            // set master if not set.
            conf.setMaster(contextOptions.getSparkMaster());
        }

        if (contextOptions.getFilesToStage() != null && !contextOptions.getFilesToStage().isEmpty()) {
            conf.setJars(contextOptions.getFilesToStage().toArray(new String[0]));
        }

        conf.setAppName(contextOptions.getAppName());
        // register immutable collections serializers because the SDK uses them.
        conf.set("spark.kryo.registrator", SparkRunnerKryoRegistrator.class.getName());
        return new JavaSparkContext(conf);
    }
}
Example 11
Source File: KerberosParameterValidations.java From envelope with Apache License 2.0
@Override
public ValidationResult validate(Config config) {
    SparkConf conf = new SparkConf();
    if (!config.hasPath(USER_PRINC_CONFIG) && !conf.contains("spark.yarn.principal")) {
        return new ValidationResult(this, Validity.INVALID, USAGE);
    }
    return new ValidationResult(this, Validity.VALID, "Kerberos principal has been supplied");
}
Example 12
Source File: KerberosParameterValidations.java From envelope with Apache License 2.0
@Override
public ValidationResult validate(Config config) {
    SparkConf conf = new SparkConf();
    if (!config.hasPath(KEYTAB_CONFIG) && !conf.contains("spark.yarn.keytab")) {
        return new ValidationResult(this, Validity.INVALID, USAGE);
    }
    return new ValidationResult(this, Validity.VALID, "Kerberos keytab has been supplied");
}
Example 13
Source File: SparkInterpreter.java From zeppelin with Apache License 2.0
@Override
public void open() throws InterpreterException {
    try {
        SparkConf conf = new SparkConf();
        for (Map.Entry<Object, Object> entry : getProperties().entrySet()) {
            if (!StringUtils.isBlank(entry.getValue().toString())) {
                conf.set(entry.getKey().toString(), entry.getValue().toString());
            }
            // zeppelin.spark.useHiveContext & zeppelin.spark.concurrentSQL are legacy zeppelin
            // properties, convert them to spark properties here.
            if (entry.getKey().toString().equals("zeppelin.spark.useHiveContext")) {
                conf.set("spark.useHiveContext", entry.getValue().toString());
            }
            if (entry.getKey().toString().equals("zeppelin.spark.concurrentSQL")
                    && entry.getValue().toString().equals("true")) {
                conf.set(SparkStringConstants.SCHEDULER_MODE_PROP_NAME, "FAIR");
            }
        }

        // use local mode for embedded spark mode when spark.master is not found
        if (!conf.contains(SparkStringConstants.MASTER_PROP_NAME)) {
            if (conf.contains("master")) {
                conf.set(SparkStringConstants.MASTER_PROP_NAME, conf.get("master"));
            } else {
                String masterEnv = System.getenv(SparkStringConstants.MASTER_ENV_NAME);
                conf.set(SparkStringConstants.MASTER_PROP_NAME,
                        masterEnv == null ? SparkStringConstants.DEFAULT_MASTER_VALUE : masterEnv);
            }
        }

        this.innerInterpreter = loadSparkScalaInterpreter(conf);
        this.innerInterpreter.open();

        sc = this.innerInterpreter.getSparkContext();
        jsc = JavaSparkContext.fromSparkContext(sc);
        sparkVersion = SparkVersion.fromVersionString(sc.version());
        if (enableSupportedVersionCheck && sparkVersion.isUnsupportedVersion()) {
            throw new Exception("This is not officially supported spark version: " + sparkVersion
                    + "\nYou can set zeppelin.spark.enableSupportedVersionCheck to false if you really"
                    + " want to try this version of spark.");
        }
        sqlContext = this.innerInterpreter.getSqlContext();
        sparkSession = this.innerInterpreter.getSparkSession();

        SESSION_NUM.incrementAndGet();
    } catch (Exception e) {
        LOGGER.error("Fail to open SparkInterpreter", e);
        throw new InterpreterException("Fail to open SparkInterpreter", e);
    }
}
Example 14
Source File: SparkEngineBase.java From beakerx with Apache License 2.0
private static boolean isLocalSpark(SparkConf sparkConf) {
    return sparkConf.contains(SPARK_MASTER)
            && sparkConf.get(SPARK_MASTER) != null
            && sparkConf.get("spark.master").startsWith("local");
}
Example 15
Source File: SparkEngineWithUIImpl.java From beakerx with Apache License 2.0
private void configureRuntime(SparkConf sparkConf) {
    if (sparkConf.contains("spark.master") && sparkConf.get("spark.master").contains("yarn")) {
        YarnSparkOptionCommand.runtimeConfiguration(this, sparkConf);
    }
}