Java Code Examples for org.apache.spark.SparkConf#setAppName()
The following examples show how to use org.apache.spark.SparkConf#setAppName().
The originating project, source file, and license for each example are noted above the code.
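Before the project-specific examples, here is a minimal, self-contained sketch of the pattern they all share: create a SparkConf, name the application with setAppName(), set a master, and build a JavaSparkContext from it. The class name, app name, and "local[2]" master below are illustrative choices for a local run, not taken from any of the projects listed.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class MinimalSetAppNameExample {

    public static void main(String[] args) {
        // setAppName() sets spark.app.name, the name shown in the Spark UI and in logs.
        SparkConf conf = new SparkConf()
                .setAppName("minimal-setAppName-example")
                .setMaster("local[2]"); // hard-coded only for local testing; spark-submit normally supplies the master

        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            // A trivial job so the sketch runs end to end.
            JavaRDD<Integer> numbers = sc.parallelize(Arrays.asList(1, 2, 3, 4));
            long evens = numbers.filter(n -> n % 2 == 0).count();
            System.out.println("Even numbers: " + evens);
        } finally {
            sc.stop();
        }
    }
}

As Example 4 below does, production code often guards the call with conf.contains("spark.app.name") so that a name supplied externally (for example via spark-submit --name) is not overridden.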
Example 1
Source File: SparkRunnerKryoRegistratorTest.java From beam with Apache License 2.0
private void runSimplePipelineWithSparkContext(SparkConf conf) {
    SparkPipelineOptions options = PipelineOptionsFactory.create().as(TestSparkPipelineOptions.class);
    options.setRunner(TestSparkRunner.class);
    conf.set("spark.master", "local");
    conf.setAppName("test");
    JavaSparkContext javaSparkContext = new JavaSparkContext(conf);
    options.setUsesProvidedSparkContext(true);
    options.as(SparkContextOptions.class).setProvidedSparkContext(javaSparkContext);
    Pipeline p = Pipeline.create(options);
    p.apply(Create.of("a")); // some operation to trigger pipeline construction
    p.run().waitUntilFinish();
    javaSparkContext.stop();
}
Example 2
Source File: BlurLoadSparkProcessor.java From incubator-retired-blur with Apache License 2.0
public void run() throws IOException {
    SparkConf conf = new SparkConf();
    conf.setAppName(getAppName());
    conf.set(SPARK_SERIALIZER, ORG_APACHE_SPARK_SERIALIZER_KRYO_SERIALIZER);
    JavaSparkUtil.packProjectJars(conf);
    setupSparkConf(conf);

    JavaStreamingContext ssc = new JavaStreamingContext(conf, getDuration());
    List<JavaDStream<T>> streamsList = getStreamsList(ssc);

    // Union all the streams if there is more than 1 stream
    JavaDStream<T> streams = unionStreams(ssc, streamsList);

    JavaPairDStream<String, RowMutation> pairDStream =
        streams.mapToPair(new PairFunction<T, String, RowMutation>() {
            public Tuple2<String, RowMutation> call(T t) {
                RowMutation rowMutation = convert(t);
                return new Tuple2<String, RowMutation>(rowMutation.getRowId(), rowMutation);
            }
        });

    pairDStream.foreachRDD(getFunction());

    ssc.start();
    ssc.awaitTermination();
}
Example 3
Source File: LensAPI.java From cognition with Apache License 2.0
/**
 * Helper method for creating the spark context from the given cognition configuration
 * @return a new configured spark context
 */
public SparkContext createSparkContext() {
    SparkConf conf = new SparkConf();
    Configuration config = cognition.getProperties();

    conf.set("spark.serializer", KryoSerializer.class.getName());
    conf.setAppName(config.getString("app.name"));
    conf.setMaster(config.getString("master"));

    Iterator<String> iterator = config.getKeys("spark");
    while (iterator.hasNext()) {
        String key = iterator.next();
        conf.set(key, config.getString(key));
    }

    SparkContext sc = new SparkContext(conf);
    for (String jar : config.getStringArray("jars")) {
        sc.addJar(jar);
    }
    return sc;
}
Example 4
Source File: SparkContextProvider.java From rdf2x with Apache License 2.0
/**
 * Provide a {@link JavaSparkContext} based on default settings
 *
 * @return a {@link JavaSparkContext} based on default settings
 */
public static JavaSparkContext provide() {
    SparkConf config = new SparkConf()
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .registerKryoClasses(getSerializableClasses());

    if (!config.contains("spark.app.name")) {
        config.setAppName("RDF2X");
    }
    if (!config.contains("spark.master")) {
        config.setMaster("local");
    }

    // set serialization registration required if you want to make sure you registered all your classes
    // some spark internal classes will need to be registered as well
    // config.set("spark.kryo.registrationRequired", "true");

    log.info("Getting Spark Context for config: \n{}", config.toDebugString());

    return new JavaSparkContext(config);
}
Example 5
Source File: SparkAppLauncher.java From sylph with Apache License 2.0
public ApplicationId run(Job job) throws Exception {
    SparkJobConfig jobConfig = job.getConfig();
    System.setProperty("SPARK_YARN_MODE", "true");
    SparkConf sparkConf = new SparkConf();
    sparkConf.set("spark.driver.extraJavaOptions", "-XX:PermSize=64M -XX:MaxPermSize=128M");
    sparkConf.set("spark.yarn.stagingDir", appHome);
    //-------------
    sparkConf.set("spark.executor.instances", jobConfig.getNumExecutors() + ""); //EXECUTOR_COUNT
    sparkConf.set("spark.executor.memory", jobConfig.getExecutorMemory());       //EXECUTOR_MEMORY
    sparkConf.set("spark.executor.cores", jobConfig.getExecutorCores() + "");
    sparkConf.set("spark.driver.cores", jobConfig.getDriverCores() + "");
    sparkConf.set("spark.driver.memory", jobConfig.getDriverMemory());
    //--------------
    sparkConf.setSparkHome(sparkHome);
    sparkConf.setMaster("yarn");
    sparkConf.setAppName(job.getName());
    sparkConf.set("spark.submit.deployMode", "cluster"); // worked

    // set dependencies via spark.yarn.dist.jars and spark.yarn.dist.files
    setDistJars(job, sparkConf);

    String[] args = getArgs();
    ClientArguments clientArguments = new ClientArguments(args); // spark-2.0.0
    //yarnClient.getConfig().iterator().forEachRemaining(x -> sparkConf.set("spark.hadoop." + x.getKey(), x.getValue()));
    Client appClient = new SylphSparkYarnClient(clientArguments, sparkConf, yarnClient, jobConfig.getQueue());
    return appClient.submitApplication();
}
Example 6
Source File: PreprocessSpark.java From Java-Deep-Learning-Cookbook with MIT License
protected void entryPoint(String[] args) throws Exception {
    JCommander jcmdr = new JCommander(this);
    jcmdr.parse(args);
    //JCommanderUtils.parseArgs(this, args);
    SparkConf conf = new SparkConf();
    conf.setMaster("local[*]");
    conf.setAppName("DL4JTinyImageNetSparkPreproc");
    JavaSparkContext sc = new JavaSparkContext(conf);

    //Create training set
    JavaRDD<String> filePathsTrain = SparkUtils.listPaths(sc, sourceDir + "/train", true, NativeImageLoader.ALLOWED_FORMATS);
    SparkDataUtils.createFileBatchesSpark(filePathsTrain, saveDir, batchSize, sc);

    //Create test set
    JavaRDD<String> filePathsTest = SparkUtils.listPaths(sc, sourceDir + "/test", true, NativeImageLoader.ALLOWED_FORMATS);
    SparkDataUtils.createFileBatchesSpark(filePathsTest, saveDir, batchSize, sc);

    System.out.println("----- Data Preprocessing Complete -----");
}
Example 7
Source File: AbstractSparkLayer.java From spark-streaming-direct-kafka with Apache License 2.0
public SparkConf getSparkConf() {
    SparkConf sparkConf = new SparkConf();
    sparkConf.set("spark.streaming.kafka.maxRatePerPartition",
            config.getSparkStreamingKafkaMaxRatePerPartition()); // rate limiting
    sparkConf.setAppName("StreamingEngine-" + config.getTopicSet().toString() + "-" + config.getNamespace());
    if (config.getLocalMode()) {
        sparkConf.setMaster("local[4]");
    }
    return sparkConf;
}
Example 8
Source File: SparkStreaming.java From kafka-spark-avro-example with Apache License 2.0
public static void main(String... args) {
    SparkConf conf = new SparkConf();
    conf.setMaster("local[2]");
    conf.setAppName("Spark Streaming Test Java");

    JavaSparkContext sc = new JavaSparkContext(conf);
    JavaStreamingContext ssc = new JavaStreamingContext(sc, Durations.seconds(10));

    processStream(ssc, sc);

    ssc.start();
    ssc.awaitTermination();
}
Example 9
Source File: SparkTestEnvironment.java From geowave with Apache License 2.0
@Override
public void setup() throws Exception {
    if (defaultSession == null) {
        final SparkConf addonOptions = new SparkConf();
        addonOptions.setMaster("local[*]");
        addonOptions.setAppName("CoreGeoWaveSparkITs");
        defaultSession = GeoWaveSparkConf.createDefaultSession(addonOptions);
        if (defaultSession == null) {
            LOGGER.error("Unable to create default spark session for tests");
            return;
        }
    }
}
Example 10
Source File: SparkContextFactory.java From beam with Apache License 2.0
private static JavaSparkContext createSparkContext(SparkContextOptions contextOptions) {
    if (usesProvidedSparkContext) {
        LOG.info("Using a provided Spark Context");
        JavaSparkContext jsc = contextOptions.getProvidedSparkContext();
        if (jsc == null || jsc.sc().isStopped()) {
            LOG.error("The provided Spark context " + jsc + " was not created or was stopped");
            throw new RuntimeException("The provided Spark context was not created or was stopped");
        }
        return jsc;
    } else {
        LOG.info("Creating a brand new Spark Context.");
        SparkConf conf = new SparkConf();
        if (!conf.contains("spark.master")) {
            // set master if not set.
            conf.setMaster(contextOptions.getSparkMaster());
        }

        if (contextOptions.getFilesToStage() != null && !contextOptions.getFilesToStage().isEmpty()) {
            conf.setJars(contextOptions.getFilesToStage().toArray(new String[0]));
        }

        conf.setAppName(contextOptions.getAppName());
        // register immutable collections serializers because the SDK uses them.
        conf.set("spark.kryo.registrator", SparkRunnerKryoRegistrator.class.getName());
        return new JavaSparkContext(conf);
    }
}
Example 11
Source File: SparkTestBase.java From spark-transformers with Apache License 2.0
@Before
public void setup() {
    SparkConf sparkConf = new SparkConf();
    String master = "local[2]";
    sparkConf.setMaster(master);
    sparkConf.setAppName("Local Spark Unit Test");
    sc = new JavaSparkContext(new SparkContext(sparkConf));
    sqlContext = new SQLContext(sc);
}
Example 12
Source File: TestNd4jKryoSerialization.java From deeplearning4j with Apache License 2.0
@Before
public void before() {
    SparkConf sparkConf = new SparkConf();
    sparkConf.setMaster("local[*]");
    sparkConf.set("spark.driver.host", "localhost");
    sparkConf.setAppName("Iris");
    sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    sparkConf.set("spark.kryo.registrator", "org.nd4j.kryo.Nd4jRegistrator");

    sc = new JavaSparkContext(sparkConf);
}
Example 13
Source File: LensTest.java From cognition with Apache License 2.0
@Test
public void test() throws AccumuloSecurityException, IOException, AccumuloException, TableExistsException, TableNotFoundException {
    /*Connector conn = instance.getConnector("root", new PasswordToken());
    Scanner scan = conn.createScanner("moreover", Authorizations.EMPTY);
    for(Map.Entry<Key, Value> entry : scan){
      System.out.println(entry);
    }*/

    SparkConf conf = new SparkConf();
    conf.set("spark.serializer", KryoSerializer.class.getName());
    conf.setAppName("test");
    conf.setMaster("local[2]");
    SparkContext sc = new SparkContext(conf);

    CognitionConfiguration pip = new CognitionConfiguration(new AccumuloConfiguration(instance, user, password, true));
    LensAPI lens = new LensAPI(sc, pip);

    Criteria criteria = new Criteria();
    criteria.addKeyword("test");
    criteria.setDates(Instant.parse("2015-10-20T09:19:12Z"), Instant.parse("2015-10-20T09:19:13Z"));

    SchemaAdapter s = new SchemaAdapter();
    s.loadJson("moreover-schema.json");
    criteria.setSchema(s);
    criteria.setAccumuloTable("moreover");

    String json = lens.query(criteria);
    assertEquals("[moreover json]", json);
}
Example 14
Source File: SparkRunnerTestUtils.java From components with Apache License 2.0
public Pipeline createPipeline() {
    SparkContextOptions sparkOpts = options.as(SparkContextOptions.class);
    sparkOpts.setFilesToStage(emptyList());

    SparkConf conf = new SparkConf();
    conf.setAppName(appName);
    conf.setMaster("local[2]");
    conf.set("spark.driver.allowMultipleContexts", "true");

    JavaSparkContext jsc = new JavaSparkContext(new SparkContext(conf));
    sparkOpts.setProvidedSparkContext(jsc);
    sparkOpts.setUsesProvidedSparkContext(true);
    sparkOpts.setRunner(SparkRunner.class);

    return Pipeline.create(sparkOpts);
}
Example 15
Source File: SparkIntegrationTestResource.java From components with Apache License 2.0
/**
 * @return a clean spark configuration created from the options in this resource.
 */
public SparkConf createSparkConf(String appName) {
    SparkConf conf = new SparkConf();
    conf.setAppName(appName);
    conf.setMaster(sm);
    // conf.set("spark.driver.host", "10.42.30.148");
    for (Map.Entry<String, String> kv : hadoopConf.entrySet())
        conf.set("spark.hadoop." + kv.getKey(), kv.getValue());
    return conf;
}
Example 16
Source File: Spark.java From tinkerpop with Apache License 2.0
public static SparkContext create(final SparkConf sparkConf) {
    if (isContextNullOrStopped()) {
        sparkConf.setAppName("Apache TinkerPop's Spark-Gremlin");
        CONTEXT = SparkContext.getOrCreate(sparkConf);
    }
    return CONTEXT;
}
Example 17
Source File: SparkRefine.java From p3-batchrefine with Apache License 2.0
public SparkRefine() {
    LogManager.getRootLogger().setLevel(Level.ERROR);
    fLogger.setLevel(Level.INFO);

    SparkConf sparkConfiguration = new SparkConf(true);
    sparkConfiguration.setAppName(APP_NAME);
    sparkConfiguration.setMaster(sparkConfiguration.get("spark.master", "local"));
    sparkConfiguration.set("spark.task.cpus", sparkConfiguration.get("spark.executor.cores", "1"));

    sparkContext = new JavaSparkContext(sparkConfiguration);
    new ConsoleProgressBar(sparkContext.sc());
}
Example 18
Source File: WordCountJava.java From BigDataArchitect with Apache License 2.0
public static void main(String[] args) throws FileNotFoundException {
    SparkConf conf = new SparkConf();
    conf.setAppName("java-wordcount");
    conf.setMaster("local");

    JavaSparkContext jsc = new JavaSparkContext(conf);
    JavaRDD<String> fileRDD = jsc.textFile("bigdata-spark/data/testdata.txt");

    JavaRDD<String> words = fileRDD.flatMap(new FlatMapFunction<String, String>() {
        public Iterator<String> call(String line) throws Exception {
            return Arrays.asList(line.split(" ")).iterator();
        }
    });

    JavaPairRDD<String, Integer> pairWord = words.mapToPair(new PairFunction<String, String, Integer>() {
        public Tuple2<String, Integer> call(String word) throws Exception {
            return new Tuple2<String, Integer>(word, 1);
        }
    });

    JavaPairRDD<String, Integer> res = pairWord.reduceByKey(new Function2<Integer, Integer, Integer>() {
        public Integer call(Integer oldV, Integer v) throws Exception {
            return oldV + v;
        }
    });

    res.foreach(new VoidFunction<Tuple2<String, Integer>>() {
        public void call(Tuple2<String, Integer> value) throws Exception {
            System.out.println(value._1 + "\t" + value._2);
        }
    });

    // Commented-out NIO experiments kept from the original source file:
    // RandomAccessFile rfile = new RandomAccessFile("ooxx", "rw");
    // rfile.seek(222);
    // FileChannel channel = rfile.getChannel();
    // linux fd write(fd) read(fd)
    // ByteBuffer b1 = ByteBuffer.allocate(1024);
    // ByteBuffer b2 = ByteBuffer.allocateDirect(1024);
    // MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_WRITE, 80, 120);
}
Example 19
Source File: WordCountingAppWithCheckpoint.java From tutorials with MIT License
public static void main(String[] args) throws InterruptedException {
    Logger.getLogger("org").setLevel(Level.OFF);
    Logger.getLogger("akka").setLevel(Level.OFF);

    Map<String, Object> kafkaParams = new HashMap<>();
    kafkaParams.put("bootstrap.servers", "localhost:9092");
    kafkaParams.put("key.deserializer", StringDeserializer.class);
    kafkaParams.put("value.deserializer", StringDeserializer.class);
    kafkaParams.put("group.id", "use_a_separate_group_id_for_each_stream");
    kafkaParams.put("auto.offset.reset", "latest");
    kafkaParams.put("enable.auto.commit", false);

    Collection<String> topics = Arrays.asList("messages");

    SparkConf sparkConf = new SparkConf();
    sparkConf.setMaster("local[2]");
    sparkConf.setAppName("WordCountingAppWithCheckpoint");
    sparkConf.set("spark.cassandra.connection.host", "127.0.0.1");

    JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(1));
    sparkContext = streamingContext.sparkContext();
    streamingContext.checkpoint("./.checkpoint");

    JavaInputDStream<ConsumerRecord<String, String>> messages = KafkaUtils.createDirectStream(
            streamingContext,
            LocationStrategies.PreferConsistent(),
            ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams));

    JavaPairDStream<String, String> results = messages.mapToPair(record -> new Tuple2<>(record.key(), record.value()));

    JavaDStream<String> lines = results.map(tuple2 -> tuple2._2());

    JavaDStream<String> words = lines.flatMap(x -> Arrays.asList(x.split("\\s+")).iterator());

    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<>(s, 1))
            .reduceByKey((Function2<Integer, Integer, Integer>) (i1, i2) -> i1 + i2);

    JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> cumulativeWordCounts =
            wordCounts.mapWithState(StateSpec.function((word, one, state) -> {
                int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
                Tuple2<String, Integer> output = new Tuple2<>(word, sum);
                state.update(sum);
                return output;
            }));

    cumulativeWordCounts.foreachRDD(javaRdd -> {
        List<Tuple2<String, Integer>> wordCountList = javaRdd.collect();
        for (Tuple2<String, Integer> tuple : wordCountList) {
            List<Word> wordList = Arrays.asList(new Word(tuple._1, tuple._2));
            JavaRDD<Word> rdd = sparkContext.parallelize(wordList);
            javaFunctions(rdd).writerBuilder("vocabulary", "words", mapToRow(Word.class))
                    .saveToCassandra();
        }
    });

    streamingContext.start();
    streamingContext.awaitTermination();
}
Example 20
Source File: LocalPropertyTest.java From tinkerpop with Apache License 2.0
@Test
public void shouldSetThreadLocalProperties() throws Exception {
    final String testName = "ThreadLocalProperties";
    final String rddName = TestHelper.makeTestDataDirectory(LocalPropertyTest.class, UUID.randomUUID().toString());
    final Configuration configuration = new BaseConfiguration();
    configuration.setProperty("spark.master", "local[4]");
    configuration.setProperty("spark.serializer", GryoSerializer.class.getCanonicalName());
    configuration.setProperty(Graph.GRAPH, HadoopGraph.class.getName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern-v3d0.kryo"));
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false);
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
    configuration.setProperty("spark.jobGroup.id", "22");
    Graph graph = GraphFactory.open(configuration);
    graph.compute(SparkGraphComputer.class)
            .result(GraphComputer.ResultGraph.NEW)
            .persist(GraphComputer.Persist.EDGES)
            .program(TraversalVertexProgram.build()
                    .traversal(graph.traversal().withComputer(Computer.compute(SparkGraphComputer.class)),
                            "gremlin-groovy",
                            "g.V()").create(graph)).submit().get();
    ////////
    SparkConf sparkConfiguration = new SparkConf();
    sparkConfiguration.setAppName(testName);
    ConfUtil.makeHadoopConfiguration(configuration).forEach(entry -> sparkConfiguration.set(entry.getKey(), entry.getValue()));
    JavaSparkContext sparkContext = new JavaSparkContext(SparkContext.getOrCreate(sparkConfiguration));
    JavaSparkStatusTracker statusTracker = sparkContext.statusTracker();
    assertTrue(statusTracker.getJobIdsForGroup("22").length >= 1);
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    ///////
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, PersistedInputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, null);
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, null);
    // just a note that this value should have always been set to true, but from the initial commit was false.
    // interestingly the last assertion had always passed up to spark 2.3.x when it started to fail. apparently
    // that assertion should likely have never passed, so it stands to reason that there was a bug in spark in
    // 2.2.x that was resolved for 2.3.x....that's my story and i'm sticking to it.
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
    configuration.setProperty("spark.jobGroup.id", "44");
    graph = GraphFactory.open(configuration);
    graph.compute(SparkGraphComputer.class)
            .result(GraphComputer.ResultGraph.NEW)
            .persist(GraphComputer.Persist.NOTHING)
            .program(TraversalVertexProgram.build()
                    .traversal(graph.traversal().withComputer(SparkGraphComputer.class),
                            "gremlin-groovy",
                            "g.V()").create(graph)).submit().get();
    ///////
    assertTrue(statusTracker.getJobIdsForGroup("44").length >= 1);
}