Java Code Examples for org.apache.spark.SparkConf#setMaster()
The following examples show how to use org.apache.spark.SparkConf#setMaster().
You can go to the original project or source file by following the link above each example.
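Before the project-specific examples, here is a minimal sketch of the common setMaster() pattern: hard-code a master URL only when none has already been supplied (for example via spark-submit), so the same code works for local testing and cluster deployment. The class name SetMasterExample and the "local[2]" master URL are illustrative choices, not taken from any of the projects below.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class SetMasterExample {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("SetMasterExample");

        // Only set a master here when none was provided externally
        // (e.g. by spark-submit --master); "local[2]" runs locally with two threads.
        if (!conf.contains("spark.master")) {
            conf.setMaster("local[2]");
        }

        JavaSparkContext sc = new JavaSparkContext(conf);
        System.out.println("Running with master: " + sc.sc().master());
        sc.close();
    }
}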
Example 1
Source File: LensAPI.java From cognition with Apache License 2.0 | 6 votes |
/**
 * Helper method for creating the spark context from the given cognition configuration
 *
 * @return a new configured spark context
 */
public SparkContext createSparkContext() {
  SparkConf conf = new SparkConf();
  Configuration config = cognition.getProperties();

  conf.set("spark.serializer", KryoSerializer.class.getName());
  conf.setAppName(config.getString("app.name"));
  conf.setMaster(config.getString("master"));

  Iterator<String> iterator = config.getKeys("spark");
  while (iterator.hasNext()) {
    String key = iterator.next();
    conf.set(key, config.getString(key));
  }

  SparkContext sc = new SparkContext(conf);
  for (String jar : config.getStringArray("jars")) {
    sc.addJar(jar);
  }
  return sc;
}
Example 2
Source File: SparkContextProvider.java From rdf2x with Apache License 2.0 | 6 votes |
/**
 * Provide a {@link JavaSparkContext} based on default settings
 *
 * @return a {@link JavaSparkContext} based on default settings
 */
public static JavaSparkContext provide() {
  SparkConf config = new SparkConf()
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .registerKryoClasses(getSerializableClasses());

  if (!config.contains("spark.app.name")) {
    config.setAppName("RDF2X");
  }
  if (!config.contains("spark.master")) {
    config.setMaster("local");
  }

  // set serialization registration required if you want to make sure you registered all your classes
  // some spark internal classes will need to be registered as well
  // config.set("spark.kryo.registrationRequired", "true");

  log.info("Getting Spark Context for config: \n{}", config.toDebugString());

  return new JavaSparkContext(config);
}
Example 3
Source File: SparkUtils.java From BigDataPlatform with GNU General Public License v3.0 | 5 votes |
/**
 * Decide how to set the SparkConf master based on whether the
 * application is configured to run as a local test.
 */
public static void setMaster(SparkConf conf) {
  boolean local = ConfigurationManager.getBoolean(Constants.SPARK_LOCAL);
  if (local) {
    conf.setMaster("local");
  }
}
Example 4
Source File: SparkTestBase.java From spark-transformers with Apache License 2.0 | 5 votes |
@Before
public void setup() {
  SparkConf sparkConf = new SparkConf();

  String master = "local[2]";
  sparkConf.setMaster(master);
  sparkConf.setAppName("Local Spark Unit Test");

  sc = new JavaSparkContext(new SparkContext(sparkConf));
  sqlContext = new SQLContext(sc);
}
Example 5
Source File: JavaDemo.java From spark-on-cassandra-quickstart with Apache License 2.0 | 5 votes |
public static void main(String[] args) {
  if (args.length != 2) {
    System.err.println("Syntax: com.datastax.spark.demo.JavaDemo <Spark Master URL> <Cassandra contact point>");
    System.exit(1);
  }

  SparkConf conf = new SparkConf();
  conf.setAppName("Java API demo");
  conf.setMaster(args[0]);
  conf.set("spark.cassandra.connection.host", args[1]);

  JavaDemo app = new JavaDemo(conf);
  app.run();
}
Example 6
Source File: TestNd4jKryoSerialization.java From nd4j with Apache License 2.0 | 5 votes |
@Before
public void before() {
  SparkConf sparkConf = new SparkConf();
  sparkConf.setMaster("local[*]");
  sparkConf.set("spark.driver.host", "localhost");
  sparkConf.setAppName("Iris");

  sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
  sparkConf.set("spark.kryo.registrator", "org.nd4j.Nd4jRegistrator");

  sc = new JavaSparkContext(sparkConf);
}
Example 7
Source File: LensTest.java From cognition with Apache License 2.0 | 5 votes |
@Test
public void test() throws AccumuloSecurityException, IOException, AccumuloException,
    TableExistsException, TableNotFoundException {
  /*Connector conn = instance.getConnector("root", new PasswordToken());
  Scanner scan = conn.createScanner("moreover", Authorizations.EMPTY);
  for(Map.Entry<Key, Value> entry : scan){
    System.out.println(entry);
  }*/

  SparkConf conf = new SparkConf();
  conf.set("spark.serializer", KryoSerializer.class.getName());
  conf.setAppName("test");
  conf.setMaster("local[2]");
  SparkContext sc = new SparkContext(conf);

  CognitionConfiguration pip = new CognitionConfiguration(
      new AccumuloConfiguration(instance, user, password, true));
  LensAPI lens = new LensAPI(sc, pip);

  Criteria criteria = new Criteria();
  criteria.addKeyword("test");
  criteria.setDates(Instant.parse("2015-10-20T09:19:12Z"), Instant.parse("2015-10-20T09:19:13Z"));

  SchemaAdapter s = new SchemaAdapter();
  s.loadJson("moreover-schema.json");
  criteria.setSchema(s);
  criteria.setAccumuloTable("moreover");

  String json = lens.query(criteria);
  assertEquals("[moreover json]", json);
}
Example 8
Source File: SparkStreaming.java From kafka-spark-avro-example with Apache License 2.0 | 5 votes |
public static void main(String... args) {
  SparkConf conf = new SparkConf();
  conf.setMaster("local[2]");
  conf.setAppName("Spark Streaming Test Java");

  JavaSparkContext sc = new JavaSparkContext(conf);
  JavaStreamingContext ssc = new JavaStreamingContext(sc, Durations.seconds(10));

  processStream(ssc, sc);

  ssc.start();
  ssc.awaitTermination();
}
Example 9
Source File: AbstractSparkLayer.java From spark-streaming-direct-kafka with Apache License 2.0 | 5 votes |
public SparkConf getSparkConf() {
  SparkConf sparkConf = new SparkConf();
  sparkConf.set("spark.streaming.kafka.maxRatePerPartition",
      config.getSparkStreamingKafkaMaxRatePerPartition()); // rate limiting
  sparkConf.setAppName("StreamingEngine-" + config.getTopicSet().toString() + "-" + config.getNamespace());

  if (config.getLocalMode()) {
    sparkConf.setMaster("local[4]");
  }
  return sparkConf;
}
Example 10
Source File: KafkaProcessingApplicationIntegrationTest.java From bpmn.ai with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test
public void testKafkaDataProcessingActivityLevel() throws Exception {
  //System.setProperty("hadoop.home.dir", "C:\\Users\\b60\\Desktop\\hadoop-2.6.0\\hadoop-2.6.0");

  //run main class
  String args[] = {"-fs", DATA_PROCESSING_TEST_INPUT_DIRECTORY_ACTIVITY,
      "-fd", DATA_PROCESSING_TEST_OUTPUT_DIRECTORY_ACTIVITY,
      "-d", "|", "-sr", "false", "-dl", "activity", "-sm", "overwrite", "-of", "parquet",
      "-wd", "./src/test/resources/config/kafka_processing_activity/"};
  SparkConf sparkConf = new SparkConf();
  sparkConf.setMaster("local[*]");
  SparkSession.builder().config(sparkConf).getOrCreate();

  // run main class
  KafkaProcessingApplication.main(args);

  //start Spark session
  SparkSession sparkSession = SparkSession.builder()
      .master("local[*]")
      .appName("IntegrationTest")
      .getOrCreate();

  //generate Dataset and create hash to compare
  Dataset<Row> importedDataset = sparkSession.read()
      .option("inferSchema", "true")
      .load(DATA_PROCESSING_TEST_OUTPUT_DIRECTORY_ACTIVITY + "/result/parquet");

  //check that dataset contains 12 lines
  assertEquals(12, importedDataset.count());

  //check that dataset contains 43 columns
  assertEquals(43, importedDataset.columns().length);

  //check hash of dataset
  String hash = BpmnaiUtils.getInstance().md5CecksumOfObject(importedDataset.collect());
  System.out.println(hash);
  assertEquals("A8BBFC3B17C00C40C9883DA1F396D453", hash);

  //close Spark session
  sparkSession.close();
}
Example 11
Source File: CSVImportAndProcessingApplicationIntegrationTest.java From bpmn.ai with BSD 3-Clause "New" or "Revised" License | 5 votes |
@BeforeClass
public static void setUpBeforeClass() throws IOException {
  //System.setProperty("hadoop.home.dir", "C:\\Users\\b60\\Desktop\\hadoop-2.6.0\\hadoop-2.6.0");

  String args[] = {"-fs", TEST_INPUT_FILE_NAME, "-fd", TEST_OUTPUT_FILE_PATH,
      "-d", ";", "-sr", "true", "-sm", "overwrite", "-of", "csv",
      "-wd", "./src/test/resources/config/csv/"};
  SparkConf sparkConf = new SparkConf();
  sparkConf.setMaster("local[*]");
  SparkSession.builder().config(sparkConf).getOrCreate();

  // run main class
  CSVImportAndProcessingApplication.main(args);

  //read result csv
  BufferedReader resultFileReader = new BufferedReader(new FileReader(new File(TEST_OUTPUT_FILE_NAME)));
  headerValues = resultFileReader.readLine().split(RESULT_FILE_DELIMITER);
  firstLineValues = resultFileReader.readLine().split(RESULT_FILE_DELIMITER);
  secondLineValues = resultFileReader.readLine().split(RESULT_FILE_DELIMITER);
  thirdLineValues = resultFileReader.readLine().split(RESULT_FILE_DELIMITER);
  fourthLineValues = resultFileReader.readLine().split(RESULT_FILE_DELIMITER);
  fifthLineValues = resultFileReader.readLine().split(RESULT_FILE_DELIMITER);

  //result should only contain 5 value lines
  try {
    sixthLineValues = resultFileReader.readLine().split(RESULT_FILE_DELIMITER);
  } catch (NullPointerException e) {
    //expected, so continue. will be tested later
  }

  resultFileReader.close();
}
Example 12
Source File: TestNd4jKryoSerialization.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Before
public void before() {
  SparkConf sparkConf = new SparkConf();
  sparkConf.setMaster("local[*]");
  sparkConf.set("spark.driver.host", "localhost");
  sparkConf.setAppName("Iris");

  sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
  sparkConf.set("spark.kryo.registrator", "org.nd4j.kryo.Nd4jRegistrator");

  sc = new JavaSparkContext(sparkConf);
}
Example 13
Source File: SparkScheduler.java From oodt with Apache License 2.0 | 5 votes |
public SparkScheduler(JobQueue queue) {
  SparkConf conf = new SparkConf();
  conf.setMaster(System.getProperty("resource.runner.spark.host", "local"));
  conf.setAppName("OODT Spark Job");

  URL location = SparkScheduler.class.getResource(
      '/' + SparkScheduler.class.getName().replace('.', '/') + ".class");
  conf.setJars(new String[]{"../lib/cas-resource-0.8-SNAPSHOT.jar"});

  sc = new SparkContext(conf);
  ssc = new StreamingContext(sc, new Duration(10000));
  this.queue = queue;
}
Example 14
Source File: GeoWaveSparkConf.java From geowave with Apache License 2.0 | 5 votes |
public static SparkConf getDefaultConfig() {
  SparkConf defaultConfig = new SparkConf();
  defaultConfig = defaultConfig.setMaster("yarn");
  defaultConfig = defaultConfig.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
  defaultConfig = defaultConfig.set(
      "spark.kryo.registrator",
      "org.locationtech.geowave.analytic.spark.GeoWaveRegistrator");
  return defaultConfig;
}
Example 15
Source File: SparkRefine.java From p3-batchrefine with Apache License 2.0 | 5 votes |
public SparkRefine() {
  LogManager.getRootLogger().setLevel(Level.ERROR);
  fLogger.setLevel(Level.INFO);

  SparkConf sparkConfiguration = new SparkConf(true);
  sparkConfiguration.setAppName(APP_NAME);
  sparkConfiguration.setMaster(sparkConfiguration.get("spark.master", "local"));
  sparkConfiguration.set("spark.task.cpus", sparkConfiguration.get("spark.executor.cores", "1"));

  sparkContext = new JavaSparkContext(sparkConfiguration);
  new ConsoleProgressBar(sparkContext.sc());
}
Example 16
Source File: SparkUtils.java From SparkDemo with MIT License | 4 votes |
public static SparkConf getRemoteSparkConf(Class clazz) {
  SparkConf conf = new SparkConf().setAppName(clazz.getName());
  conf.setMaster(Constant.SPARK_REMOTE_SERVER_ADDRESS);
  conf.set("deploy-mode", "client");
  return conf;
}
Example 17
Source File: KafkaProcessingApplicationIntegrationTest.java From bpmn.ai with BSD 3-Clause "New" or "Revised" License | 4 votes |
@Test
public void testKafkaDataProcessingProcessLevel() throws Exception {
  //System.setProperty("hadoop.home.dir", "C:\\Users\\b60\\Desktop\\hadoop-2.6.0\\hadoop-2.6.0");

  //run main class
  String args[] = {"-fs", DATA_PROCESSING_TEST_INPUT_DIRECTORY_PROCESS,
      "-fd", DATA_PROCESSING_TEST_OUTPUT_DIRECTORY_PROCESS,
      "-d", "|", "-sr", "false", "-sm", "overwrite", "-of", "parquet",
      "-wd", "./src/test/resources/config/kafka_processing_process/"};
  SparkConf sparkConf = new SparkConf();
  sparkConf.setMaster("local[*]");
  SparkSession.builder().config(sparkConf).getOrCreate();

  // run main class
  KafkaProcessingApplication.main(args);

  //start Spark session
  SparkSession sparkSession = SparkSession.builder()
      .master("local[*]")
      .appName("IntegrationTest")
      .getOrCreate();

  //generate Dataset and create hash to compare
  Dataset<Row> importedDataset = sparkSession.read()
      .option("inferSchema", "true")
      .load(DATA_PROCESSING_TEST_OUTPUT_DIRECTORY_PROCESS + "/result/parquet");

  //check that dataset contains 4 lines
  assertEquals(4, importedDataset.count());

  //check that dataset contains 42 columns
  assertEquals(42, importedDataset.columns().length);

  //convert rows to string
  String[] resultLines = (String[]) importedDataset.map(row -> row.mkString(), Encoders.STRING()).collectAsList().toArray();
  for (String l : resultLines) {
    System.out.println(l);
  }

  //check if hashes of line values are correct
  //kept in for easier amendment after test case change
  // System.out.println(DigestUtils.md5Hex(resultLines[0]).toUpperCase());
  // System.out.println(DigestUtils.md5Hex(resultLines[1]).toUpperCase());
  // System.out.println(DigestUtils.md5Hex(resultLines[2]).toUpperCase());
  // System.out.println(DigestUtils.md5Hex(resultLines[3]).toUpperCase());
  assertEquals("9088849D6374163C3E9DACB3090D4E56", DigestUtils.md5Hex(resultLines[0]).toUpperCase());
  assertEquals("415A0A505F9A32002C1342171E7649F9", DigestUtils.md5Hex(resultLines[1]).toUpperCase());
  assertEquals("C83F9CC0618D7FA50D63753FBC429188", DigestUtils.md5Hex(resultLines[2]).toUpperCase());
  assertEquals("0559C383855FDE566069B483188E06C0", DigestUtils.md5Hex(resultLines[3]).toUpperCase());

  //close Spark session
  sparkSession.close();
}
Example 18
Source File: Spark.java From tinkerpop with Apache License 2.0 | 4 votes |
public static SparkContext create(final String master) {
  final SparkConf sparkConf = new SparkConf();
  sparkConf.setMaster(master);
  return Spark.create(sparkConf);
}
Example 19
Source File: UsingBlurRDD.java From incubator-retired-blur with Apache License 2.0 | 4 votes |
@SuppressWarnings("serial") public static void main(String[] args) throws IOException { SparkConf sparkConf = new SparkConf(); sparkConf.setAppName("test"); sparkConf.setMaster("local[2]"); BlurSparkUtil.packJars(sparkConf, UsingBlurRDD.class); JavaSparkContext context = new JavaSparkContext(sparkConf); Iface client = BlurClient.getClient("127.0.0.1:40020"); BlurRDD blurRDD = new BlurRDD(client, sparkConf); String table = "test1234"; final String field = "fam0.col0"; for (int i = 0; i < 1; i++) { long s = System.nanoTime(); JavaRDD<String> rdd = blurRDD.executeStream(context, table, new StreamFunction<String>() { @Override public void call(IndexContext indexContext, StreamWriter<String> writer) throws Exception { IndexReader indexReader = indexContext.getIndexReader(); for (AtomicReaderContext atomicReaderContext : indexReader.leaves()) { AtomicReader reader = atomicReaderContext.reader(); Terms terms = reader.fields().terms(field); if (terms != null) { TermsEnum termsEnum = terms.iterator(null); BytesRef ref; while ((ref = termsEnum.next()) != null) { writer.write(ref.utf8ToString()); } } } } }); long count = rdd.distinct().count(); long e = System.nanoTime(); System.out.println(count + " " + (e - s) / 1000000.0 + " ms"); } // Iterator<String> iterator = rdd.distinct().toLocalIterator(); // while (iterator.hasNext()) { // System.out.println(iterator.next()); // } context.close(); }
Example 20
Source File: WordCountingAppWithCheckpoint.java From tutorials with MIT License | 4 votes |
public static void main(String[] args) throws InterruptedException {
  Logger.getLogger("org").setLevel(Level.OFF);
  Logger.getLogger("akka").setLevel(Level.OFF);

  Map<String, Object> kafkaParams = new HashMap<>();
  kafkaParams.put("bootstrap.servers", "localhost:9092");
  kafkaParams.put("key.deserializer", StringDeserializer.class);
  kafkaParams.put("value.deserializer", StringDeserializer.class);
  kafkaParams.put("group.id", "use_a_separate_group_id_for_each_stream");
  kafkaParams.put("auto.offset.reset", "latest");
  kafkaParams.put("enable.auto.commit", false);

  Collection<String> topics = Arrays.asList("messages");

  SparkConf sparkConf = new SparkConf();
  sparkConf.setMaster("local[2]");
  sparkConf.setAppName("WordCountingAppWithCheckpoint");
  sparkConf.set("spark.cassandra.connection.host", "127.0.0.1");

  JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(1));
  sparkContext = streamingContext.sparkContext();
  streamingContext.checkpoint("./.checkpoint");

  JavaInputDStream<ConsumerRecord<String, String>> messages = KafkaUtils.createDirectStream(
      streamingContext,
      LocationStrategies.PreferConsistent(),
      ConsumerStrategies.<String, String> Subscribe(topics, kafkaParams));

  JavaPairDStream<String, String> results = messages.mapToPair(record -> new Tuple2<>(record.key(), record.value()));

  JavaDStream<String> lines = results.map(tuple2 -> tuple2._2());

  JavaDStream<String> words = lines.flatMap(x -> Arrays.asList(x.split("\\s+")).iterator());

  JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<>(s, 1))
      .reduceByKey((Function2<Integer, Integer, Integer>) (i1, i2) -> i1 + i2);

  JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> cumulativeWordCounts =
      wordCounts.mapWithState(StateSpec.function((word, one, state) -> {
        int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
        Tuple2<String, Integer> output = new Tuple2<>(word, sum);
        state.update(sum);
        return output;
      }));

  cumulativeWordCounts.foreachRDD(javaRdd -> {
    List<Tuple2<String, Integer>> wordCountList = javaRdd.collect();
    for (Tuple2<String, Integer> tuple : wordCountList) {
      List<Word> wordList = Arrays.asList(new Word(tuple._1, tuple._2));
      JavaRDD<Word> rdd = sparkContext.parallelize(wordList);
      javaFunctions(rdd).writerBuilder("vocabulary", "words", mapToRow(Word.class)).saveToCassandra();
    }
  });

  streamingContext.start();
  streamingContext.awaitTermination();
}