Java Code Examples for org.apache.spark.api.java.JavaSparkContext#stop()
The following examples show how to use org.apache.spark.api.java.JavaSparkContext#stop().
You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.
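Before the individual examples, here is a minimal sketch of the pattern most of them share: create a JavaSparkContext, run the job inside a try block, and call stop() in a finally block so the context is always released even if the job throws. The class name, application name, and toy job below are illustrative placeholders, not taken from any of the projects listed on this page.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

import java.util.Arrays;

public class StopSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setAppName("StopSketch")
                .setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            // Do the actual work while the context is alive.
            long count = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)).count();
            System.out.println("Count: " + count);
        } finally {
            // Always stop the context so driver and executor resources are released.
            sc.stop();
        }
    }
}

JavaSparkContext also implements java.io.Closeable, and close() delegates to stop(), which is why a few of the examples below call both or use close() alone; a try-with-resources block is an equivalent alternative to the explicit finally shown here.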
Example 1
Source File: TestKryoWarning.java From deeplearning4j with Apache License 2.0 | 6 votes |
private static void doTestCG(SparkConf sparkConf) {
    JavaSparkContext sc = new JavaSparkContext(sparkConf);
    try {
        ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in")
                        .addLayer("0", new OutputLayer.Builder().nIn(10).nOut(10).build(), "in").setOutputs("0")
                        .build();
        TrainingMaster tm = new ParameterAveragingTrainingMaster.Builder(1).build();
        SparkListenable scg = new SparkComputationGraph(sc, conf, tm);
    } finally {
        sc.stop();
    }
}
Example 2
Source File: JavaStandaloneIgniteRDDSelfTest.java From ignite with Apache License 2.0 | 6 votes |
/**
 * @throws Exception If failed.
 */
@Test
public void testQueryObjectsFromIgnite() throws Exception {
    JavaSparkContext sc = new JavaSparkContext("local[*]", "test");

    try {
        JavaIgniteContext<String, Entity> ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider());

        JavaIgniteRDD<String, Entity> cache = ic.fromCache(ENTITY_CACHE_NAME);

        cache.savePairs(sc.parallelize(F.range(0, 1001), 2).mapToPair(INT_TO_ENTITY_F));

        List<Entity> res = cache.objectSql("Entity", "name = ? and salary = ?", "name50", 5000)
            .map(STR_ENTITY_PAIR_TO_ENTITY_F).collect();

        assertEquals("Invalid result length", 1, res.size());
        assertEquals("Invalid result", 50, res.get(0).id());
        assertEquals("Invalid result", "name50", res.get(0).name());
        assertEquals("Invalid result", 5000, res.get(0).salary());
        assertEquals("Invalid count", 500, cache.objectSql("Entity", "id > 500").count());
    }
    finally {
        sc.stop();
    }
}
Example 3
Source File: JavaStandaloneIgniteRDDSelfTest.java From ignite with Apache License 2.0 | 6 votes |
/**
 * @throws Exception If failed.
 */
@Test
public void testStoreDataToIgnite() throws Exception {
    JavaSparkContext sc = new JavaSparkContext("local[*]", "test");

    try {
        JavaIgniteContext<String, String> ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider());

        ic.fromCache(ENTITY_CACHE_NAME)
            .savePairs(sc.parallelize(F.range(0, KEYS_CNT), 2).mapToPair(TO_PAIR_F));

        Ignite ignite = Ignition.ignite("grid-0");

        IgniteCache<String, String> cache = ignite.cache(ENTITY_CACHE_NAME);

        for (int i = 0; i < KEYS_CNT; i++) {
            String val = cache.get(String.valueOf(i));

            assertNotNull("Value was not put to cache for key: " + i, val);
            assertEquals("Invalid value stored for key: " + i, "val" + i, val);
        }
    }
    finally {
        sc.stop();
    }
}
Example 4
Source File: JavaKernelDensityEstimationExample.java From SparkDemo with MIT License | 6 votes |
public static void main(String[] args) { SparkConf conf = new SparkConf().setAppName("JavaKernelDensityEstimationExample"); JavaSparkContext jsc = new JavaSparkContext(conf); // $example on$ // an RDD of sample data JavaRDD<Double> data = jsc.parallelize( Arrays.asList(1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 5.0, 6.0, 7.0, 8.0, 9.0, 9.0)); // Construct the density estimator with the sample data // and a standard deviation for the Gaussian kernels KernelDensity kd = new KernelDensity().setSample(data).setBandwidth(3.0); // Find density estimates for the given values double[] densities = kd.estimate(new double[]{-1.0, 2.0, 5.0}); System.out.println(Arrays.toString(densities)); // $example off$ jsc.stop(); }
Example 5
Source File: JavaHypothesisTestingKolmogorovSmirnovTestExample.java From SparkDemo with MIT License | 6 votes |
public static void main(String[] args) { SparkConf conf = new SparkConf().setAppName("JavaHypothesisTestingKolmogorovSmirnovTestExample"); JavaSparkContext jsc = new JavaSparkContext(conf); // $example on$ JavaDoubleRDD data = jsc.parallelizeDoubles(Arrays.asList(0.1, 0.15, 0.2, 0.3, 0.25)); KolmogorovSmirnovTestResult testResult = Statistics.kolmogorovSmirnovTest(data, "norm", 0.0, 1.0); // summary of the test including the p-value, test statistic, and null hypothesis // if our p-value indicates significance, we can reject the null hypothesis System.out.println(testResult); // $example off$ jsc.stop(); }
Example 6
Source File: JavaCorrelationsExample.java From SparkDemo with MIT License | 5 votes |
public static void main(String[] args) { SparkConf conf = new SparkConf().setAppName("JavaCorrelationsExample"); JavaSparkContext jsc = new JavaSparkContext(conf); // $example on$ JavaDoubleRDD seriesX = jsc.parallelizeDoubles( Arrays.asList(1.0, 2.0, 3.0, 3.0, 5.0)); // a series // must have the same number of partitions and cardinality as seriesX JavaDoubleRDD seriesY = jsc.parallelizeDoubles( Arrays.asList(11.0, 22.0, 33.0, 33.0, 555.0)); // compute the correlation using Pearson's method. Enter "spearman" for Spearman's method. // If a method is not specified, Pearson's method will be used by default. Double correlation = Statistics.corr(seriesX.srdd(), seriesY.srdd(), "pearson"); System.out.println("Correlation is: " + correlation); // note that each Vector is a row and not a column JavaRDD<Vector> data = jsc.parallelize( Arrays.asList( Vectors.dense(1.0, 10.0, 100.0), Vectors.dense(2.0, 20.0, 200.0), Vectors.dense(5.0, 33.0, 366.0) ) ); // calculate the correlation matrix using Pearson's method. // Use "spearman" for Spearman's method. // If a method is not specified, Pearson's method will be used by default. Matrix correlMatrix = Statistics.corr(data.rdd(), "pearson"); System.out.println(correlMatrix.toString()); // $example off$ jsc.stop(); }
Example 7
Source File: ElasticsearchDependenciesJob.java From spark-dependencies with Apache License 2.0 | 5 votes |
void run(String[] spanIndices, String[] depIndices, String peerServiceTag) {
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
        for (int i = 0; i < spanIndices.length; i++) {
            String spanIndex = spanIndices[i];
            String depIndex = depIndices[i];
            log.info("Running Dependencies job for {}, reading from {} index, result storing to {}", day, spanIndex, depIndex);
            // Send raw query to ES to select only the docs / spans we want to consider for this job
            // This doesn't change the default behavior as the daily indexes only contain up to 24h of data
            String esQuery = String.format("{\"range\": {\"startTimeMillis\": { \"gte\": \"now-%s\" }}}", spanRange);
            JavaPairRDD<String, Iterable<Span>> traces = JavaEsSpark.esJsonRDD(sc, spanIndex, esQuery)
                .map(new ElasticTupleToSpan())
                .groupBy(Span::getTraceId);

            List<Dependency> dependencyLinks = DependenciesSparkHelper.derive(traces, peerServiceTag);
            EsMajorVersion esMajorVersion = getEsVersion();

            // Add type for ES < 7
            // WARN log is produced for older ES versions, however it's produced by spark-es library and not ES itself, it cannot be disabled
            // WARN Resource: Detected type name in resource [jaeger-dependencies-2019-08-14/dependencies]. Type names are deprecated and will be removed in a later release.
            if (esMajorVersion.before(EsMajorVersion.V_7_X)) {
                depIndex = depIndex + "/dependencies";
            }

            store(sc, dependencyLinks, depIndex);
            log.info("Done, {} dependency objects created", dependencyLinks.size());
            if (dependencyLinks.size() > 0) {
                // we do not derive dependencies for old prefix "prefix:" if new prefix "prefix-" contains data
                break;
            }
        }
    } finally {
        sc.stop();
    }
}
Example 8
Source File: JavaDemo.java From spark-on-cassandra-quickstart with Apache License 2.0 | 5 votes |
private void run() {
    JavaSparkContext sc = new JavaSparkContext(conf);
    generateData(sc);
    compute(sc);
    showResults(sc);
    sc.stop();
}
Example 9
Source File: SplitFasta.java From ViraPipe with MIT License | 5 votes |
public static void main(String[] args) throws IOException {

    Options options = new Options();
    Option pathOpt = new Option( "in", true, "Path to fastq file in hdfs." );
    Option opOpt = new Option( "out", true, "HDFS path for output files. If not present, the output files are not moved to HDFS." );
    options.addOption( new Option( "partitions", "Divide or merge to n partitions" ) );
    options.addOption( pathOpt );
    options.addOption( opOpt );

    CommandLineParser parser = new BasicParser();
    CommandLine cmd = null;
    try {
        // parse the command line arguments
        cmd = parser.parse( options, args );
    }
    catch( ParseException exp ) {
        // oops, something went wrong
        System.err.println( "Parsing failed. Reason: " + exp.getMessage() );
    }

    String out = cmd.hasOption("out") ? cmd.getOptionValue("out") : null;
    String in = cmd.hasOption("in") ? cmd.getOptionValue("in") : null;
    String partitions = cmd.hasOption("partitions") ? cmd.getOptionValue("partitions") : null;

    SparkConf conf = new SparkConf().setAppName("SplitFasta");
    JavaSparkContext sc = new JavaSparkContext(conf);
    sc.hadoopConfiguration().set("textinputformat.record.delimiter", ">");

    JavaRDD<String> rdd = sc.textFile(in);
    JavaRDD<String> crdd = rdd.map(v -> ">" + v.trim()).repartition(Integer.valueOf(partitions));

    crdd.saveAsTextFile(out);
    sc.stop();
}
Example 10
Source File: JavaEmbeddedIgniteRDDSelfTest.java From ignite with Apache License 2.0 | 5 votes |
/**
 * @throws Exception If failed.
 */
@Test
public void testStoreDataToIgnite() throws Exception {
    JavaSparkContext sc = createContext();

    JavaIgniteContext<String, String> ic = null;

    try {
        ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider(), false);

        ic.fromCache(PARTITIONED_CACHE_NAME)
            .savePairs(sc.parallelize(F.range(0, KEYS_CNT), GRID_CNT).mapToPair(TO_PAIR_F), true, false);

        Ignite ignite = ic.ignite();

        IgniteCache<String, String> cache = ignite.cache(PARTITIONED_CACHE_NAME);

        for (int i = 0; i < KEYS_CNT; i++) {
            String val = cache.get(String.valueOf(i));

            assertNotNull("Value was not put to cache for key: " + i, val);
            assertEquals("Invalid value stored for key: " + i, "val" + i, val);
        }
    }
    finally {
        if (ic != null)
            ic.close(true);

        sc.stop();
    }
}
Example 11
Source File: BatchProcessor.java From lambda-arch with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    Properties prop = PropertyFileReader.readPropertyFile("iot-spark.properties");
    String file = prop.getProperty("com.iot.app.hdfs") + "iot-data-parque";
    String[] jars = {prop.getProperty("com.iot.app.jar")};

    JavaSparkContext sparkContext = getSparkContext(prop, jars);
    SQLContext sqlContext = new SQLContext(sparkContext);
    Dataset<Row> dataFrame = getDataFrame(sqlContext, file);
    JavaRDD<IoTData> rdd = dataFrame.javaRDD().map(getRowIoTDataFunction());
    BatchHeatMapProcessor processor = new BatchHeatMapProcessor();
    processor.processHeatMap(rdd);
    sparkContext.close();
    sparkContext.stop();
}
Example 12
Source File: JavaSVDExample.java From SparkDemo with MIT License | 5 votes |
public static void main(String[] args) { SparkConf conf = new SparkConf().setAppName("SVD Example"); SparkContext sc = new SparkContext(conf); JavaSparkContext jsc = JavaSparkContext.fromSparkContext(sc); // $example on$ double[][] array = {{1.12, 2.05, 3.12}, {5.56, 6.28, 8.94}, {10.2, 8.0, 20.5}}; LinkedList<Vector> rowsList = new LinkedList<>(); for (int i = 0; i < array.length; i++) { Vector currentRow = Vectors.dense(array[i]); rowsList.add(currentRow); } JavaRDD<Vector> rows = jsc.parallelize(rowsList); // Create a RowMatrix from JavaRDD<Vector>. RowMatrix mat = new RowMatrix(rows.rdd()); // Compute the top 3 singular values and corresponding singular vectors. SingularValueDecomposition<RowMatrix, Matrix> svd = mat.computeSVD(3, true, 1.0E-9d); RowMatrix U = svd.U(); Vector s = svd.s(); Matrix V = svd.V(); // $example off$ Vector[] collectPartitions = (Vector[]) U.rows().collect(); System.out.println("U factor is:"); for (Vector vector : collectPartitions) { System.out.println("\t" + vector); } System.out.println("Singular values are: " + s); System.out.println("V factor is:\n" + V); jsc.stop(); }
Example 13
Source File: JavaGradientBoostingClassificationExample.java From SparkDemo with MIT License | 4 votes |
public static void main(String[] args) { // $example on$ SparkConf sparkConf = new SparkConf() .setAppName("JavaGradientBoostedTreesClassificationExample"); JavaSparkContext jsc = new JavaSparkContext(sparkConf); // Load and parse the data file. String datapath = "data/mllib/sample_libsvm_data.txt"; JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(jsc.sc(), datapath).toJavaRDD(); // Split the data into training and test sets (30% held out for testing) JavaRDD<LabeledPoint>[] splits = data.randomSplit(new double[]{0.7, 0.3}); JavaRDD<LabeledPoint> trainingData = splits[0]; JavaRDD<LabeledPoint> testData = splits[1]; // Train a GradientBoostedTrees model. // The defaultParams for Classification use LogLoss by default. BoostingStrategy boostingStrategy = BoostingStrategy.defaultParams("Classification"); boostingStrategy.setNumIterations(3); // Note: Use more iterations in practice. boostingStrategy.getTreeStrategy().setNumClasses(2); boostingStrategy.getTreeStrategy().setMaxDepth(5); // Empty categoricalFeaturesInfo indicates all features are continuous. Map<Integer, Integer> categoricalFeaturesInfo = new HashMap<>(); boostingStrategy.treeStrategy().setCategoricalFeaturesInfo(categoricalFeaturesInfo); final GradientBoostedTreesModel model = GradientBoostedTrees.train(trainingData, boostingStrategy); // Evaluate model on test instances and compute test error JavaPairRDD<Double, Double> predictionAndLabel = testData.mapToPair(new PairFunction<LabeledPoint, Double, Double>() { @Override public Tuple2<Double, Double> call(LabeledPoint p) { return new Tuple2<>(model.predict(p.features()), p.label()); } }); Double testErr = 1.0 * predictionAndLabel.filter(new Function<Tuple2<Double, Double>, Boolean>() { @Override public Boolean call(Tuple2<Double, Double> pl) { return !pl._1().equals(pl._2()); } }).count() / testData.count(); System.out.println("Test Error: " + testErr); System.out.println("Learned classification GBT model:\n" + model.toDebugString()); // Save and load model model.save(jsc.sc(), "target/tmp/myGradientBoostingClassificationModel"); GradientBoostedTreesModel sameModel = GradientBoostedTreesModel.load(jsc.sc(), "target/tmp/myGradientBoostingClassificationModel"); // $example off$ jsc.stop(); }
Example 14
Source File: SamToFastq.java From ViraPipe with MIT License | 4 votes |
public static void main(String[] args) throws IOException { SparkConf conf = new SparkConf().setAppName("SamToFastq"); sc = new JavaSparkContext(conf); String in = args[0]; String out = args[1]; JavaPairRDD<LongWritable, SAMRecordWritable> bamPairRDD = sc.newAPIHadoopFile(in, AnySAMInputFormat.class, LongWritable.class, SAMRecordWritable.class, sc.hadoopConfiguration()); //Map to SAMRecord RDD JavaRDD<SAMRecord> samRDD = bamPairRDD.map(v1 -> v1._2().get()); JavaPairRDD<Text, SequencedFragment> fastqrdd = mapSAMRecordsToFastq(samRDD); fastqrdd.saveAsNewAPIHadoopFile(out, Text.class, SequencedFragment.class, FastqOutputFormat.class, sc.hadoopConfiguration()); sc.stop(); }
Example 15
Source File: JavaRandomForestClassificationExample.java From SparkDemo with MIT License | 4 votes |
public static void main(String[] args) { // $example on$ SparkConf sparkConf = new SparkConf().setAppName("JavaRandomForestClassificationExample"); JavaSparkContext jsc = new JavaSparkContext(sparkConf); // Load and parse the data file. String datapath = "data/mllib/sample_libsvm_data.txt"; JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(jsc.sc(), datapath).toJavaRDD(); // Split the data into training and test sets (30% held out for testing) JavaRDD<LabeledPoint>[] splits = data.randomSplit(new double[]{0.7, 0.3}); JavaRDD<LabeledPoint> trainingData = splits[0]; JavaRDD<LabeledPoint> testData = splits[1]; // Train a RandomForest model. // Empty categoricalFeaturesInfo indicates all features are continuous. Integer numClasses = 2; HashMap<Integer, Integer> categoricalFeaturesInfo = new HashMap<>(); Integer numTrees = 3; // Use more in practice. String featureSubsetStrategy = "auto"; // Let the algorithm choose. String impurity = "gini"; Integer maxDepth = 5; Integer maxBins = 32; Integer seed = 12345; final RandomForestModel model = RandomForest.trainClassifier(trainingData, numClasses, categoricalFeaturesInfo, numTrees, featureSubsetStrategy, impurity, maxDepth, maxBins, seed); // Evaluate model on test instances and compute test error JavaPairRDD<Double, Double> predictionAndLabel = testData.mapToPair(new PairFunction<LabeledPoint, Double, Double>() { @Override public Tuple2<Double, Double> call(LabeledPoint p) { return new Tuple2<>(model.predict(p.features()), p.label()); } }); Double testErr = 1.0 * predictionAndLabel.filter(new Function<Tuple2<Double, Double>, Boolean>() { @Override public Boolean call(Tuple2<Double, Double> pl) { return !pl._1().equals(pl._2()); } }).count() / testData.count(); System.out.println("Test Error: " + testErr); System.out.println("Learned classification forest model:\n" + model.toDebugString()); // Save and load model model.save(jsc.sc(), "target/tmp/myRandomForestClassificationModel"); RandomForestModel sameModel = RandomForestModel.load(jsc.sc(), "target/tmp/myRandomForestClassificationModel"); // $example off$ jsc.stop(); }
Example 16
Source File: JavaEmbeddedIgniteRDDSelfTest.java From ignite with Apache License 2.0 | 4 votes |
/**
 * @throws Exception If failed.
 */
@Test
public void testQueryFieldsFromIgnite() throws Exception {
    JavaSparkContext sc = createContext();

    JavaIgniteContext<String, Entity> ic = null;

    try {
        ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider(), false);

        JavaIgniteRDD<String, Entity> cache = ic.fromCache(PARTITIONED_CACHE_NAME);

        cache.savePairs(sc.parallelize(F.range(0, 1001), GRID_CNT).mapToPair(INT_TO_ENTITY_F), true, false);

        Dataset<Row> df =
            cache.sql("select id, name, salary from Entity where name = ? and salary = ?", "name50", 5000);

        df.printSchema();

        Row[] res = (Row[])df.collect();

        assertEquals("Invalid result length", 1, res.length);
        assertEquals("Invalid result", 50, res[0].get(0));
        assertEquals("Invalid result", "name50", res[0].get(1));
        assertEquals("Invalid result", 5000, res[0].get(2));

        Column exp = new Column("NAME").equalTo("name50").and(new Column("SALARY").equalTo(5000));

        Dataset<Row> df0 = cache.sql("select id, name, salary from Entity").where(exp);

        df.printSchema();

        Row[] res0 = (Row[])df0.collect();

        assertEquals("Invalid result length", 1, res0.length);
        assertEquals("Invalid result", 50, res0[0].get(0));
        assertEquals("Invalid result", "name50", res0[0].get(1));
        assertEquals("Invalid result", 5000, res0[0].get(2));

        assertEquals("Invalid count", 500, cache.sql("select id from Entity where id > 500").count());
    }
    finally {
        if (ic != null)
            ic.close(true);

        sc.stop();
    }
}
Example 17
Source File: TestCompareParameterAveragingSparkVsSingleMachine.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test
public void testAverageEveryStepGraphCNN() {
    //Idea: averaging every step with SGD (SGD updater + optimizer) is mathematically identical to doing the learning
    // on a single machine for synchronous distributed training
    //BUT: This is *ONLY* the case if all workers get an identical number of examples. This won't be the case if
    // we use RDD.randomSplit (which is what occurs if we use .fit(JavaRDD<DataSet> on a data set that needs splitting),
    // which might give a number of examples that isn't divisible by number of workers (like 39 examples on 4 executors)
    //This is also ONLY the case using SGD updater

    int miniBatchSizePerWorker = 10;
    int nWorkers = 4;

    for (boolean saveUpdater : new boolean[] {true, false}) {
        JavaSparkContext sc = getContext(nWorkers);

        try {
            //Do training locally, for 3 minibatches
            int[] seeds = {1, 2, 3};

            ComputationGraph net = new ComputationGraph(getGraphConfCNN(12345, new Sgd(0.5)));
            net.init();
            INDArray initialParams = net.params().dup();

            for (int i = 0; i < seeds.length; i++) {
                DataSet ds = getOneDataSetCNN(miniBatchSizePerWorker * nWorkers, seeds[i]);
                if (!saveUpdater)
                    net.setUpdater(null);
                net.fit(ds);
            }
            INDArray finalParams = net.params().dup();

            //Do training on Spark with one executor, for 3 separate minibatches
            TrainingMaster tm = getTrainingMaster(1, miniBatchSizePerWorker, saveUpdater);
            SparkComputationGraph sparkNet = new SparkComputationGraph(sc, getGraphConfCNN(12345, new Sgd(0.5)), tm);
            sparkNet.setCollectTrainingStats(true);
            INDArray initialSparkParams = sparkNet.getNetwork().params().dup();

            for (int i = 0; i < seeds.length; i++) {
                List<DataSet> list = getOneDataSetAsIndividalExamplesCNN(miniBatchSizePerWorker * nWorkers, seeds[i]);
                JavaRDD<DataSet> rdd = sc.parallelize(list);

                sparkNet.fit(rdd);
            }

            // System.out.println(sparkNet.getSparkTrainingStats().statsAsString());
            sparkNet.getSparkTrainingStats().statsAsString();

            INDArray finalSparkParams = sparkNet.getNetwork().params().dup();

            // System.out.println("Initial (Local) params: " + Arrays.toString(initialParams.data().asFloat()));
            // System.out.println("Initial (Spark) params: " + Arrays.toString(initialSparkParams.data().asFloat()));
            // System.out.println("Final (Local) params:   " + Arrays.toString(finalParams.data().asFloat()));
            // System.out.println("Final (Spark) params:   " + Arrays.toString(finalSparkParams.data().asFloat()));
            assertArrayEquals(initialParams.data().asFloat(), initialSparkParams.data().asFloat(), 1e-8f);
            assertArrayEquals(finalParams.data().asFloat(), finalSparkParams.data().asFloat(), 1e-6f);

            double sparkScore = sparkNet.getScore();
            assertTrue(sparkScore > 0.0);

            assertEquals(net.score(), sparkScore, 1e-3);
        } finally {
            sc.stop();
        }
    }
}
Example 18
Source File: SparkKickoff.java From -Data-Stream-Development-with-Apache-Spark-Kafka-and-Spring-Boot with MIT License | 4 votes |
public static void main(String[] args) throws InterruptedException { System.setProperty("hadoop.home.dir", HADOOP_HOME_DIR_VALUE); final SparkConf conf = new SparkConf() .setMaster(RUN_LOCAL_WITH_AVAILABLE_CORES) .setAppName(APPLICATION_NAME) .set("spark.mongodb.output.uri", MONGODB_OUTPUT_URI); JavaSparkContext sparkContext = new JavaSparkContext(conf); // business logic sparkContext.stop(); sparkContext.close(); }
Example 19
Source File: JavaKMeansExample.java From SparkDemo with MIT License | 4 votes |
public static void main(String[] args) { SparkConf conf = new SparkConf().setAppName("JavaKMeansExample"); JavaSparkContext jsc = new JavaSparkContext(conf); // $example on$ // Load and parse data String path = "data/mllib/kmeans_data.txt"; JavaRDD<String> data = jsc.textFile(path); JavaRDD<Vector> parsedData = data.map( new Function<String, Vector>() { public Vector call(String s) { String[] sarray = s.split(" "); double[] values = new double[sarray.length]; for (int i = 0; i < sarray.length; i++) { values[i] = Double.parseDouble(sarray[i]); } return Vectors.dense(values); } } ); parsedData.cache(); // Cluster the data into two classes using KMeans int numClusters = 2; int numIterations = 20; KMeansModel clusters = KMeans.train(parsedData.rdd(), numClusters, numIterations); System.out.println("Cluster centers:"); for (Vector center: clusters.clusterCenters()) { System.out.println(" " + center); } double cost = clusters.computeCost(parsedData.rdd()); System.out.println("Cost: " + cost); // Evaluate clustering by computing Within Set Sum of Squared Errors double WSSSE = clusters.computeCost(parsedData.rdd()); System.out.println("Within Set Sum of Squared Errors = " + WSSSE); // Save and load model clusters.save(jsc.sc(), "target/org/apache/spark/JavaKMeansExample/KMeansModel"); KMeansModel sameModel = KMeansModel.load(jsc.sc(), "target/org/apache/spark/JavaKMeansExample/KMeansModel"); // $example off$ jsc.stop(); }
Example 20
Source File: FeatureClient.java From render with GNU General Public License v2.0 | 3 votes |
public void run(final SparkConf conf)
        throws IOException, URISyntaxException {

    final JavaSparkContext sparkContext = new JavaSparkContext(conf);

    final String sparkAppId = sparkContext.getConf().getAppId();
    final String executorsJson = LogUtilities.getExecutorsApiJson(sparkAppId);

    LOG.info("run: appId is {}, executors data is {}", sparkAppId, executorsJson);

    for (final String pairJsonFileName : parameters.pairJson) {
        generateFeatureListsForPairFile(sparkContext, pairJsonFileName);
    }

    sparkContext.stop();
}