org.apache.flink.examples.java.clustering.KMeans Java Examples
The following examples show how to use
org.apache.flink.examples.java.clustering.KMeans.
You can vote up the examples you find useful or vote down those you don't,
and navigate to the original project or source file by following the link above each example. You may also check out the related API usage on the sidebar.
Example #1
Source File: PreviewPlanDumpTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Test public void dumpIterativeKMeans() { // prepare the test environment PreviewPlanEnvironment env = new PreviewPlanEnvironment(); env.setAsContext(); try { KMeans.main(new String[] { "--points ", IN_FILE, "--centroids ", IN_FILE, "--output ", OUT_FILE, "--iterations", "123"}); } catch (OptimizerPlanEnvironment.ProgramAbortException pae) { // all good. } catch (Exception e) { e.printStackTrace(); Assert.fail("KMeans failed with an exception"); } dump(env.getPlan()); }
Example #2
Source File: DumpCompiledPlanTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Test public void dumpIterativeKMeans() { // prepare the test environment PreviewPlanEnvironment env = new PreviewPlanEnvironment(); env.setAsContext(); try { KMeans.main(new String[] { "--points ", IN_FILE, "--centroids ", IN_FILE, "--output ", OUT_FILE, "--iterations", "123"}); } catch (OptimizerPlanEnvironment.ProgramAbortException pae) { // all good. } catch (Exception e) { e.printStackTrace(); Assert.fail("KMeans failed with an exception"); } dump(env.getPlan()); }
Example #3
Source File: PreviewPlanDumpTest.java From flink with Apache License 2.0 | 6 votes |
@Test public void dumpIterativeKMeans() { // prepare the test environment PreviewPlanEnvironment env = new PreviewPlanEnvironment(); env.setAsContext(); try { KMeans.main(new String[] { "--points ", IN_FILE, "--centroids ", IN_FILE, "--output ", OUT_FILE, "--iterations", "123"}); } catch (OptimizerPlanEnvironment.ProgramAbortException pae) { // all good. } catch (Exception e) { e.printStackTrace(); Assert.fail("KMeans failed with an exception"); } dump(env.getPlan()); }
Example #4
Source File: DumpCompiledPlanTest.java From flink with Apache License 2.0 | 6 votes |
@Test public void dumpIterativeKMeans() { // prepare the test environment PreviewPlanEnvironment env = new PreviewPlanEnvironment(); env.setAsContext(); try { KMeans.main(new String[] { "--points ", IN_FILE, "--centroids ", IN_FILE, "--output ", OUT_FILE, "--iterations", "123"}); } catch (OptimizerPlanEnvironment.ProgramAbortException pae) { // all good. } catch (Exception e) { e.printStackTrace(); Assert.fail("KMeans failed with an exception"); } dump(env.getPlan()); }
Example #5
Source File: SuccessAfterNetworkBuffersFailureITCase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
private static void runKMeans(ExecutionEnvironment env) throws Exception { env.setParallelism(PARALLELISM); env.getConfig().disableSysoutLogging(); // get input data DataSet<KMeans.Point> points = KMeansData.getDefaultPointDataSet(env).rebalance(); DataSet<KMeans.Centroid> centroids = KMeansData.getDefaultCentroidDataSet(env).rebalance(); // set number of bulk iterations for KMeans algorithm IterativeDataSet<KMeans.Centroid> loop = centroids.iterate(20); // add some re-partitions to increase network buffer use DataSet<KMeans.Centroid> newCentroids = points // compute closest centroid for each point .map(new KMeans.SelectNearestCenter()).withBroadcastSet(loop, "centroids") .rebalance() // count and sum point coordinates for each centroid .map(new KMeans.CountAppender()) .groupBy(0).reduce(new KMeans.CentroidAccumulator()) // compute new centroids from point counts and coordinate sums .rebalance() .map(new KMeans.CentroidAverager()); // feed new centroids back into next iteration DataSet<KMeans.Centroid> finalCentroids = loop.closeWith(newCentroids); DataSet<Tuple2<Integer, KMeans.Point>> clusteredPoints = points // assign points to final clusters .map(new KMeans.SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids"); clusteredPoints.output(new DiscardingOutputFormat<Tuple2<Integer, KMeans.Point>>()); env.execute("KMeans Example"); }
Example #6
Source File: SuccessAfterNetworkBuffersFailureITCase.java From flink with Apache License 2.0 | 5 votes |
private static void runKMeans(ExecutionEnvironment env) throws Exception { env.setParallelism(PARALLELISM); env.getConfig().disableSysoutLogging(); // get input data DataSet<KMeans.Point> points = KMeansData.getDefaultPointDataSet(env).rebalance(); DataSet<KMeans.Centroid> centroids = KMeansData.getDefaultCentroidDataSet(env).rebalance(); // set number of bulk iterations for KMeans algorithm IterativeDataSet<KMeans.Centroid> loop = centroids.iterate(20); // add some re-partitions to increase network buffer use DataSet<KMeans.Centroid> newCentroids = points // compute closest centroid for each point .map(new KMeans.SelectNearestCenter()).withBroadcastSet(loop, "centroids") .rebalance() // count and sum point coordinates for each centroid .map(new KMeans.CountAppender()) .groupBy(0).reduce(new KMeans.CentroidAccumulator()) // compute new centroids from point counts and coordinate sums .rebalance() .map(new KMeans.CentroidAverager()); // feed new centroids back into next iteration DataSet<KMeans.Centroid> finalCentroids = loop.closeWith(newCentroids); DataSet<Tuple2<Integer, KMeans.Point>> clusteredPoints = points // assign points to final clusters .map(new KMeans.SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids"); clusteredPoints.output(new DiscardingOutputFormat<Tuple2<Integer, KMeans.Point>>()); env.execute("KMeans Example"); }
Example #7
Source File: SuccessAfterNetworkBuffersFailureITCase.java From flink with Apache License 2.0 | 5 votes |
private static void runKMeans(ExecutionEnvironment env) throws Exception { env.setParallelism(PARALLELISM); // get input data DataSet<KMeans.Point> points = KMeansData.getDefaultPointDataSet(env).rebalance(); DataSet<KMeans.Centroid> centroids = KMeansData.getDefaultCentroidDataSet(env).rebalance(); // set number of bulk iterations for KMeans algorithm IterativeDataSet<KMeans.Centroid> loop = centroids.iterate(20); // add some re-partitions to increase network buffer use DataSet<KMeans.Centroid> newCentroids = points // compute closest centroid for each point .map(new KMeans.SelectNearestCenter()).withBroadcastSet(loop, "centroids") .rebalance() // count and sum point coordinates for each centroid .map(new KMeans.CountAppender()) .groupBy(0).reduce(new KMeans.CentroidAccumulator()) // compute new centroids from point counts and coordinate sums .rebalance() .map(new KMeans.CentroidAverager()); // feed new centroids back into next iteration DataSet<KMeans.Centroid> finalCentroids = loop.closeWith(newCentroids); DataSet<Tuple2<Integer, KMeans.Point>> clusteredPoints = points // assign points to final clusters .map(new KMeans.SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids"); clusteredPoints.output(new DiscardingOutputFormat<Tuple2<Integer, KMeans.Point>>()); env.execute("KMeans Example"); }
Example #8
Source File: PreviewPlanDumpTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Verifies that the preview plan of the iterative KMeans example can be dumped.
 */
@Test
public void dumpIterativeKMeans() throws Exception {
	// BUG FIX: the flag literals previously carried trailing spaces ("--points " etc.),
	// which the parameter parser would presumably register under keys like "points "
	// that the program never looks up — TODO confirm against ParameterTool.fromArgs.
	verifyPlanDump(KMeans.class,
			"--points", IN_FILE,
			"--centroids", IN_FILE,
			"--output", OUT_FILE,
			"--iterations", "123");
}
Example #9
Source File: DumpCompiledPlanTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Verifies that the optimized plan of the iterative KMeans example can be dumped.
 */
@Test
public void dumpIterativeKMeans() throws Exception {
	// BUG FIX: the flag literals previously carried trailing spaces ("--points " etc.),
	// which the parameter parser would presumably register under keys like "points "
	// that the program never looks up — TODO confirm against ParameterTool.fromArgs.
	verifyOptimizedPlan(KMeans.class,
			"--points", IN_FILE,
			"--centroids", IN_FILE,
			"--output", OUT_FILE,
			"--iterations", "123");
}