Java Code Examples for org.apache.flink.api.java.ExecutionEnvironment#createLocalEnvironment()
The following examples show how to use
org.apache.flink.api.java.ExecutionEnvironment#createLocalEnvironment().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: StochasticVITest.java From toolbox with Apache License 2.0 | 6 votes |
public static void testGaussian1() throws IOException, ClassNotFoundException { //for (int i = 2; i <3; i++) { BayesianNetwork bn = BayesianNetworkLoader.loadFromFile("../networks/simulated/Normal_MultinomialParents.bn"); //bn.randomInitialization(new Random(0)); BayesianNetworkSampler sampler = new BayesianNetworkSampler(bn); sampler.setSeed(2); DataStream<DataInstance> data = sampler.sampleToDataStream(10000); //Set-up Flink session. Configuration conf = new Configuration(); conf.setInteger("taskmanager.network.numberOfBuffers", 12000); final ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(conf); env.getConfig().disableSysoutLogging(); env.setParallelism(Main.PARALLELISM); baseTest(env, data, bn, 10000, 1000, 0.2); //} }
Example 2
Source File: dVMPTest.java From toolbox with Apache License 2.0 | 6 votes |
public static void testGaussian1() throws IOException, ClassNotFoundException { //for (int i = 2; i <3; i++) { BayesianNetwork bn = BayesianNetworkLoader.loadFromFile("../networks/simulated/Normal_MultinomialParents.bn"); //bn.randomInitialization(new Random(0)); BayesianNetworkSampler sampler = new BayesianNetworkSampler(bn); sampler.setSeed(2); DataStream<DataInstance> data = sampler.sampleToDataStream(10000); //Set-up Flink session. Configuration conf = new Configuration(); conf.setInteger("taskmanager.network.numberOfBuffers", 12000); final ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(conf); env.getConfig().disableSysoutLogging(); env.setParallelism(Main.PARALLELISM); baseTest(env, data, bn, 1000, 0.2); //} }
Example 3
Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void checkSinglePartitionedOrderedSource2() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class); data.getSplitDataProperties() .splitsPartitionedBy(1) .splitsOrderedBy(new int[]{1, 0}, new Order[]{Order.ASCENDING, Order.DESCENDING}); data.output(new DiscardingOutputFormat<Tuple2<Long, String>>()); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor(); GlobalProperties gprops = sourceNode.getGlobalProperties(); LocalProperties lprops = sourceNode.getLocalProperties(); Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1))); Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING); Assert.assertTrue((new FieldSet(lprops.getGroupedFields().toArray())).equals(new FieldSet(1, 0))); Assert.assertTrue(lprops.getOrdering() == null); }
Example 4
Source File: ReplicatingDataSourceTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Tests join program with replicated data source behind multiple map ops. */ @Test public void checkJoinWithReplicatedSourceInputBehindMultiMaps() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .filter(new NoFilter()) .mapPartition(new IdPMap()) .flatMap(new IdFlatMap()) .map(new IdMap()) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when join should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy(); ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2); }
Example 5
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests join program with replicated data source behind map. */ @Test public void checkJoinWithReplicatedSourceInputBehindMap() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .map(new IdMap()) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when join should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy(); ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2); }
Example 6
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests join program with replicated data source behind flatMap. */ @Test public void checkJoinWithReplicatedSourceInputBehindFlatMap() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .flatMap(new IdFlatMap()) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when join should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy(); ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2); }
Example 7
Source File: PropertyDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void checkSinglePartitionedGroupedSource5() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType); data.getSplitDataProperties() .splitsPartitionedBy("f2") .splitsGroupedBy("f2"); data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>()); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor(); GlobalProperties gprops = sourceNode.getGlobalProperties(); LocalProperties lprops = sourceNode.getLocalProperties(); Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(4))); Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING); Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(4))); Assert.assertTrue(lprops.getOrdering() == null); }
Example 8
Source File: dVMPv1Test.java From toolbox with Apache License 2.0 | 5 votes |
public static void testMultinomials2() throws IOException, ClassNotFoundException { Variables variables = new Variables(); Variable varA = variables.newMultinomialVariable("A", 2); Variable varB = variables.newMultinomialVariable("B", 2); DAG dag = new DAG(variables); dag.getParentSet(varB).addParent(varA); BayesianNetwork bn = new BayesianNetwork(dag); Multinomial distA = bn.getConditionalDistribution(varA); Multinomial_MultinomialParents distB = bn.getConditionalDistribution(varB); distA.setProbabilities(new double[]{0.6, 0.4}); distB.getMultinomial(0).setProbabilities(new double[]{0.75, 0.25}); distB.getMultinomial(1).setProbabilities(new double[]{0.25, 0.75}); BayesianNetworkSampler sampler = new BayesianNetworkSampler(bn); sampler.setSeed(2); DataStream<DataInstance> data = sampler.sampleToDataStream(1000); //Set-up Flink session. Configuration conf = new Configuration(); conf.setInteger("taskmanager.network.numberOfBuffers", 12000); final ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(conf); env.getConfig().disableSysoutLogging(); env.setParallelism(Main.PARALLELISM); baseTest(env, data, bn, 100, 0.05); }
Example 9
Source File: PropertyDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void checkSinglePartitionedGroupedSource2() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class); data.getSplitDataProperties() .splitsPartitionedBy(0) .splitsGroupedBy(1, 0); data.output(new DiscardingOutputFormat<Tuple2<Long, String>>()); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor(); GlobalProperties gprops = sourceNode.getGlobalProperties(); LocalProperties lprops = sourceNode.getLocalProperties(); Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0))); Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING); Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(0, 1))); Assert.assertTrue(lprops.getOrdering() == null); }
Example 10
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests join program with replicated data source behind map partition. */ @Test public void checkJoinWithReplicatedSourceInputBehindMapPartition() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .mapPartition(new IdPMap()) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when join should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy(); ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2); }
Example 11
Source File: DistinctTranslationTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void translateDistinctExpressionKey() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<CustomType> initialData = getSourcePojoDataSet(env); initialData.distinct("myInt").output(new DiscardingOutputFormat<CustomType>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); // currently distinct is translated to a Reduce ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertArrayEquals(new int[] {0}, reducer.getKeyColumns(0)); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
Example 12
Source File: PropertyDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void checkSinglePartitionedOrderedSource6() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType); data.getSplitDataProperties() .splitsPartitionedBy("f1.intField") .splitsOrderedBy("f1", new Order[]{Order.DESCENDING}); data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>()); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor(); GlobalProperties gprops = sourceNode.getGlobalProperties(); LocalProperties lprops = sourceNode.getLocalProperties(); Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(2))); Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING); Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(1,2,3))); Assert.assertTrue(lprops.getOrdering() == null); }
Example 13
Source File: PropertyDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void checkSinglePartitionedSource2() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class); data.getSplitDataProperties() .splitsPartitionedBy(1, 0); data.output(new DiscardingOutputFormat<Tuple2<Long,String>>()); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor(); GlobalProperties gprops = sourceNode.getGlobalProperties(); LocalProperties lprops = sourceNode.getLocalProperties(); Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0, 1))); Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING); Assert.assertTrue(lprops.getGroupedFields() == null); Assert.assertTrue(lprops.getOrdering() == null); }
Example 14
Source File: dVMPTest.java From toolbox with Apache License 2.0 | 4 votes |
public void testingMLParallelAsiaWithUpdate() throws IOException, ClassNotFoundException { //Set-up Flink session. Configuration conf = new Configuration(); conf.setInteger("taskmanager.network.numberOfBuffers", 12000); final ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(conf); env.getConfig().disableSysoutLogging(); env.setParallelism(Main.PARALLELISM); // load the true Asia Bayesian network BayesianNetwork asianet = BayesianNetworkLoader.loadFromFile("../networks/dataWeka/asia.bn"); asianet.randomInitialization(new Random(0)); if (Main.VERBOSE) System.out.println("\nAsia network \n "); //if (Main.VERBOSE) System.out.println(asianet.getDAG().outputString()); if (Main.VERBOSE) System.out.println(asianet.toString()); //Sampling from Asia BN BayesianNetworkSampler sampler = new BayesianNetworkSampler(asianet); sampler.setSeed(0); //Load the sampled data DataStream<DataInstance> data = sampler.sampleToDataStream(10000); DataStreamWriter.writeDataToFile(data, "../datasets/simulated/tmp.arff"); DataFlink<DataInstance> dataFlink = DataFlinkLoader.loadDataFromFile(env, "../datasets/simulated/tmp.arff", false); //Structure learning is excluded from the test, i.e., we use directly the initial Asia network structure // and just learn then test the parameter learning //Parameter Learning dVMP parallelVB = new dVMP(); parallelVB.setOutput(true); parallelVB.setSeed(5); parallelVB.setBatchSize(1000); parallelVB.setLocalThreshold(0.001); parallelVB.setGlobalThreshold(0.05); parallelVB.setMaximumLocalIterations(100); parallelVB.setMaximumGlobalIterations(100); parallelVB.setDAG(asianet.getDAG()); parallelVB.initLearning(); parallelVB.updateModel(dataFlink); BayesianNetwork bnet = parallelVB.getLearntBayesianNetwork(); //Check if the probability distributions of each node for (Variable var : asianet.getVariables()) { if (Main.VERBOSE) System.out.println("\n------ Variable " + var.getName() + " ------"); if (Main.VERBOSE) System.out.println("\nTrue 
distribution:\n" + asianet.getConditionalDistribution(var)); if (Main.VERBOSE) System.out.println("\nLearned distribution:\n" + bnet.getConditionalDistribution(var)); Assert.assertTrue(bnet.getConditionalDistribution(var).equalDist(asianet.getConditionalDistribution(var), 0.05)); } //Or check directly if the true and learned networks are equals Assert.assertTrue(bnet.equalBNs(asianet, 0.05)); }
Example 15
Source File: UnionTranslationTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void translateUnion3SortedGroup() { try { final int parallelism = 4; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<Tuple3<Double, StringValue, LongValue>> dataset1 = getSourceDataSet(env, 2); DataSet<Tuple3<Double, StringValue, LongValue>> dataset2 = getSourceDataSet(env, 3); DataSet<Tuple3<Double, StringValue, LongValue>> dataset3 = getSourceDataSet(env, -1); dataset1.union(dataset2).union(dataset3) .groupBy((KeySelector<Tuple3<Double, StringValue, LongValue>, String>) value -> "") .sortGroup((KeySelector<Tuple3<Double, StringValue, LongValue>, String>) value -> "", Order.ASCENDING) .reduceGroup((GroupReduceFunction<Tuple3<Double, StringValue, LongValue>, String>) (values, out) -> {}) .returns(String.class) .output(new DiscardingOutputFormat<>()); Plan p = env.createProgramPlan(); // The plan should look like the following one. // // DataSet1(2) - MapOperator(2)-+ // |- Union(-1) -+ // DataSet2(3) - MapOperator(3)-+ |- Union(-1) - SingleInputOperator - Sink // | // DataSet3(-1) - MapOperator(-1)-+ GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); Union secondUnionOperator = (Union) ((SingleInputOperator) sink.getInput()).getInput(); // The first input of the second union should be the first union. Union firstUnionOperator = (Union) secondUnionOperator.getFirstInput(); // The key mapper should be added to the second input stream of the second union. assertTrue(secondUnionOperator.getSecondInput() instanceof MapOperatorBase<?, ?, ?>); // The key mappers should be added to both of the two input streams for the first union. assertTrue(firstUnionOperator.getFirstInput() instanceof MapOperatorBase<?, ?, ?>); assertTrue(firstUnionOperator.getSecondInput() instanceof MapOperatorBase<?, ?, ?>); // The parallelisms of the key mappers should be equal to those of their inputs. 
assertEquals(firstUnionOperator.getFirstInput().getParallelism(), 2); assertEquals(firstUnionOperator.getSecondInput().getParallelism(), 3); assertEquals(secondUnionOperator.getSecondInput().getParallelism(), -1); // The union should always have the default parallelism. assertEquals(secondUnionOperator.getParallelism(), ExecutionConfig.PARALLELISM_DEFAULT); assertEquals(firstUnionOperator.getParallelism(), ExecutionConfig.PARALLELISM_DEFAULT); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
Example 16
Source File: dVMPTest.java From toolbox with Apache License 2.0 | 4 votes |
public void testingMLParallelWasteHidden() throws IOException, ClassNotFoundException { //Set-up Flink session. Configuration conf = new Configuration(); conf.setInteger("taskmanager.network.numberOfBuffers", 12000); final ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(conf); env.getConfig().disableSysoutLogging(); env.setParallelism(Main.PARALLELISM); // load the true WasteIncinerator Bayesian network BayesianNetwork wasteIncinerator = BayesianNetworkLoader.loadFromFile("../networks/simulated/WasteIncinerator.bn"); wasteIncinerator.randomInitialization(new Random(0)); if (Main.VERBOSE) System.out.println("\nAsia network \n "); //if (Main.VERBOSE) System.out.println(asianet.getDAG().outputString()); if (Main.VERBOSE) System.out.println(wasteIncinerator.toString()); //Sampling from WasteIncinerator BN BayesianNetworkSampler sampler = new BayesianNetworkSampler(wasteIncinerator); sampler.setSeed(0); //Load the sampled data DataStream<DataInstance> data = sampler.sampleToDataStream(1000); sampler.setHiddenVar(wasteIncinerator.getVariables().getVariableById(6)); DataStreamWriter.writeDataToFile(data, "../datasets/simulated/tmp.arff"); //We load the data DataFlink<DataInstance> dataFlink = DataFlinkLoader.loadDataFromFile(env, "../datasets/simulated/tmp.arff", false); //ParallelVB is defined dVMP parallelVB = new dVMP(); parallelVB.setOutput(true); parallelVB.setSeed(5); parallelVB.setBatchSize(100); parallelVB.setLocalThreshold(0.001); parallelVB.setGlobalThreshold(0.001); parallelVB.setMaximumLocalIterations(100); parallelVB.setMaximumGlobalIterations(100); //Setting DAG parallelVB.setDAG(wasteIncinerator.getDAG()); //Setting the distributed data source parallelVB.initLearning(); parallelVB.updateModel(dataFlink); BayesianNetwork bnet = parallelVB.getLearntBayesianNetwork(); if (Main.VERBOSE) System.out.println(bnet.toString()); }
Example 17
Source File: dVMPv1Test.java From toolbox with Apache License 2.0 | 4 votes |
/**
 * Runs {@code dVMPv1} parameter learning on 10000 instances sampled from the Asia network
 * and, when {@code Main.VERBOSE} is set, prints the learned network and the elapsed time.
 * The test makes no assertions; it only checks that learning completes.
 *
 * @throws IOException declared by this test; presumably raised by the file reads/writes below
 * @throws ClassNotFoundException declared by this test; presumably raised while loading the network
 */
public void testingMLParallelAsiaHidden() throws IOException, ClassNotFoundException {
    // Local Flink session with an enlarged network buffer setting.
    Configuration flinkConfig = new Configuration();
    flinkConfig.setInteger("taskmanager.network.numberOfBuffers", 12000);
    final ExecutionEnvironment flinkEnv = ExecutionEnvironment.createLocalEnvironment(flinkConfig);
    flinkEnv.getConfig().disableSysoutLogging();
    flinkEnv.setParallelism(Main.PARALLELISM);

    // Generating network: Asia structure with randomly initialised parameters.
    BayesianNetwork trueNet = BayesianNetworkLoader.loadFromFile("../networks/dataWeka/asia.bn");
    trueNet.randomInitialization(new Random(0));
    if (Main.VERBOSE) {
        System.out.println("\nAsia network \n ");
        System.out.println(trueNet.toString());
    }

    // Sample with the variable of id 7 marked hidden, write to disk, reload as a Flink data set.
    BayesianNetworkSampler networkSampler = new BayesianNetworkSampler(trueNet);
    networkSampler.setSeed(0);
    DataStream<DataInstance> sampledData = networkSampler.sampleToDataStream(10000);
    networkSampler.setHiddenVar(trueNet.getVariables().getVariableById(7));
    DataStreamWriter.writeDataToFile(sampledData, "../datasets/simulated/tmp.arff");
    DataFlink<DataInstance> flinkData =
            DataFlinkLoader.loadDataFromFile(flinkEnv, "../datasets/simulated/tmp.arff", false);

    // Structure learning is skipped: the Asia DAG is reused and only the parameters are learned.
    long start = System.nanoTime();

    dVMPv1 learner = new dVMPv1();
    learner.setOutput(true);
    learner.setSeed(5);
    learner.setBatchSize(100);
    learner.setLocalThreshold(0.001);
    learner.setGlobalThreshold(0.05);
    learner.setMaximumLocalIterations(100);
    learner.setMaximumGlobalIterations(100);
    learner.setDAG(trueNet.getDAG());
    learner.initLearning();
    learner.updateModel(flinkData);
    BayesianNetwork learnedNet = learner.getLearntBayesianNetwork();

    if (Main.VERBOSE) {
        System.out.println(learnedNet.toString());
    }

    // Elapsed nanoseconds converted to seconds.
    double seconds = (System.nanoTime() - start) / 1000000000.0;
    if (Main.VERBOSE) {
        System.out.println("Running time: \n" + seconds + " secs");
    }
}
Example 18
Source File: ParallelVBTest.java From toolbox with Apache License 2.0 | 4 votes |
public void testingMLParallelAsiaHidden() throws IOException, ClassNotFoundException { //Set-up Flink session. Configuration conf = new Configuration(); conf.setInteger("taskmanager.network.numberOfBuffers", 12000); final ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(conf); env.getConfig().disableSysoutLogging(); env.setParallelism(Main.PARALLELISM); // load the true Asia Bayesian network BayesianNetwork asianet = BayesianNetworkLoader.loadFromFile("../networks/dataWeka/asia.bn"); asianet.randomInitialization(new Random(0)); if (Main.VERBOSE) System.out.println("\nAsia network \n "); //if (Main.VERBOSE) System.out.println(asianet.getDAG().outputString()); if (Main.VERBOSE) System.out.println(asianet.toString()); //Sampling from Asia BN BayesianNetworkSampler sampler = new BayesianNetworkSampler(asianet); sampler.setSeed(0); //Load the sampled data DataStream<DataInstance> data = sampler.sampleToDataStream(10000); sampler.setHiddenVar(asianet.getVariables().getVariableById(7)); DataStreamWriter.writeDataToFile(data, "../datasets/simulated/tmp.arff"); DataFlink<DataInstance> dataFlink = DataFlinkLoader.loadDataFromFile(env, "../datasets/simulated/tmp.arff", false); //Structure learning is excluded from the test, i.e., we use directly the initial Asia network structure // and just learn then test the parameter learning long start = System.nanoTime(); //Parameter Learning ParallelVB parallelVB = new ParallelVB(); parallelVB.setOutput(true); parallelVB.setSeed(5); parallelVB.setBatchSize(100); parallelVB.setLocalThreshold(0.001); parallelVB.setGlobalThreshold(0.05); parallelVB.setMaximumLocalIterations(100); parallelVB.setMaximumGlobalIterations(100); parallelVB.setDAG(asianet.getDAG()); parallelVB.initLearning(); parallelVB.updateModel(dataFlink); BayesianNetwork bnet = parallelVB.getLearntBayesianNetwork(); if (Main.VERBOSE) System.out.println(bnet.toString()); long duration = (System.nanoTime() - start) / 1; double seconds = duration / 
1000000000.0; if (Main.VERBOSE) System.out.println("Running time: \n" + seconds + " secs"); }
Example 19
Source File: dVMPv1Test.java From toolbox with Apache License 2.0 | 4 votes |
public void testParallelVBExtended() throws Exception { int nCVars = 50;//Integer.parseInt(args[0]); int nMVars = 50;//Integer.parseInt(args[1]); int nSamples = 1000;//Integer.parseInt(args[2]); int windowSize = 100;//Integer.parseInt(args[3]); int globalIter = 10;//Integer.parseInt(args[4]); int localIter = 100;//Integer.parseInt(args[5]); int seed = 0;//Integer.parseInt(args[6]); //Set-up Flink session. Configuration conf = new Configuration(); conf.setInteger("taskmanager.network.numberOfBuffers", 12000); final ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(conf); env.getConfig().disableSysoutLogging(); env.setParallelism(Main.PARALLELISM); /* * Logging */ //BasicConfigurator.configure(); //PropertyConfigurator.configure(args[7]); //String fileName = "hdfs:///tmp"+nCVars+"_"+nMVars+"_"+nSamples+"_"+windowSize+"_"+globalIter+"_"+localIter+".arff"; String fileName = "../datasets/tmp"+nCVars+"_"+nMVars+"_"+nSamples+"_"+windowSize+"_"+globalIter+"_"+localIter+".arff"; // Randomly generate the data stream using {@link BayesianNetworkGenerator} and {@link BayesianNetworkSampler}. 
BayesianNetworkGenerator.setSeed(seed); BayesianNetworkGenerator.setNumberOfGaussianVars(nCVars); BayesianNetworkGenerator.setNumberOfMultinomialVars(nMVars, 2); BayesianNetwork originalBnet = BayesianNetworkGenerator.generateBayesianNetwork(); //Sampling from Asia BN eu.amidst.flinklink.core.utils.BayesianNetworkSampler sampler = new eu.amidst.flinklink.core.utils.BayesianNetworkSampler(originalBnet); sampler.setSeed(seed); //Load the sampled data DataFlink<DataInstance> data = sampler.sampleToDataFlink(env,nSamples); DataFlinkWriter.writeDataToARFFFolder(data,fileName); DataFlink<DataInstance> dataFlink = DataFlinkLoader.loadDataFromFolder(env,fileName, false); DAG hiddenNB = getHiddenNaiveBayesStructure(dataFlink.getAttributes()); //Structure learning is excluded from the test, i.e., we use directly the initial Asia network structure // and just learn then test the parameter learning long start = System.nanoTime(); //Parameter Learning dVMPv1 parallelVB = new dVMPv1(); parallelVB.setGlobalThreshold(0.1); parallelVB.setMaximumGlobalIterations(globalIter); parallelVB.setLocalThreshold(0.1); parallelVB.setMaximumLocalIterations(localIter); parallelVB.setSeed(5); //Set the window size parallelVB.setBatchSize(windowSize); parallelVB.setDAG(hiddenNB); parallelVB.initLearning(); parallelVB.updateModel(dataFlink); BayesianNetwork LearnedBnet = parallelVB.getLearntBayesianNetwork(); if (Main.VERBOSE) System.out.println(LearnedBnet.toString()); long duration = (System.nanoTime() - start) / 1; double seconds = duration / 1000000000.0; //logger.info("Global ELBO: {}", parallelVB.getLogMarginalProbability()); }
Example 20
Source File: DistinctTranslationTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void translateDistinctKeySelector() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env); initialData.distinct(new KeySelector<Tuple3<Double, StringValue, LongValue>, StringValue>() { public StringValue getKey(Tuple3<Double, StringValue, LongValue> value) { return value.f1; } }).setParallelism(4).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); MapOperatorBase<?, ?, ?> keyRemover = (MapOperatorBase<?, ?, ?>) sink.getInput(); PlanUnwrappingReduceOperator<?, ?> reducer = (PlanUnwrappingReduceOperator<?, ?>) keyRemover.getInput(); MapOperatorBase<?, ?, ?> keyExtractor = (MapOperatorBase<?, ?, ?>) reducer.getInput(); // check the parallelisms assertEquals(1, keyExtractor.getParallelism()); assertEquals(4, reducer.getParallelism()); // check types TypeInformation<?> keyValueInfo = new TupleTypeInfo<Tuple2<StringValue, Tuple3<Double, StringValue, LongValue>>>( new ValueTypeInfo<StringValue>(StringValue.class), initialData.getType()); assertEquals(initialData.getType(), keyExtractor.getOperatorInfo().getInputType()); assertEquals(keyValueInfo, keyExtractor.getOperatorInfo().getOutputType()); assertEquals(keyValueInfo, reducer.getOperatorInfo().getInputType()); assertEquals(keyValueInfo, reducer.getOperatorInfo().getOutputType()); assertEquals(keyValueInfo, keyRemover.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), keyRemover.getOperatorInfo().getOutputType()); // check keys assertEquals(KeyExtractingMapper.class, keyExtractor.getUserCodeWrapper().getUserCodeClass()); assertTrue(keyExtractor.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + 
e.getMessage()); } }