org.apache.flink.api.java.ExecutionEnvironment#createLocalEnvironment

Source File: StochasticVITest.java From toolbox with Apache License 2.0

6 votes

public static void testGaussian1() throws IOException, ClassNotFoundException {


        //for (int i = 2; i <3; i++) {
        BayesianNetwork bn = BayesianNetworkLoader.loadFromFile("../networks/simulated/Normal_MultinomialParents.bn");
        //bn.randomInitialization(new Random(0));

        BayesianNetworkSampler sampler = new BayesianNetworkSampler(bn);
        sampler.setSeed(2);
        DataStream<DataInstance> data = sampler.sampleToDataStream(10000);

        //Set-up Flink session.
        Configuration conf = new Configuration();
        conf.setInteger("taskmanager.network.numberOfBuffers", 12000);
        final ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(conf);
                env.getConfig().disableSysoutLogging();         env.setParallelism(Main.PARALLELISM);

        baseTest(env, data, bn, 10000, 1000, 0.2);

        //}
    }

Source File: dVMPTest.java From toolbox with Apache License 2.0

6 votes

public static void testGaussian1() throws IOException, ClassNotFoundException {


        //for (int i = 2; i <3; i++) {
            BayesianNetwork bn = BayesianNetworkLoader.loadFromFile("../networks/simulated/Normal_MultinomialParents.bn");
            //bn.randomInitialization(new Random(0));

            BayesianNetworkSampler sampler = new BayesianNetworkSampler(bn);
            sampler.setSeed(2);
            DataStream<DataInstance> data = sampler.sampleToDataStream(10000);

            //Set-up Flink session.
            Configuration conf = new Configuration();
            conf.setInteger("taskmanager.network.numberOfBuffers", 12000);
            final ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(conf);
                    env.getConfig().disableSysoutLogging();         env.setParallelism(Main.PARALLELISM);

            baseTest(env, data, bn, 1000, 0.2);

        //}
    }

Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0

5 votes

@Test
public void checkSinglePartitionedOrderedSource2() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(1)
			.splitsOrderedBy(new int[]{1, 0}, new Order[]{Order.ASCENDING, Order.DESCENDING});

	data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue((new FieldSet(lprops.getGroupedFields().toArray())).equals(new FieldSet(1, 0)));
	Assert.assertTrue(lprops.getOrdering() == null);

}

Source File: ReplicatingDataSourceTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Tests join program with replicated data source behind multiple map ops.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMultiMaps() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.filter(new NoFilter())
			.mapPartition(new IdPMap())
			.flatMap(new IdFlatMap())
			.map(new IdMap())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// when join should have forward strategy on both sides
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
	ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}

Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0

5 votes

/**
 * Tests join program with replicated data source behind map.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMap() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.map(new IdMap())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// when join should have forward strategy on both sides
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
	ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}

Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0

5 votes

/**
 * Tests join program with replicated data source behind flatMap.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindFlatMap() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.flatMap(new IdFlatMap())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// when join should have forward strategy on both sides
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
	ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}

Source File: PropertyDataSourceTest.java From flink with Apache License 2.0

5 votes

@Test
public void checkSinglePartitionedGroupedSource5() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

	data.getSplitDataProperties()
			.splitsPartitionedBy("f2")
			.splitsGroupedBy("f2");

	data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(4)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(4)));
	Assert.assertTrue(lprops.getOrdering() == null);

}

Source File: dVMPv1Test.java From toolbox with Apache License 2.0

5 votes

public static void testMultinomials2() throws IOException, ClassNotFoundException {
    Variables variables = new Variables();
    Variable varA = variables.newMultinomialVariable("A", 2);
    Variable varB = variables.newMultinomialVariable("B", 2);

    DAG dag = new DAG(variables);

    dag.getParentSet(varB).addParent(varA);

    BayesianNetwork bn = new BayesianNetwork(dag);

    Multinomial distA = bn.getConditionalDistribution(varA);
    Multinomial_MultinomialParents distB = bn.getConditionalDistribution(varB);

    distA.setProbabilities(new double[]{0.6, 0.4});
    distB.getMultinomial(0).setProbabilities(new double[]{0.75, 0.25});
    distB.getMultinomial(1).setProbabilities(new double[]{0.25, 0.75});

    BayesianNetworkSampler sampler = new BayesianNetworkSampler(bn);
    sampler.setSeed(2);
    DataStream<DataInstance> data = sampler.sampleToDataStream(1000);

    //Set-up Flink session.
    Configuration conf = new Configuration();
    conf.setInteger("taskmanager.network.numberOfBuffers", 12000);
    final ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(conf);
            env.getConfig().disableSysoutLogging();         env.setParallelism(Main.PARALLELISM);

    baseTest(env, data, bn, 100, 0.05);

}

Source File: PropertyDataSourceTest.java From flink with Apache License 2.0

5 votes

@Test
public void checkSinglePartitionedGroupedSource2() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(0)
			.splitsGroupedBy(1, 0);

	data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(0, 1)));
	Assert.assertTrue(lprops.getOrdering() == null);

}

Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0

5 votes

/**
 * Tests join program with replicated data source behind map partition.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMapPartition() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.mapPartition(new IdPMap())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// when join should have forward strategy on both sides
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
	ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}

Source File: DistinctTranslationTest.java From flink with Apache License 2.0

5 votes

@Test
public void translateDistinctExpressionKey() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

		DataSet<CustomType> initialData = getSourcePojoDataSet(env);

		initialData.distinct("myInt").output(new DiscardingOutputFormat<CustomType>());

		Plan p = env.createProgramPlan();

		GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

		// currently distinct is translated to a Reduce
		ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

		// check types
		assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
		assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

		// check keys
		assertArrayEquals(new int[] {0}, reducer.getKeyColumns(0));

		// parallelism was not configured on the operator
		assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1);

		assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test caused an error: " + e.getMessage());
	}
}

Source File: PropertyDataSourceTest.java From flink with Apache License 2.0

5 votes

@Test
public void checkSinglePartitionedOrderedSource6() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

	data.getSplitDataProperties()
			.splitsPartitionedBy("f1.intField")
			.splitsOrderedBy("f1", new Order[]{Order.DESCENDING});

	data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(2)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(1,2,3)));
	Assert.assertTrue(lprops.getOrdering() == null);

}

Source File: PropertyDataSourceTest.java From flink with Apache License 2.0

5 votes

@Test
public void checkSinglePartitionedSource2() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(1, 0);

	data.output(new DiscardingOutputFormat<Tuple2<Long,String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0, 1)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}

Source File: dVMPTest.java From toolbox with Apache License 2.0

4 votes

public void testingMLParallelAsiaWithUpdate() throws IOException, ClassNotFoundException {

        //Set-up Flink session.
        Configuration conf = new Configuration();
        conf.setInteger("taskmanager.network.numberOfBuffers", 12000);
        final ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(conf);
                env.getConfig().disableSysoutLogging();         env.setParallelism(Main.PARALLELISM);

        // load the true Asia Bayesian network
        BayesianNetwork asianet = BayesianNetworkLoader.loadFromFile("../networks/dataWeka/asia.bn");
        asianet.randomInitialization(new Random(0));
        if (Main.VERBOSE) System.out.println("\nAsia network \n ");
        //if (Main.VERBOSE) System.out.println(asianet.getDAG().outputString());
        if (Main.VERBOSE) System.out.println(asianet.toString());

        //Sampling from Asia BN
        BayesianNetworkSampler sampler = new BayesianNetworkSampler(asianet);
        sampler.setSeed(0);
        //Load the sampled data
        DataStream<DataInstance> data = sampler.sampleToDataStream(10000);

        DataStreamWriter.writeDataToFile(data, "../datasets/simulated/tmp.arff");

        DataFlink<DataInstance> dataFlink = DataFlinkLoader.loadDataFromFile(env, "../datasets/simulated/tmp.arff", false);

        //Structure learning is excluded from the test, i.e., we use directly the initial Asia network structure
        // and just learn then test the parameter learning

        //Parameter Learning
        dVMP parallelVB = new dVMP();
        parallelVB.setOutput(true);
        parallelVB.setSeed(5);
        parallelVB.setBatchSize(1000);
        parallelVB.setLocalThreshold(0.001);
        parallelVB.setGlobalThreshold(0.05);
        parallelVB.setMaximumLocalIterations(100);
        parallelVB.setMaximumGlobalIterations(100);


        parallelVB.setDAG(asianet.getDAG());
        parallelVB.initLearning();
        parallelVB.updateModel(dataFlink);
        BayesianNetwork bnet = parallelVB.getLearntBayesianNetwork();

        //Check if the probability distributions of each node
        for (Variable var : asianet.getVariables()) {
            if (Main.VERBOSE) System.out.println("\n------ Variable " + var.getName() + " ------");
            if (Main.VERBOSE) System.out.println("\nTrue distribution:\n" + asianet.getConditionalDistribution(var));
            if (Main.VERBOSE) System.out.println("\nLearned distribution:\n" + bnet.getConditionalDistribution(var));
            Assert.assertTrue(bnet.getConditionalDistribution(var).equalDist(asianet.getConditionalDistribution(var), 0.05));
        }

        //Or check directly if the true and learned networks are equals
        Assert.assertTrue(bnet.equalBNs(asianet, 0.05));
    }

Source File: UnionTranslationTest.java From flink with Apache License 2.0

4 votes

@Test
public void translateUnion3SortedGroup() {
	try {
		final int parallelism = 4;
		ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

		DataSet<Tuple3<Double, StringValue, LongValue>> dataset1 = getSourceDataSet(env, 2);

		DataSet<Tuple3<Double, StringValue, LongValue>> dataset2 = getSourceDataSet(env, 3);

		DataSet<Tuple3<Double, StringValue, LongValue>> dataset3 = getSourceDataSet(env, -1);

		dataset1.union(dataset2).union(dataset3)
				.groupBy((KeySelector<Tuple3<Double, StringValue, LongValue>, String>) value -> "")
				.sortGroup((KeySelector<Tuple3<Double, StringValue, LongValue>, String>) value -> "", Order.ASCENDING)
				.reduceGroup((GroupReduceFunction<Tuple3<Double, StringValue, LongValue>, String>) (values, out) -> {})
				.returns(String.class)
				.output(new DiscardingOutputFormat<>());

		Plan p = env.createProgramPlan();

		// The plan should look like the following one.
		//
		// DataSet1(2) - MapOperator(2)-+
		//	                            |- Union(-1) -+
		// DataSet2(3) - MapOperator(3)-+             |- Union(-1) - SingleInputOperator - Sink
		//                                            |
		//             DataSet3(-1) - MapOperator(-1)-+

		GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
		Union secondUnionOperator = (Union) ((SingleInputOperator) sink.getInput()).getInput();

		// The first input of the second union should be the first union.
		Union firstUnionOperator = (Union) secondUnionOperator.getFirstInput();

		// The key mapper should be added to the second input stream of the second union.
		assertTrue(secondUnionOperator.getSecondInput() instanceof MapOperatorBase<?, ?, ?>);

		// The key mappers should be added to both of the two input streams for the first union.
		assertTrue(firstUnionOperator.getFirstInput() instanceof MapOperatorBase<?, ?, ?>);
		assertTrue(firstUnionOperator.getSecondInput() instanceof MapOperatorBase<?, ?, ?>);

		// The parallelisms of the key mappers should be equal to those of their inputs.
		assertEquals(firstUnionOperator.getFirstInput().getParallelism(), 2);
		assertEquals(firstUnionOperator.getSecondInput().getParallelism(), 3);
		assertEquals(secondUnionOperator.getSecondInput().getParallelism(), -1);

		// The union should always have the default parallelism.
		assertEquals(secondUnionOperator.getParallelism(), ExecutionConfig.PARALLELISM_DEFAULT);
		assertEquals(firstUnionOperator.getParallelism(), ExecutionConfig.PARALLELISM_DEFAULT);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test caused an error: " + e.getMessage());
	}
}

Source File: dVMPTest.java From toolbox with Apache License 2.0

4 votes

public void testingMLParallelWasteHidden() throws IOException, ClassNotFoundException {
    //Set-up Flink session.
    Configuration conf = new Configuration();
    conf.setInteger("taskmanager.network.numberOfBuffers", 12000);
    final ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(conf);
            env.getConfig().disableSysoutLogging();         env.setParallelism(Main.PARALLELISM);


    // load the true WasteIncinerator Bayesian network
    BayesianNetwork wasteIncinerator = BayesianNetworkLoader.loadFromFile("../networks/simulated/WasteIncinerator.bn");
    wasteIncinerator.randomInitialization(new Random(0));
    if (Main.VERBOSE) System.out.println("\nAsia network \n ");
    //if (Main.VERBOSE) System.out.println(asianet.getDAG().outputString());
    if (Main.VERBOSE) System.out.println(wasteIncinerator.toString());

    //Sampling from WasteIncinerator BN
    BayesianNetworkSampler sampler = new BayesianNetworkSampler(wasteIncinerator);
    sampler.setSeed(0);
    //Load the sampled data
    DataStream<DataInstance> data = sampler.sampleToDataStream(1000);
    sampler.setHiddenVar(wasteIncinerator.getVariables().getVariableById(6));
    DataStreamWriter.writeDataToFile(data, "../datasets/simulated/tmp.arff");

    //We load the data
    DataFlink<DataInstance> dataFlink = DataFlinkLoader.loadDataFromFile(env, "../datasets/simulated/tmp.arff", false);


    //ParallelVB is defined
    dVMP parallelVB = new dVMP();
    parallelVB.setOutput(true);
    parallelVB.setSeed(5);
    parallelVB.setBatchSize(100);
    parallelVB.setLocalThreshold(0.001);
    parallelVB.setGlobalThreshold(0.001);
    parallelVB.setMaximumLocalIterations(100);
    parallelVB.setMaximumGlobalIterations(100);

    //Setting DAG
    parallelVB.setDAG(wasteIncinerator.getDAG());


    //Setting the distributed data source
    parallelVB.initLearning();
    parallelVB.updateModel(dataFlink);

    BayesianNetwork bnet = parallelVB.getLearntBayesianNetwork();

    if (Main.VERBOSE) System.out.println(bnet.toString());
}

Source File: dVMPv1Test.java From toolbox with Apache License 2.0

4 votes

public void testingMLParallelAsiaHidden() throws IOException, ClassNotFoundException {

        //Set-up Flink session.
        Configuration conf = new Configuration();
        conf.setInteger("taskmanager.network.numberOfBuffers", 12000);
        final ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(conf);
                env.getConfig().disableSysoutLogging();         env.setParallelism(Main.PARALLELISM);

        // load the true Asia Bayesian network
        BayesianNetwork asianet = BayesianNetworkLoader.loadFromFile("../networks/dataWeka/asia.bn");
        asianet.randomInitialization(new Random(0));
        if (Main.VERBOSE) System.out.println("\nAsia network \n ");
        //if (Main.VERBOSE) System.out.println(asianet.getDAG().outputString());
        if (Main.VERBOSE) System.out.println(asianet.toString());

        //Sampling from Asia BN
        BayesianNetworkSampler sampler = new BayesianNetworkSampler(asianet);
        sampler.setSeed(0);
        //Load the sampled data
        DataStream<DataInstance> data = sampler.sampleToDataStream(10000);
        sampler.setHiddenVar(asianet.getVariables().getVariableById(7));
        DataStreamWriter.writeDataToFile(data, "../datasets/simulated/tmp.arff");

        DataFlink<DataInstance> dataFlink = DataFlinkLoader.loadDataFromFile(env, "../datasets/simulated/tmp.arff", false);

        //Structure learning is excluded from the test, i.e., we use directly the initial Asia network structure
        // and just learn then test the parameter learning

        long start = System.nanoTime();

        //Parameter Learning
        dVMPv1 parallelVB = new dVMPv1();
        parallelVB.setOutput(true);
        parallelVB.setSeed(5);
        parallelVB.setBatchSize(100);
        parallelVB.setLocalThreshold(0.001);
        parallelVB.setGlobalThreshold(0.05);
        parallelVB.setMaximumLocalIterations(100);
        parallelVB.setMaximumGlobalIterations(100);


        parallelVB.setDAG(asianet.getDAG());
        parallelVB.initLearning();
        parallelVB.updateModel(dataFlink);
        BayesianNetwork bnet = parallelVB.getLearntBayesianNetwork();

        if (Main.VERBOSE) System.out.println(bnet.toString());

        long duration = (System.nanoTime() - start) / 1;
        double seconds = duration / 1000000000.0;
        if (Main.VERBOSE) System.out.println("Running time: \n" + seconds + " secs");

    }

Source File: ParallelVBTest.java From toolbox with Apache License 2.0

4 votes

public void testingMLParallelAsiaHidden() throws IOException, ClassNotFoundException {

        //Set-up Flink session.
        Configuration conf = new Configuration();
        conf.setInteger("taskmanager.network.numberOfBuffers", 12000);
        final ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(conf);
                env.getConfig().disableSysoutLogging();         env.setParallelism(Main.PARALLELISM);

        // load the true Asia Bayesian network
        BayesianNetwork asianet = BayesianNetworkLoader.loadFromFile("../networks/dataWeka/asia.bn");
        asianet.randomInitialization(new Random(0));
        if (Main.VERBOSE) System.out.println("\nAsia network \n ");
        //if (Main.VERBOSE) System.out.println(asianet.getDAG().outputString());
        if (Main.VERBOSE) System.out.println(asianet.toString());

        //Sampling from Asia BN
        BayesianNetworkSampler sampler = new BayesianNetworkSampler(asianet);
        sampler.setSeed(0);
        //Load the sampled data
        DataStream<DataInstance> data = sampler.sampleToDataStream(10000);
        sampler.setHiddenVar(asianet.getVariables().getVariableById(7));
        DataStreamWriter.writeDataToFile(data, "../datasets/simulated/tmp.arff");

        DataFlink<DataInstance> dataFlink = DataFlinkLoader.loadDataFromFile(env, "../datasets/simulated/tmp.arff", false);

        //Structure learning is excluded from the test, i.e., we use directly the initial Asia network structure
        // and just learn then test the parameter learning

        long start = System.nanoTime();

        //Parameter Learning
        ParallelVB parallelVB = new ParallelVB();
        parallelVB.setOutput(true);
        parallelVB.setSeed(5);
        parallelVB.setBatchSize(100);
        parallelVB.setLocalThreshold(0.001);
        parallelVB.setGlobalThreshold(0.05);
        parallelVB.setMaximumLocalIterations(100);
        parallelVB.setMaximumGlobalIterations(100);


        parallelVB.setDAG(asianet.getDAG());
        parallelVB.initLearning();
        parallelVB.updateModel(dataFlink);
        BayesianNetwork bnet = parallelVB.getLearntBayesianNetwork();

        if (Main.VERBOSE) System.out.println(bnet.toString());

        long duration = (System.nanoTime() - start) / 1;
        double seconds = duration / 1000000000.0;
        if (Main.VERBOSE) System.out.println("Running time: \n" + seconds + " secs");

    }

Source File: dVMPv1Test.java From toolbox with Apache License 2.0

4 votes

public void testParallelVBExtended() throws Exception {

        int nCVars = 50;//Integer.parseInt(args[0]);
        int nMVars = 50;//Integer.parseInt(args[1]);
        int nSamples = 1000;//Integer.parseInt(args[2]);
        int windowSize = 100;//Integer.parseInt(args[3]);
        int globalIter = 10;//Integer.parseInt(args[4]);
        int localIter = 100;//Integer.parseInt(args[5]);
        int seed = 0;//Integer.parseInt(args[6]);

        //Set-up Flink session.
        Configuration conf = new Configuration();
        conf.setInteger("taskmanager.network.numberOfBuffers", 12000);
        final ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(conf);
                env.getConfig().disableSysoutLogging();         env.setParallelism(Main.PARALLELISM);

        /*
         * Logging
         */
        //BasicConfigurator.configure();
        //PropertyConfigurator.configure(args[7]);

        //String fileName = "hdfs:///tmp"+nCVars+"_"+nMVars+"_"+nSamples+"_"+windowSize+"_"+globalIter+"_"+localIter+".arff";
        String fileName = "../datasets/tmp"+nCVars+"_"+nMVars+"_"+nSamples+"_"+windowSize+"_"+globalIter+"_"+localIter+".arff";

        // Randomly generate the data stream using {@link BayesianNetworkGenerator} and {@link BayesianNetworkSampler}.
        BayesianNetworkGenerator.setSeed(seed);
        BayesianNetworkGenerator.setNumberOfGaussianVars(nCVars);
        BayesianNetworkGenerator.setNumberOfMultinomialVars(nMVars, 2);
        BayesianNetwork originalBnet  = BayesianNetworkGenerator.generateBayesianNetwork();

        //Sampling from Asia BN
        eu.amidst.flinklink.core.utils.BayesianNetworkSampler sampler = new eu.amidst.flinklink.core.utils.BayesianNetworkSampler(originalBnet);
        sampler.setSeed(seed);

        //Load the sampled data
        DataFlink<DataInstance> data = sampler.sampleToDataFlink(env,nSamples);

        DataFlinkWriter.writeDataToARFFFolder(data,fileName);

        DataFlink<DataInstance> dataFlink = DataFlinkLoader.loadDataFromFolder(env,fileName, false);

        DAG hiddenNB = getHiddenNaiveBayesStructure(dataFlink.getAttributes());


        //Structure learning is excluded from the test, i.e., we use directly the initial Asia network structure
        // and just learn then test the parameter learning

        long start = System.nanoTime();

        //Parameter Learning
        dVMPv1 parallelVB = new dVMPv1();
        parallelVB.setGlobalThreshold(0.1);
        parallelVB.setMaximumGlobalIterations(globalIter);
        parallelVB.setLocalThreshold(0.1);
        parallelVB.setMaximumLocalIterations(localIter);
        parallelVB.setSeed(5);

        //Set the window size
        parallelVB.setBatchSize(windowSize);


        parallelVB.setDAG(hiddenNB);
        parallelVB.initLearning();
        parallelVB.updateModel(dataFlink);
        BayesianNetwork LearnedBnet = parallelVB.getLearntBayesianNetwork();
        if (Main.VERBOSE) System.out.println(LearnedBnet.toString());

        long duration = (System.nanoTime() - start) / 1;
        double seconds = duration / 1000000000.0;
        //logger.info("Global ELBO: {}", parallelVB.getLogMarginalProbability());

    }

Source File: DistinctTranslationTest.java From flink with Apache License 2.0

4 votes

@Test
public void translateDistinctKeySelector() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

		DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);

		initialData.distinct(new KeySelector<Tuple3<Double, StringValue, LongValue>, StringValue>() {
			public StringValue getKey(Tuple3<Double, StringValue, LongValue> value) {
				return value.f1;
			}
		}).setParallelism(4).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

		Plan p = env.createProgramPlan();

		GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

		MapOperatorBase<?, ?, ?> keyRemover = (MapOperatorBase<?, ?, ?>) sink.getInput();
		PlanUnwrappingReduceOperator<?, ?> reducer = (PlanUnwrappingReduceOperator<?, ?>) keyRemover.getInput();
		MapOperatorBase<?, ?, ?> keyExtractor = (MapOperatorBase<?, ?, ?>) reducer.getInput();

		// check the parallelisms
		assertEquals(1, keyExtractor.getParallelism());
		assertEquals(4, reducer.getParallelism());

		// check types
		TypeInformation<?> keyValueInfo = new TupleTypeInfo<Tuple2<StringValue, Tuple3<Double, StringValue, LongValue>>>(
				new ValueTypeInfo<StringValue>(StringValue.class),
				initialData.getType());

		assertEquals(initialData.getType(), keyExtractor.getOperatorInfo().getInputType());
		assertEquals(keyValueInfo, keyExtractor.getOperatorInfo().getOutputType());

		assertEquals(keyValueInfo, reducer.getOperatorInfo().getInputType());
		assertEquals(keyValueInfo, reducer.getOperatorInfo().getOutputType());

		assertEquals(keyValueInfo, keyRemover.getOperatorInfo().getInputType());
		assertEquals(initialData.getType(), keyRemover.getOperatorInfo().getOutputType());

		// check keys
		assertEquals(KeyExtractingMapper.class, keyExtractor.getUserCodeWrapper().getUserCodeClass());

		assertTrue(keyExtractor.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test caused an error: " + e.getMessage());
	}
}

Java Code Examples for org.apache.flink.api.java.ExecutionEnvironment#createLocalEnvironment()