Java Code Examples for org.apache.flink.api.java.DataSet#writeAsCsv()
The following examples show how to use org.apache.flink.api.java.DataSet#writeAsCsv().
The originating project, source file, and license are noted above each example.
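Before the project examples, here is a minimal, self-contained sketch of the call itself. The data values, the output path /tmp/csv-output, and the class name WriteAsCsvSketch are placeholders invented for illustration. writeAsCsv() is defined only for tuple-typed DataSets, and like all file sinks it is lazy, so nothing is written until env.execute() is called.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;

public class WriteAsCsvSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // writeAsCsv() requires a tuple-typed DataSet; each tuple becomes one CSV row
        DataSet<Tuple2<String, Integer>> data = env.fromElements(
                new Tuple2<>("a", 1),
                new Tuple2<>("b", 2));

        // write with explicit row delimiter "\n" and field delimiter ","
        // (the placeholder path receives the output files)
        data.writeAsCsv("/tmp/csv-output", "\n", ",");

        // file sinks are lazy, so the job must be triggered explicitly
        env.execute("writeAsCsv sketch");
    }
}

The three-argument overload used above sets the row and field delimiters explicitly; the single-argument overload writeAsCsv(path), used in Example 6 below, falls back to the defaults "\n" and ",".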
Example 1
Source File: ExecutionPlanCreationTest.java From flink with Apache License 2.0
@SuppressWarnings("serial") public static void main(String[] args) throws Exception { if (args.length < 2) { System.err.println("Usage: TestOptimizerPlan <input-file-path> <output-file-path>"); return; } ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple2<Long, Long>> input = env.readCsvFile(args[0]) .fieldDelimiter("\t").types(Long.class, Long.class); DataSet<Tuple2<Long, Long>> result = input.map( new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() { public Tuple2<Long, Long> map(Tuple2<Long, Long> value){ return new Tuple2<Long, Long>(value.f0, value.f1 + 1); } }); result.writeAsCsv(args[1], "\n", "\t"); env.execute(); }
Example 2
Source File: IncrementalSSSPITCase.java From flink with Apache License 2.0
@Test
public void testIncrementalSSSPNonSPEdge() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Vertex<Long, Double>> vertices = IncrementalSSSPData.getDefaultVertexDataSet(env);
    DataSet<Edge<Long, Double>> edges = IncrementalSSSPData.getDefaultEdgeDataSet(env);
    DataSet<Edge<Long, Double>> edgesInSSSP = IncrementalSSSPData.getDefaultEdgesInSSSP(env);

    // the edge to be removed is a non-SP edge
    Edge<Long, Double> edgeToBeRemoved = new Edge<>(3L, 5L, 5.0);

    Graph<Long, Double, Double> graph = Graph.fromDataSet(vertices, edges, env);

    // Assumption: all minimum weight paths are kept
    Graph<Long, Double, Double> ssspGraph = Graph.fromDataSet(vertices, edgesInSSSP, env);

    // remove the edge
    graph.removeEdge(edgeToBeRemoved);

    // configure the iteration
    ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();

    if (IncrementalSSSP.isInSSSP(edgeToBeRemoved, edgesInSSSP)) {
        parameters.setDirection(EdgeDirection.IN);
        parameters.setOptDegrees(true);

        // run the scatter gather iteration to propagate info
        Graph<Long, Double, Double> result = ssspGraph.runScatterGatherIteration(
                new IncrementalSSSP.InvalidateMessenger(edgeToBeRemoved),
                new IncrementalSSSP.VertexDistanceUpdater(),
                IncrementalSSSPData.NUM_VERTICES, parameters);

        DataSet<Vertex<Long, Double>> resultedVertices = result.getVertices();

        resultedVertices.writeAsCsv(resultPath, "\n", ",");
        env.execute();
    } else {
        vertices.writeAsCsv(resultPath, "\n", ",");
        env.execute();
    }

    expected = IncrementalSSSPData.VERTICES;
}
Example 3
Source File: ConnectedComponentsWithSolutionSetFirstITCase.java From Flink-CEPplus with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset()
            .join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1);

    DataSet<Tuple2<Long, Long>> updatedIds = iteration.getSolutionSet()
            .join(minNeighbor).where(0).equalTo(0).with(new UpdateComponentIdMatchMirrored());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(updatedIds, updatedIds);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 4
Source File: ConnectedComponentsWithObjectMapITCase.java From Flink-CEPplus with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
    iteration.setSolutionSetUnManaged(true);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 5
Source File: SingleSourceShortestPaths.java From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Edge<Long, Double>> edges = getEdgesDataSet(env);

    Graph<Long, Double, Double> graph = Graph.fromDataSet(edges, new InitVertices(srcVertexId), env);

    // Execute the scatter-gather iteration
    Graph<Long, Double, Double> result = graph.runScatterGatherIteration(
            new MinDistanceMessenger(), new VertexDistanceUpdater(), maxIterations);

    // Extract the vertices as the result
    DataSet<Vertex<Long, Double>> singleSourceShortestPaths = result.getVertices();

    // emit result
    if (fileOutput) {
        singleSourceShortestPaths.writeAsCsv(outputPath, "\n", ",");

        // since file sinks are lazy, we trigger the execution explicitly
        env.execute("Single Source Shortest Paths Example");
    } else {
        singleSourceShortestPaths.print();
    }
}
Example 6
Source File: EmptyFieldsCountAccumulator.java From flink with Apache License 2.0
public static void main(final String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // get the data set
    final DataSet<StringTriple> file = getDataSet(env, params);

    // filter lines with empty fields
    final DataSet<StringTriple> filteredLines = file.filter(new EmptyFieldFilter());

    // Here, we could do further processing with the filtered lines...
    JobExecutionResult result;

    // output the filtered lines
    if (params.has("output")) {
        filteredLines.writeAsCsv(params.get("output"));

        // execute program
        result = env.execute("Accumulator example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        filteredLines.print();
        result = env.getLastJobExecutionResult();
    }

    // get the accumulator result via its registration key
    final List<Integer> emptyFields = result.getAccumulatorResult(EMPTY_FIELD_ACCUMULATOR);
    System.out.format("Number of detected empty fields per column: %s\n", emptyFields);
}
Example 7
Source File: GSASingleSourceShortestPaths.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Edge<Long, Double>> edges = getEdgeDataSet(env);

    Graph<Long, Double, Double> graph = Graph.fromDataSet(edges, new InitVertices(srcVertexId), env);

    // Execute the GSA iteration
    Graph<Long, Double, Double> result = graph.runGatherSumApplyIteration(
            new CalculateDistances(), new ChooseMinDistance(), new UpdateDistance(), maxIterations);

    // Extract the vertices as the result
    DataSet<Vertex<Long, Double>> singleSourceShortestPaths = result.getVertices();

    // emit result
    if (fileOutput) {
        singleSourceShortestPaths.writeAsCsv(outputPath, "\n", ",");

        // since file sinks are lazy, we trigger the execution explicitly
        env.execute("GSA Single Source Shortest Paths");
    } else {
        singleSourceShortestPaths.print();
    }
}
Example 8
Source File: WordCount.java From flink-simple-tutorial with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);

    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // get input data
    DataSet<String> text;
    if (params.has("input")) {
        // read the text file from given input path
        text = env.readTextFile(params.get("input"));
    } else {
        // get default test text data
        System.out.println("Executing WordCount example with default input data set.");
        System.out.println("Use --input to specify file input.");
        text = WordCountData.getDefaultTextLineDataSet(env);
    }

    DataSet<Tuple2<String, Integer>> counts =
            // split up the lines in pairs (2-tuples) containing: (word,1)
            text.flatMap(new Tokenizer())
                    // group by the tuple field "0" and sum up tuple field "1"
                    .groupBy(0)
                    .sum(1);

    // emit result
    if (params.has("output")) {
        counts.writeAsCsv(params.get("output"), "\n", " ");

        // execute program
        env.execute("WordCount Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        counts.print();
    }
}
Example 9
Source File: PregelSSSP.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Edge<Long, Double>> edges = getEdgesDataSet(env);

    Graph<Long, Double, Double> graph = Graph.fromDataSet(edges, new InitVertices(), env);

    // Execute the vertex-centric iteration
    Graph<Long, Double, Double> result = graph.runVertexCentricIteration(
            new SSSPComputeFunction(srcVertexId), new SSSPCombiner(), maxIterations);

    // Extract the vertices as the result
    DataSet<Vertex<Long, Double>> singleSourceShortestPaths = result.getVertices();

    // emit result
    if (fileOutput) {
        singleSourceShortestPaths.writeAsCsv(outputPath, "\n", ",");
        env.execute("Pregel Single Source Shortest Paths Example");
    } else {
        singleSourceShortestPaths.print();
    }
}
Example 10
Source File: ConnectedComponentsWithDeferredUpdateITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
            .join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new UpdateComponentIdMatchNonPreserving());

    DataSet<Tuple2<Long, Long>> delta;
    if (extraMapper) {
        delta = changes.map(
                // ID Mapper
                new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                    private static final long serialVersionUID = -3929364091829757322L;

                    @Override
                    public Tuple2<Long, Long> map(Tuple2<Long, Long> v) throws Exception {
                        return v;
                    }
                });
    } else {
        delta = changes;
    }

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(delta, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 11
Source File: WebLogAnalysis.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);

    // get input data
    DataSet<Tuple2<String, String>> documents = getDocumentsDataSet(env, params);
    DataSet<Tuple3<Integer, String, Integer>> ranks = getRanksDataSet(env, params);
    DataSet<Tuple2<String, String>> visits = getVisitsDataSet(env, params);

    // Retain documents with keywords
    DataSet<Tuple1<String>> filterDocs = documents
            .filter(new FilterDocByKeyWords())
            .project(0);

    // Filter ranks by minimum rank
    DataSet<Tuple3<Integer, String, Integer>> filterRanks = ranks
            .filter(new FilterByRank());

    // Filter visits by visit date
    DataSet<Tuple1<String>> filterVisits = visits
            .filter(new FilterVisitsByDate())
            .project(0);

    // Join the filtered documents and ranks, i.e., get all URLs with min rank and keywords
    DataSet<Tuple3<Integer, String, Integer>> joinDocsRanks =
            filterDocs.join(filterRanks)
                    .where(0).equalTo(1)
                    .projectSecond(0, 1, 2);

    // Anti-join urls with visits, i.e., retain all URLs which have NOT been visited in a certain time
    DataSet<Tuple3<Integer, String, Integer>> result =
            joinDocsRanks.coGroup(filterVisits)
                    .where(1).equalTo(0)
                    .with(new AntiJoinVisits());

    // emit result
    if (params.has("output")) {
        result.writeAsCsv(params.get("output"), "\n", "|");

        // execute program
        env.execute("WebLogAnalysis Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }
}
Example 12
Source File: IncrementalSSSP.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    Edge<Long, Double> edgeToBeRemoved = getEdgeToBeRemoved();

    Graph<Long, Double, Double> graph = IncrementalSSSP.getGraph(env);

    // Assumption: all minimum weight paths are kept
    Graph<Long, Double, Double> ssspGraph = IncrementalSSSP.getSSSPGraph(env);

    // remove the edge
    graph.removeEdge(edgeToBeRemoved);

    // configure the iteration
    ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();

    if (isInSSSP(edgeToBeRemoved, ssspGraph.getEdges())) {
        parameters.setDirection(EdgeDirection.IN);
        parameters.setOptDegrees(true);

        // run the scatter-gather iteration to propagate info
        Graph<Long, Double, Double> result = ssspGraph.runScatterGatherIteration(
                new InvalidateMessenger(edgeToBeRemoved),
                new VertexDistanceUpdater(), maxIterations, parameters);

        DataSet<Vertex<Long, Double>> resultedVertices = result.getVertices();

        // Emit results
        if (fileOutput) {
            resultedVertices.writeAsCsv(outputPath, "\n", ",");
            env.execute("Incremental SSSP Example");
        } else {
            resultedVertices.print();
        }
    } else {
        // print the vertices
        if (fileOutput) {
            graph.getVertices().writeAsCsv(outputPath, "\n", ",");
            env.execute("Incremental SSSP Example");
        } else {
            graph.getVertices().print();
        }
    }
}
Example 13
Source File: TPCHQuery10.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    if (!params.has("customer") && !params.has("orders") && !params.has("lineitem") && !params.has("nation")) {
        System.err.println("  This program expects data from the TPC-H benchmark as input data.");
        System.err.println("  Due to legal restrictions, we can not ship generated data.");
        System.err.println("  You can find the TPC-H data generator at http://www.tpc.org/tpch/.");
        System.err.println("  Usage: TPCHQuery10 --customer <path> --orders <path> --lineitem <path> --nation <path> [--output <path>]");
        return;
    }

    // get customer data set: (custkey, name, address, nationkey, acctbal)
    DataSet<Tuple5<Integer, String, String, Integer, Double>> customers = getCustomerDataSet(env, params.get("customer"));

    // get orders data set: (orderkey, custkey, orderdate)
    DataSet<Tuple3<Integer, Integer, String>> orders = getOrdersDataSet(env, params.get("orders"));

    // get lineitem data set: (orderkey, extendedprice, discount, returnflag)
    DataSet<Tuple4<Integer, Double, Double, String>> lineitems = getLineitemDataSet(env, params.get("lineitem"));

    // get nation data set: (nationkey, name)
    DataSet<Tuple2<Integer, String>> nations = getNationsDataSet(env, params.get("nation"));

    // orders filtered by year: (orderkey, custkey)
    DataSet<Tuple2<Integer, Integer>> ordersFilteredByYear =
            // filter by year
            orders.filter(order -> Integer.parseInt(order.f2.substring(0, 4)) > 1990)
                    // project fields out that are no longer required
                    .project(0, 1);

    // lineitems filtered by flag: (orderkey, revenue)
    DataSet<Tuple2<Integer, Double>> lineitemsFilteredByFlag =
            // filter by flag
            lineitems.filter(lineitem -> lineitem.f3.equals("R"))
                    // compute revenue and project out return flag
                    // revenue per item = l_extendedprice * (1 - l_discount)
                    .map(lineitem -> new Tuple2<>(lineitem.f0, lineitem.f1 * (1 - lineitem.f2)))
                    .returns(Types.TUPLE(Types.INT, Types.DOUBLE)); // for lambda with generics

    // join orders with lineitems: (custkey, revenue)
    DataSet<Tuple2<Integer, Double>> revenueByCustomer =
            ordersFilteredByYear.joinWithHuge(lineitemsFilteredByFlag)
                    .where(0).equalTo(0)
                    .projectFirst(1).projectSecond(1);

    revenueByCustomer = revenueByCustomer.groupBy(0).aggregate(Aggregations.SUM, 1);

    // join customer with nation (custkey, name, address, nationname, acctbal)
    DataSet<Tuple5<Integer, String, String, String, Double>> customerWithNation = customers
            .joinWithTiny(nations)
            .where(3).equalTo(0)
            .projectFirst(0, 1, 2).projectSecond(1).projectFirst(4);

    // join customer (with nation) with revenue (custkey, name, address, nationname, acctbal, revenue)
    DataSet<Tuple6<Integer, String, String, String, Double, Double>> result =
            customerWithNation.join(revenueByCustomer)
                    .where(0).equalTo(0)
                    .projectFirst(0, 1, 2, 3, 4).projectSecond(1);

    // emit result
    if (params.has("output")) {
        result.writeAsCsv(params.get("output"), "\n", "|");

        // execute program
        env.execute("TPCH Query 10 Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }
}
Example 14
Source File: EuclideanGraphWeighing.java From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Vertex<Long, Point>> vertices = getVerticesDataSet(env);

    DataSet<Edge<Long, Double>> edges = getEdgesDataSet(env);

    Graph<Long, Point, Double> graph = Graph.fromDataSet(vertices, edges, env);

    // the edge value will be the Euclidean distance between its src and trg vertex
    DataSet<Tuple3<Long, Long, Double>> edgesWithEuclideanWeight = graph.getTriplets()
            .map(new MapFunction<Triplet<Long, Point, Double>, Tuple3<Long, Long, Double>>() {

                @Override
                public Tuple3<Long, Long, Double> map(Triplet<Long, Point, Double> triplet) throws Exception {
                    Vertex<Long, Point> srcVertex = triplet.getSrcVertex();
                    Vertex<Long, Point> trgVertex = triplet.getTrgVertex();

                    return new Tuple3<>(srcVertex.getId(), trgVertex.getId(),
                            srcVertex.getValue().euclideanDistance(trgVertex.getValue()));
                }
            });

    Graph<Long, Point, Double> resultedGraph = graph.joinWithEdges(edgesWithEuclideanWeight,
            new EdgeJoinFunction<Double, Double>() {

                public Double edgeJoin(Double edgeValue, Double inputValue) {
                    return inputValue;
                }
            });

    // retrieve the edges from the final result
    DataSet<Edge<Long, Double>> result = resultedGraph.getEdges();

    // emit result
    if (fileOutput) {
        result.writeAsCsv(outputPath, "\n", ",");

        // since file sinks are lazy, we trigger the execution explicitly
        env.execute("Euclidean Graph Weighing Example");
    } else {
        result.print();
    }
}
Example 15
Source File: EnumTriangles.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);

    // set up execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // read input data
    DataSet<Edge> edges;
    if (params.has("edges")) {
        edges = env.readCsvFile(params.get("edges"))
                .fieldDelimiter(" ")
                .includeFields(true, true)
                .types(Integer.class, Integer.class)
                .map(new TupleEdgeConverter());
    } else {
        System.out.println("Executing EnumTriangles example with default edges data set.");
        System.out.println("Use --edges to specify file input.");
        edges = EnumTrianglesData.getDefaultEdgeDataSet(env);
    }

    // project edges by vertex id
    DataSet<Edge> edgesById = edges
            .map(new EdgeByIdProjector());

    DataSet<Triad> triangles = edgesById
            // build triads
            .groupBy(Edge.V1).sortGroup(Edge.V2, Order.ASCENDING).reduceGroup(new TriadBuilder())
            // filter triads
            .join(edgesById).where(Triad.V2, Triad.V3).equalTo(Edge.V1, Edge.V2).with(new TriadFilter());

    // emit result
    if (params.has("output")) {
        triangles.writeAsCsv(params.get("output"), "\n", ",");

        // execute program
        env.execute("Basic Triangle Enumeration Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        triangles.print();
    }
}
Example 16
Source File: TPCHQuery3Parquet.java From parquet-flinktacular with Apache License 2.0
public static void main(String[] args) throws Exception {
    long startTime = System.currentTimeMillis();

    if (!parseParameters(args)) {
        return;
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // get input data
    DataSet<Lineitem> lineitems = getLineitemDataSet(env).map(new MapLineitems());
    DataSet<Order> orders = getOrdersDataSet(env).map(new MapOrders());
    DataSet<Customer> customers = getCustomerDataSet(env).map(new MapCustomers());

    // Join customers with orders and package them into a ShippingPriorityItem
    DataSet<ShippingPriorityItem> customerWithOrders =
            customers.join(orders).where(0).equalTo(1)
                    .with(new JoinFunction<Customer, Order, ShippingPriorityItem>() {
                        @Override
                        public ShippingPriorityItem join(Customer c, Order o) {
                            return new ShippingPriorityItem(o.getOrderKey(), 0.0, o.getOrderdate(),
                                    o.getShippriority());
                        }
                    });

    // Join the last join result with Lineitems
    DataSet<ShippingPriorityItem> result =
            customerWithOrders.join(lineitems).where(0).equalTo(0)
                    .with(new JoinFunction<ShippingPriorityItem, Lineitem, ShippingPriorityItem>() {
                        @Override
                        public ShippingPriorityItem join(ShippingPriorityItem i, Lineitem l) {
                            i.setRevenue(l.getExtendedprice() * (1 - l.getDiscount()));
                            return i;
                        }
                    })
                    // Group by l_orderkey, o_orderdate and o_shippriority and compute revenue sum
                    .groupBy(0, 2, 3)
                    .aggregate(Aggregations.SUM, 1);

    // emit result
    result.writeAsCsv(outputPath, "\n", "|");

    // execute program
    env.execute("TPCH Query 3 - Parquet input");

    System.out.println("Execution time: " + (System.currentTimeMillis() - startTime));
}
Example 17
Source File: IncrementalSSSP.java From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    Edge<Long, Double> edgeToBeRemoved = getEdgeToBeRemoved();

    Graph<Long, Double, Double> graph = IncrementalSSSP.getGraph(env);

    // Assumption: all minimum weight paths are kept
    Graph<Long, Double, Double> ssspGraph = IncrementalSSSP.getSSSPGraph(env);

    // remove the edge
    graph.removeEdge(edgeToBeRemoved);

    // configure the iteration
    ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();

    if (isInSSSP(edgeToBeRemoved, ssspGraph.getEdges())) {
        parameters.setDirection(EdgeDirection.IN);
        parameters.setOptDegrees(true);

        // run the scatter-gather iteration to propagate info
        Graph<Long, Double, Double> result = ssspGraph.runScatterGatherIteration(
                new InvalidateMessenger(edgeToBeRemoved),
                new VertexDistanceUpdater(), maxIterations, parameters);

        DataSet<Vertex<Long, Double>> resultedVertices = result.getVertices();

        // Emit results
        if (fileOutput) {
            resultedVertices.writeAsCsv(outputPath, "\n", ",");
            env.execute("Incremental SSSP Example");
        } else {
            resultedVertices.print();
        }
    } else {
        // print the vertices
        if (fileOutput) {
            graph.getVertices().writeAsCsv(outputPath, "\n", ",");
            env.execute("Incremental SSSP Example");
        } else {
            graph.getVertices().print();
        }
    }
}
Example 18
Source File: EuclideanGraphWeighing.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Vertex<Long, Point>> vertices = getVerticesDataSet(env);

    DataSet<Edge<Long, Double>> edges = getEdgesDataSet(env);

    Graph<Long, Point, Double> graph = Graph.fromDataSet(vertices, edges, env);

    // the edge value will be the Euclidean distance between its src and trg vertex
    DataSet<Tuple3<Long, Long, Double>> edgesWithEuclideanWeight = graph.getTriplets()
            .map(new MapFunction<Triplet<Long, Point, Double>, Tuple3<Long, Long, Double>>() {

                @Override
                public Tuple3<Long, Long, Double> map(Triplet<Long, Point, Double> triplet) throws Exception {
                    Vertex<Long, Point> srcVertex = triplet.getSrcVertex();
                    Vertex<Long, Point> trgVertex = triplet.getTrgVertex();

                    return new Tuple3<>(srcVertex.getId(), trgVertex.getId(),
                            srcVertex.getValue().euclideanDistance(trgVertex.getValue()));
                }
            });

    Graph<Long, Point, Double> resultedGraph = graph.joinWithEdges(edgesWithEuclideanWeight,
            new EdgeJoinFunction<Double, Double>() {

                public Double edgeJoin(Double edgeValue, Double inputValue) {
                    return inputValue;
                }
            });

    // retrieve the edges from the final result
    DataSet<Edge<Long, Double>> result = resultedGraph.getEdges();

    // emit result
    if (fileOutput) {
        result.writeAsCsv(outputPath, "\n", ",");

        // since file sinks are lazy, we trigger the execution explicitly
        env.execute("Euclidean Graph Weighing Example");
    } else {
        result.print();
    }
}