Java Code Examples for org.apache.flink.api.java.DataSet#writeAsCsv()
The following examples show how to use org.apache.flink.api.java.DataSet#writeAsCsv().
The originating project, source file, and license are noted above each example.
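Before the project examples, here is a minimal, self-contained sketch of the call itself. The data values, the output path /tmp/csv-output, and the class name WriteAsCsvSketch are placeholders invented for illustration. writeAsCsv() is defined only for tuple-typed DataSets, and like all file sinks it is lazy, so nothing is written until env.execute() is called.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;

public class WriteAsCsvSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // writeAsCsv() requires a tuple-typed DataSet; each tuple becomes one CSV row
        DataSet<Tuple2<String, Integer>> data = env.fromElements(
                new Tuple2<>("a", 1),
                new Tuple2<>("b", 2));

        // write with explicit row delimiter "\n" and field delimiter ","
        // (the placeholder path receives the output files)
        data.writeAsCsv("/tmp/csv-output", "\n", ",");

        // file sinks are lazy, so the job must be triggered explicitly
        env.execute("writeAsCsv sketch");
    }
}

The three-argument overload used above sets the row and field delimiters explicitly; the single-argument overload writeAsCsv(path), used in Example 6 below, falls back to the defaults "\n" and ",".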
Example 1
Source File: ExecutionPlanCreationTest.java From flink with Apache License 2.0
@SuppressWarnings("serial") public static void main(String[] args) throws Exception { if (args.length < 2) { System.err.println("Usage: TestOptimizerPlan <input-file-path> <output-file-path>"); return; } ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple2<Long, Long>> input = env.readCsvFile(args[0]) .fieldDelimiter("\t").types(Long.class, Long.class); DataSet<Tuple2<Long, Long>> result = input.map( new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() { public Tuple2<Long, Long> map(Tuple2<Long, Long> value){ return new Tuple2<Long, Long>(value.f0, value.f1 + 1); } }); result.writeAsCsv(args[1], "\n", "\t"); env.execute(); }
Example 2
Source File: IncrementalSSSPITCase.java From flink with Apache License 2.0
@Test
public void testIncrementalSSSPNonSPEdge() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Vertex<Long, Double>> vertices = IncrementalSSSPData.getDefaultVertexDataSet(env);
    DataSet<Edge<Long, Double>> edges = IncrementalSSSPData.getDefaultEdgeDataSet(env);
    DataSet<Edge<Long, Double>> edgesInSSSP = IncrementalSSSPData.getDefaultEdgesInSSSP(env);

    // the edge to be removed is a non-SP edge
    Edge<Long, Double> edgeToBeRemoved = new Edge<>(3L, 5L, 5.0);

    Graph<Long, Double, Double> graph = Graph.fromDataSet(vertices, edges, env);

    // Assumption: all minimum weight paths are kept
    Graph<Long, Double, Double> ssspGraph = Graph.fromDataSet(vertices, edgesInSSSP, env);

    // remove the edge
    graph.removeEdge(edgeToBeRemoved);

    // configure the iteration
    ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();

    if (IncrementalSSSP.isInSSSP(edgeToBeRemoved, edgesInSSSP)) {
        parameters.setDirection(EdgeDirection.IN);
        parameters.setOptDegrees(true);

        // run the scatter gather iteration to propagate info
        Graph<Long, Double, Double> result = ssspGraph.runScatterGatherIteration(
                new IncrementalSSSP.InvalidateMessenger(edgeToBeRemoved),
                new IncrementalSSSP.VertexDistanceUpdater(),
                IncrementalSSSPData.NUM_VERTICES, parameters);

        DataSet<Vertex<Long, Double>> resultedVertices = result.getVertices();

        resultedVertices.writeAsCsv(resultPath, "\n", ",");
        env.execute();
    } else {
        vertices.writeAsCsv(resultPath, "\n", ",");
        env.execute();
    }

    expected = IncrementalSSSPData.VERTICES;
}
Example 3
Source File: ConnectedComponentsWithSolutionSetFirstITCase.java From Flink-CEPplus with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset()
            .join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1);

    DataSet<Tuple2<Long, Long>> updatedIds = iteration.getSolutionSet()
            .join(minNeighbor).where(0).equalTo(0).with(new UpdateComponentIdMatchMirrored());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(updatedIds, updatedIds);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 4
Source File: ConnectedComponentsWithObjectMapITCase.java From Flink-CEPplus with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
    iteration.setSolutionSetUnManaged(true);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 5
Source File: SingleSourceShortestPaths.java From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Edge<Long, Double>> edges = getEdgesDataSet(env);

    Graph<Long, Double, Double> graph = Graph.fromDataSet(edges, new InitVertices(srcVertexId), env);

    // Execute the scatter-gather iteration
    Graph<Long, Double, Double> result = graph.runScatterGatherIteration(
            new MinDistanceMessenger(), new VertexDistanceUpdater(), maxIterations);

    // Extract the vertices as the result
    DataSet<Vertex<Long, Double>> singleSourceShortestPaths = result.getVertices();

    // emit result
    if (fileOutput) {
        singleSourceShortestPaths.writeAsCsv(outputPath, "\n", ",");

        // since file sinks are lazy, we trigger the execution explicitly
        env.execute("Single Source Shortest Paths Example");
    } else {
        singleSourceShortestPaths.print();
    }
}
Example 6
Source File: EmptyFieldsCountAccumulator.java From flink with Apache License 2.0
public static void main(final String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // get the data set
    final DataSet<StringTriple> file = getDataSet(env, params);

    // filter lines with empty fields
    final DataSet<StringTriple> filteredLines = file.filter(new EmptyFieldFilter());

    // Here, we could do further processing with the filtered lines...
    JobExecutionResult result;

    // output the filtered lines
    if (params.has("output")) {
        filteredLines.writeAsCsv(params.get("output"));

        // execute program
        result = env.execute("Accumulator example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        filteredLines.print();
        result = env.getLastJobExecutionResult();
    }

    // get the accumulator result via its registration key
    final List<Integer> emptyFields = result.getAccumulatorResult(EMPTY_FIELD_ACCUMULATOR);
    System.out.format("Number of detected empty fields per column: %s\n", emptyFields);
}
Example 7
Source File: GSASingleSourceShortestPaths.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Edge<Long, Double>> edges = getEdgeDataSet(env);

    Graph<Long, Double, Double> graph = Graph.fromDataSet(edges, new InitVertices(srcVertexId), env);

    // Execute the GSA iteration
    Graph<Long, Double, Double> result = graph.runGatherSumApplyIteration(
            new CalculateDistances(), new ChooseMinDistance(), new UpdateDistance(), maxIterations);

    // Extract the vertices as the result
    DataSet<Vertex<Long, Double>> singleSourceShortestPaths = result.getVertices();

    // emit result
    if (fileOutput) {
        singleSourceShortestPaths.writeAsCsv(outputPath, "\n", ",");

        // since file sinks are lazy, we trigger the execution explicitly
        env.execute("GSA Single Source Shortest Paths");
    } else {
        singleSourceShortestPaths.print();
    }
}
Example 8
Source File: WordCount.java From flink-simple-tutorial with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);

    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // get input data
    DataSet<String> text;
    if (params.has("input")) {
        // read the text file from given input path
        text = env.readTextFile(params.get("input"));
    } else {
        // get default test text data
        System.out.println("Executing WordCount example with default input data set.");
        System.out.println("Use --input to specify file input.");
        text = WordCountData.getDefaultTextLineDataSet(env);
    }

    DataSet<Tuple2<String, Integer>> counts =
            // split up the lines in pairs (2-tuples) containing: (word,1)
            text.flatMap(new Tokenizer())
                    // group by the tuple field "0" and sum up tuple field "1"
                    .groupBy(0)
                    .sum(1);

    // emit result
    if (params.has("output")) {
        counts.writeAsCsv(params.get("output"), "\n", " ");

        // execute program
        env.execute("WordCount Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        counts.print();
    }
}
Example 9
Source File: PregelSSSP.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Edge<Long, Double>> edges = getEdgesDataSet(env);

    Graph<Long, Double, Double> graph = Graph.fromDataSet(edges, new InitVertices(), env);

    // Execute the vertex-centric iteration
    Graph<Long, Double, Double> result = graph.runVertexCentricIteration(
            new SSSPComputeFunction(srcVertexId), new SSSPCombiner(), maxIterations);

    // Extract the vertices as the result
    DataSet<Vertex<Long, Double>> singleSourceShortestPaths = result.getVertices();

    // emit result
    if (fileOutput) {
        singleSourceShortestPaths.writeAsCsv(outputPath, "\n", ",");
        env.execute("Pregel Single Source Shortest Paths Example");
    } else {
        singleSourceShortestPaths.print();
    }
}
Example 10
Source File: ConnectedComponentsWithDeferredUpdateITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
            .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
            .join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new UpdateComponentIdMatchNonPreserving());

    DataSet<Tuple2<Long, Long>> delta;
    if (extraMapper) {
        delta = changes.map(
                // ID Mapper
                new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                    private static final long serialVersionUID = -3929364091829757322L;

                    @Override
                    public Tuple2<Long, Long> map(Tuple2<Long, Long> v) throws Exception {
                        return v;
                    }
                });
    } else {
        delta = changes;
    }

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(delta, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
Example 11
Source File: WebLogAnalysis.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);

    // get input data
    DataSet<Tuple2<String, String>> documents = getDocumentsDataSet(env, params);
    DataSet<Tuple3<Integer, String, Integer>> ranks = getRanksDataSet(env, params);
    DataSet<Tuple2<String, String>> visits = getVisitsDataSet(env, params);

    // Retain documents with keywords
    DataSet<Tuple1<String>> filterDocs = documents
            .filter(new FilterDocByKeyWords())
            .project(0);

    // Filter ranks by minimum rank
    DataSet<Tuple3<Integer, String, Integer>> filterRanks = ranks
            .filter(new FilterByRank());

    // Filter visits by visit date
    DataSet<Tuple1<String>> filterVisits = visits
            .filter(new FilterVisitsByDate())
            .project(0);

    // Join the filtered documents and ranks, i.e., get all URLs with min rank and keywords
    DataSet<Tuple3<Integer, String, Integer>> joinDocsRanks =
            filterDocs.join(filterRanks)
                    .where(0).equalTo(1)
                    .projectSecond(0, 1, 2);

    // Anti-join urls with visits, i.e., retain all URLs which have NOT been visited in a certain time
    DataSet<Tuple3<Integer, String, Integer>> result =
            joinDocsRanks.coGroup(filterVisits)
                    .where(1).equalTo(0)
                    .with(new AntiJoinVisits());

    // emit result
    if (params.has("output")) {
        result.writeAsCsv(params.get("output"), "\n", "|");

        // execute program
        env.execute("WebLogAnalysis Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }
}
Example 12
Source File: IncrementalSSSP.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    Edge<Long, Double> edgeToBeRemoved = getEdgeToBeRemoved();

    Graph<Long, Double, Double> graph = IncrementalSSSP.getGraph(env);

    // Assumption: all minimum weight paths are kept
    Graph<Long, Double, Double> ssspGraph = IncrementalSSSP.getSSSPGraph(env);

    // remove the edge
    graph.removeEdge(edgeToBeRemoved);

    // configure the iteration
    ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();

    if (isInSSSP(edgeToBeRemoved, ssspGraph.getEdges())) {
        parameters.setDirection(EdgeDirection.IN);
        parameters.setOptDegrees(true);

        // run the scatter-gather iteration to propagate info
        Graph<Long, Double, Double> result = ssspGraph.runScatterGatherIteration(
                new InvalidateMessenger(edgeToBeRemoved),
                new VertexDistanceUpdater(), maxIterations, parameters);

        DataSet<Vertex<Long, Double>> resultedVertices = result.getVertices();

        // Emit results
        if (fileOutput) {
            resultedVertices.writeAsCsv(outputPath, "\n", ",");
            env.execute("Incremental SSSP Example");
        } else {
            resultedVertices.print();
        }
    } else {
        // print the vertices
        if (fileOutput) {
            graph.getVertices().writeAsCsv(outputPath, "\n", ",");
            env.execute("Incremental SSSP Example");
        } else {
            graph.getVertices().print();
        }
    }
}
Example 13
Source File: TPCHQuery10.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    if (!params.has("customer") && !params.has("orders") && !params.has("lineitem") && !params.has("nation")) {
        System.err.println("  This program expects data from the TPC-H benchmark as input data.");
        System.err.println("  Due to legal restrictions, we can not ship generated data.");
        System.err.println("  You can find the TPC-H data generator at http://www.tpc.org/tpch/.");
        System.err.println("  Usage: TPCHQuery10 --customer <path> --orders <path> --lineitem <path> --nation <path> [--output <path>]");
        return;
    }

    // get customer data set: (custkey, name, address, nationkey, acctbal)
    DataSet<Tuple5<Integer, String, String, Integer, Double>> customers = getCustomerDataSet(env, params.get("customer"));

    // get orders data set: (orderkey, custkey, orderdate)
    DataSet<Tuple3<Integer, Integer, String>> orders = getOrdersDataSet(env, params.get("orders"));

    // get lineitem data set: (orderkey, extendedprice, discount, returnflag)
    DataSet<Tuple4<Integer, Double, Double, String>> lineitems = getLineitemDataSet(env, params.get("lineitem"));

    // get nation data set: (nationkey, name)
    DataSet<Tuple2<Integer, String>> nations = getNationsDataSet(env, params.get("nation"));

    // orders filtered by year: (orderkey, custkey)
    DataSet<Tuple2<Integer, Integer>> ordersFilteredByYear =
            // filter by year
            orders.filter(order -> Integer.parseInt(order.f2.substring(0, 4)) > 1990)
                    // project fields out that are no longer required
                    .project(0, 1);

    // lineitems filtered by flag: (orderkey, revenue)
    DataSet<Tuple2<Integer, Double>> lineitemsFilteredByFlag =
            // filter by flag
            lineitems.filter(lineitem -> lineitem.f3.equals("R"))
                    // compute revenue and project out return flag
                    // revenue per item = l_extendedprice * (1 - l_discount)
                    .map(lineitem -> new Tuple2<>(lineitem.f0, lineitem.f1 * (1 - lineitem.f2)))
                    .returns(Types.TUPLE(Types.INT, Types.DOUBLE)); // for lambda with generics

    // join orders with lineitems: (custkey, revenue)
    DataSet<Tuple2<Integer, Double>> revenueByCustomer =
            ordersFilteredByYear.joinWithHuge(lineitemsFilteredByFlag)
                    .where(0).equalTo(0)
                    .projectFirst(1).projectSecond(1);

    revenueByCustomer = revenueByCustomer.groupBy(0).aggregate(Aggregations.SUM, 1);

    // join customer with nation (custkey, name, address, nationname, acctbal)
    DataSet<Tuple5<Integer, String, String, String, Double>> customerWithNation = customers
            .joinWithTiny(nations)
            .where(3).equalTo(0)
            .projectFirst(0, 1, 2).projectSecond(1).projectFirst(4);

    // join customer (with nation) with revenue (custkey, name, address, nationname, acctbal, revenue)
    DataSet<Tuple6<Integer, String, String, String, Double, Double>> result =
            customerWithNation.join(revenueByCustomer)
                    .where(0).equalTo(0)
                    .projectFirst(0, 1, 2, 3, 4).projectSecond(1);

    // emit result
    if (params.has("output")) {
        result.writeAsCsv(params.get("output"), "\n", "|");

        // execute program
        env.execute("TPCH Query 10 Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }
}
Example 14
Source File: EuclideanGraphWeighing.java From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Vertex<Long, Point>> vertices = getVerticesDataSet(env);

    DataSet<Edge<Long, Double>> edges = getEdgesDataSet(env);

    Graph<Long, Point, Double> graph = Graph.fromDataSet(vertices, edges, env);

    // the edge value will be the Euclidean distance between its src and trg vertex
    DataSet<Tuple3<Long, Long, Double>> edgesWithEuclideanWeight = graph.getTriplets()
            .map(new MapFunction<Triplet<Long, Point, Double>, Tuple3<Long, Long, Double>>() {

                @Override
                public Tuple3<Long, Long, Double> map(Triplet<Long, Point, Double> triplet) throws Exception {
                    Vertex<Long, Point> srcVertex = triplet.getSrcVertex();
                    Vertex<Long, Point> trgVertex = triplet.getTrgVertex();

                    return new Tuple3<>(srcVertex.getId(), trgVertex.getId(),
                            srcVertex.getValue().euclideanDistance(trgVertex.getValue()));
                }
            });

    Graph<Long, Point, Double> resultedGraph = graph.joinWithEdges(edgesWithEuclideanWeight,
            new EdgeJoinFunction<Double, Double>() {

                public Double edgeJoin(Double edgeValue, Double inputValue) {
                    return inputValue;
                }
            });

    // retrieve the edges from the final result
    DataSet<Edge<Long, Double>> result = resultedGraph.getEdges();

    // emit result
    if (fileOutput) {
        result.writeAsCsv(outputPath, "\n", ",");

        // since file sinks are lazy, we trigger the execution explicitly
        env.execute("Euclidean Graph Weighing Example");
    } else {
        result.print();
    }
}
Example 15
Source File: EnumTriangles.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);

    // set up execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // read input data
    DataSet<Edge> edges;
    if (params.has("edges")) {
        edges = env.readCsvFile(params.get("edges"))
                .fieldDelimiter(" ")
                .includeFields(true, true)
                .types(Integer.class, Integer.class)
                .map(new TupleEdgeConverter());
    } else {
        System.out.println("Executing EnumTriangles example with default edges data set.");
        System.out.println("Use --edges to specify file input.");
        edges = EnumTrianglesData.getDefaultEdgeDataSet(env);
    }

    // project edges by vertex id
    DataSet<Edge> edgesById = edges
            .map(new EdgeByIdProjector());

    DataSet<Triad> triangles = edgesById
            // build triads
            .groupBy(Edge.V1).sortGroup(Edge.V2, Order.ASCENDING).reduceGroup(new TriadBuilder())
            // filter triads
            .join(edgesById).where(Triad.V2, Triad.V3).equalTo(Edge.V1, Edge.V2).with(new TriadFilter());

    // emit result
    if (params.has("output")) {
        triangles.writeAsCsv(params.get("output"), "\n", ",");

        // execute program
        env.execute("Basic Triangle Enumeration Example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        triangles.print();
    }
}
Example 16
Source File: TPCHQuery3Parquet.java From parquet-flinktacular with Apache License 2.0
public static void main(String[] args) throws Exception {
    long startTime = System.currentTimeMillis();

    if (!parseParameters(args)) {
        return;
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // get input data
    DataSet<Lineitem> lineitems = getLineitemDataSet(env).map(new MapLineitems());
    DataSet<Order> orders = getOrdersDataSet(env).map(new MapOrders());
    DataSet<Customer> customers = getCustomerDataSet(env).map(new MapCustomers());

    // Join customers with orders and package them into a ShippingPriorityItem
    DataSet<ShippingPriorityItem> customerWithOrders =
            customers.join(orders).where(0).equalTo(1)
                    .with(new JoinFunction<Customer, Order, ShippingPriorityItem>() {
                        @Override
                        public ShippingPriorityItem join(Customer c, Order o) {
                            return new ShippingPriorityItem(o.getOrderKey(), 0.0, o.getOrderdate(),
                                    o.getShippriority());
                        }
                    });

    // Join the last join result with Lineitems
    DataSet<ShippingPriorityItem> result =
            customerWithOrders.join(lineitems).where(0).equalTo(0)
                    .with(new JoinFunction<ShippingPriorityItem, Lineitem, ShippingPriorityItem>() {
                        @Override
                        public ShippingPriorityItem join(ShippingPriorityItem i, Lineitem l) {
                            i.setRevenue(l.getExtendedprice() * (1 - l.getDiscount()));
                            return i;
                        }
                    })
                    // Group by l_orderkey, o_orderdate and o_shippriority and compute revenue sum
                    .groupBy(0, 2, 3)
                    .aggregate(Aggregations.SUM, 1);

    // emit result
    result.writeAsCsv(outputPath, "\n", "|");

    // execute program
    env.execute("TPCH Query 3 - Parquet input");

    System.out.println("Execution time: " + (System.currentTimeMillis() - startTime));
}
Example 17
Source File: IncrementalSSSP.java From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    Edge<Long, Double> edgeToBeRemoved = getEdgeToBeRemoved();

    Graph<Long, Double, Double> graph = IncrementalSSSP.getGraph(env);

    // Assumption: all minimum weight paths are kept
    Graph<Long, Double, Double> ssspGraph = IncrementalSSSP.getSSSPGraph(env);

    // remove the edge
    graph.removeEdge(edgeToBeRemoved);

    // configure the iteration
    ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();

    if (isInSSSP(edgeToBeRemoved, ssspGraph.getEdges())) {
        parameters.setDirection(EdgeDirection.IN);
        parameters.setOptDegrees(true);

        // run the scatter-gather iteration to propagate info
        Graph<Long, Double, Double> result = ssspGraph.runScatterGatherIteration(
                new InvalidateMessenger(edgeToBeRemoved),
                new VertexDistanceUpdater(), maxIterations, parameters);

        DataSet<Vertex<Long, Double>> resultedVertices = result.getVertices();

        // Emit results
        if (fileOutput) {
            resultedVertices.writeAsCsv(outputPath, "\n", ",");
            env.execute("Incremental SSSP Example");
        } else {
            resultedVertices.print();
        }
    } else {
        // print the vertices
        if (fileOutput) {
            graph.getVertices().writeAsCsv(outputPath, "\n", ",");
            env.execute("Incremental SSSP Example");
        } else {
            graph.getVertices().print();
        }
    }
}
Example 18
Source File: EuclideanGraphWeighing.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Vertex<Long, Point>> vertices = getVerticesDataSet(env);

    DataSet<Edge<Long, Double>> edges = getEdgesDataSet(env);

    Graph<Long, Point, Double> graph = Graph.fromDataSet(vertices, edges, env);

    // the edge value will be the Euclidean distance between its src and trg vertex
    DataSet<Tuple3<Long, Long, Double>> edgesWithEuclideanWeight = graph.getTriplets()
            .map(new MapFunction<Triplet<Long, Point, Double>, Tuple3<Long, Long, Double>>() {

                @Override
                public Tuple3<Long, Long, Double> map(Triplet<Long, Point, Double> triplet) throws Exception {
                    Vertex<Long, Point> srcVertex = triplet.getSrcVertex();
                    Vertex<Long, Point> trgVertex = triplet.getTrgVertex();

                    return new Tuple3<>(srcVertex.getId(), trgVertex.getId(),
                            srcVertex.getValue().euclideanDistance(trgVertex.getValue()));
                }
            });

    Graph<Long, Point, Double> resultedGraph = graph.joinWithEdges(edgesWithEuclideanWeight,
            new EdgeJoinFunction<Double, Double>() {

                public Double edgeJoin(Double edgeValue, Double inputValue) {
                    return inputValue;
                }
            });

    // retrieve the edges from the final result
    DataSet<Edge<Long, Double>> result = resultedGraph.getEdges();

    // emit result
    if (fileOutput) {
        result.writeAsCsv(outputPath, "\n", ",");

        // since file sinks are lazy, we trigger the execution explicitly
        env.execute("Euclidean Graph Weighing Example");
    } else {
        result.print();
    }
}