Java Code Examples for org.apache.flink.api.java.ExecutionEnvironment#createInput()
The following examples show how to use org.apache.flink.api.java.ExecutionEnvironment#createInput().
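As a quick orientation before the project examples, here is a minimal, self-contained sketch of the two createInput() overloads: one takes only an InputFormat and infers the produced type from it, the other additionally takes an explicit TypeInformation for formats whose type cannot be extracted automatically (as in several Hadoop and Pravega examples below). The input path used here is a placeholder and is not taken from any of the projects listed on this page.

import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.core.fs.Path;

public class CreateInputSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Overload 1: the element type (String) is extracted from the InputFormat itself.
        DataSet<String> lines = env.createInput(new TextInputFormat(new Path("/tmp/placeholder-input.txt")));

        // Overload 2: pass the TypeInformation explicitly; useful when type extraction
        // is not possible, e.g. for generic or Hadoop-wrapped input formats.
        DataSet<String> typedLines = env.createInput(
                new TextInputFormat(new Path("/tmp/placeholder-input.txt")),
                BasicTypeInfo.STRING_TYPE_INFO);

        lines.print();
        typedLines.print();
    }
}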
Example 1
Source File: AvroTypeExtractionTest.java From flink with Apache License 2.0 | 6 votes |
@Test
public void testKeySelection() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableObjectReuse();
    Path in = new Path(inFile.getAbsoluteFile().toURI());
    AvroInputFormat<User> users = new AvroInputFormat<>(in, User.class);
    DataSet<User> usersDS = env.createInput(users);

    DataSet<Tuple2<String, Integer>> res = usersDS
        .groupBy("name")
        .reduceGroup((GroupReduceFunction<User, Tuple2<String, Integer>>) (values, out) -> {
            for (User u : values) {
                out.collect(new Tuple2<>(u.getName().toString(), 1));
            }
        })
        .returns(Types.TUPLE(Types.STRING, Types.INT));

    res.writeAsText(resultPath);
    env.execute("Avro Key selection");

    expected = "(Alyssa,1)\n(Charlie,1)\n";
}
Example 2
Source File: TPCDSQuery55Parquet.java From parquet-flinktacular with Apache License 2.0 | 6 votes |
private static DataSet<Tuple2<Void, DateDimTable>> getDataDimDataSet(ExecutionEnvironment env) throws IOException {
    Job job = Job.getInstance();

    // Schema projection
    ParquetInputFormat.setReadSupportClass(job, ThriftReadSupport.class);
    job.getConfiguration().set("parquet.thrift.column.filter", "d_date_sk;d_year;d_moy");

    HadoopInputFormat hadoopInputFormat =
        new HadoopInputFormat(new ParquetThriftInputFormat(), Void.class, DateDimTable.class, job);

    // Filter
    LongColumn moy = longColumn("d_moy");
    LongColumn year = longColumn("d_year");
    FilterPredicate moyPred = eq(moy, 11L);
    FilterPredicate yearPred = eq(year, 1999L);
    FilterPredicate constraint = and(moyPred, yearPred);
    ParquetThriftInputFormat.setFilterPredicate(job.getConfiguration(), constraint);

    ParquetThriftInputFormat.addInputPath(job, new Path(datadimPath));

    DataSet<Tuple2<Void, DateDimTable>> data = env.createInput(hadoopInputFormat);

    return data;
}
Example 3
Source File: ParquetThriftExample.java From parquet-flinktacular with Apache License 2.0 | 6 votes |
public static DataSet<Tuple2<Void, Person>> readThrift(ExecutionEnvironment env, String inputPath) throws IOException {
    Job job = Job.getInstance();
    HadoopInputFormat hadoopInputFormat =
        new HadoopInputFormat(new ParquetThriftInputFormat(), Void.class, Person.class, job);

    // schema projection: don't read attributes id and email
    job.getConfiguration().set("parquet.thrift.column.filter", "name;id;email;phone/number");

    FileInputFormat.addInputPath(job, new Path(inputPath));

    // push down predicates: get all persons with name = "Felix"
    BinaryColumn name = binaryColumn("name");
    FilterPredicate namePred = eq(name, Binary.fromString("Felix"));
    ParquetInputFormat.setFilterPredicate(job.getConfiguration(), namePred);

    DataSet<Tuple2<Void, Person>> data = env.createInput(hadoopInputFormat);

    return data;
}
Example 4
Source File: AvroTypeExtractionTest.java From flink with Apache License 2.0 | 6 votes |
@Test
public void testWithKryoGenericSer() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableForceKryo();
    Path in = new Path(inFile.getAbsoluteFile().toURI());
    AvroInputFormat<User> users = new AvroInputFormat<>(in, User.class);
    DataSet<User> usersDS = env.createInput(users);

    DataSet<Tuple2<String, Integer>> res = usersDS
        .groupBy((KeySelector<User, String>) value -> String.valueOf(value.getName()))
        .reduceGroup((GroupReduceFunction<User, Tuple2<String, Integer>>) (values, out) -> {
            for (User u : values) {
                out.collect(new Tuple2<>(u.getName().toString(), 1));
            }
        })
        .returns(Types.TUPLE(Types.STRING, Types.INT));

    res.writeAsText(resultPath);
    env.execute("Avro Key selection");

    expected = "(Charlie,1)\n(Alyssa,1)\n";
}
Example 5
Source File: ReplicatingDataSourceTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Tests compiler fail for join program with replicated data source behind rebalance.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindRebalance() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
        new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
        .rebalance()
        .join(source2).where("*").equalTo("*")
        .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
}
Example 6
Source File: FlinkUtil.java From kylin with Apache License 2.0 | 6 votes |
public static DataSet parseInputPath(String inputPath, FileSystem fs, ExecutionEnvironment env,
        Class keyClass, Class valueClass) throws IOException {
    List<String> inputFolders = Lists.newArrayList();
    Path inputHDFSPath = new Path(inputPath);
    FileStatus[] fileStatuses = fs.listStatus(inputHDFSPath);
    boolean hasDir = false;
    for (FileStatus stat : fileStatuses) {
        if (stat.isDirectory() && !stat.getPath().getName().startsWith("_")) {
            hasDir = true;
            inputFolders.add(stat.getPath().toString());
        }
    }

    if (!hasDir) {
        return env.createInput(HadoopInputs.readSequenceFile(keyClass, valueClass, inputHDFSPath.toString()));
    }

    Job job = Job.getInstance();
    FileInputFormat.setInputPaths(job, StringUtil.join(inputFolders, ","));
    return env.createInput(HadoopInputs.createHadoopInput(new SequenceFileInputFormat(), keyClass, valueClass, job));
}
Example 7
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Tests compiler fail for join program with replicated data source and changing parallelism.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputChangingparallelism() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
        new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
        .join(source2).where("*").equalTo("*").setParallelism(DEFAULT_PARALLELISM + 2)
        .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
}
Example 8
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Tests join program with replicated data source behind flatMap.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindFlatMap() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
        new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
        .flatMap(new IdFlatMap())
        .join(source2).where("*").equalTo("*")
        .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    // when join should have forward strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Example 9
Source File: JoinCancelingITCase.java From flink with Apache License 2.0 | 5 votes |
private void executeTask(
        JoinFunction<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> joiner,
        boolean slow, int parallelism) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<Integer, Integer>> input1 = env.createInput(new InfiniteIntegerTupleInputFormat(slow));
    DataSet<Tuple2<Integer, Integer>> input2 = env.createInput(new InfiniteIntegerTupleInputFormat(slow));

    input1.join(input2, JoinOperatorBase.JoinHint.REPARTITION_SORT_MERGE)
        .where(0)
        .equalTo(0)
        .with(joiner)
        .output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());

    env.setParallelism(parallelism);
    runAndCancelJob(env.createProgramPlan(), 5 * 1000, 10 * 1000);
}
Example 10
Source File: ReplicatingDataSourceTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Tests join program with replicated data source behind map.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMap() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
        new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
        .map(new IdMap())
        .join(source2).where("*").equalTo("*")
        .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    // when join should have forward strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Example 11
Source File: ReplicatingDataSourceTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Tests cross program with replicated data source.
 */
@Test
public void checkCrossWithReplicatedSourceInput() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
        new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
        .cross(source2)
        .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    // when cross should have forward strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode crossNode = (DualInputPlanNode) sinkNode.getPredecessor();

    ShipStrategyType crossIn1 = crossNode.getInput1().getShipStrategy();
    ShipStrategyType crossIn2 = crossNode.getInput2().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, crossIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, crossIn2);
}
Example 12
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Tests join program with replicated data source behind map partition.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMapPartition() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
        new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
        .mapPartition(new IdPMap())
        .join(source2).where("*").equalTo("*")
        .writeAsText("/some/newpath");

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    // when join should have forward strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Example 13
Source File: BatchPojoExample.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    List<CustomCassandraAnnotatedPojo> customCassandraAnnotatedPojos = IntStream.range(0, 20)
        .mapToObj(x -> new CustomCassandraAnnotatedPojo(UUID.randomUUID().toString(), x, 0))
        .collect(Collectors.toList());

    DataSet<CustomCassandraAnnotatedPojo> dataSet = env.fromCollection(customCassandraAnnotatedPojos);

    ClusterBuilder clusterBuilder = new ClusterBuilder() {
        private static final long serialVersionUID = -1754532803757154795L;

        @Override
        protected Cluster buildCluster(Cluster.Builder builder) {
            return builder.addContactPoints("127.0.0.1").build();
        }
    };

    dataSet.output(new CassandraPojoOutputFormat<>(clusterBuilder, CustomCassandraAnnotatedPojo.class,
        () -> new Mapper.Option[]{Mapper.Option.saveNullFields(true)}));

    env.execute("zhisheng");

    /*
     * This is for the purpose of showing an example of creating a DataSet using CassandraPojoInputFormat.
     */
    DataSet<CustomCassandraAnnotatedPojo> inputDS = env
        .createInput(new CassandraPojoInputFormat<>(
            SELECT_QUERY,
            clusterBuilder,
            CustomCassandraAnnotatedPojo.class,
            () -> new Mapper.Option[]{Mapper.Option.consistencyLevel(ConsistencyLevel.ANY)}
        ));

    inputDS.print();
}
Example 14
Source File: AvroTypeExtractionTest.java From flink with Apache License 2.0 | 4 votes |
private void testField(final String fieldName) throws Exception {
    before();

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    Path in = new Path(inFile.getAbsoluteFile().toURI());

    AvroInputFormat<User> users = new AvroInputFormat<>(in, User.class);
    DataSet<User> usersDS = env.createInput(users);

    DataSet<Object> res = usersDS
        .groupBy(fieldName)
        .reduceGroup((GroupReduceFunction<User, Object>) (values, out) -> {
            for (User u : values) {
                out.collect(u.get(fieldName));
            }
        })
        .returns(Object.class);

    res.writeAsText(resultPath);
    env.execute("Simple Avro read job");

    // test if automatic registration of the Types worked
    ExecutionConfig ec = env.getConfig();
    Assert.assertTrue(ec.getRegisteredKryoTypes().contains(Fixed16.class));

    switch (fieldName) {
        case "name":
            expected = "Alyssa\nCharlie";
            break;
        case "type_enum":
            expected = "GREEN\nRED\n";
            break;
        case "type_double_test":
            expected = "123.45\n1.337\n";
            break;
        default:
            Assert.fail("Unknown field");
            break;
    }

    after();
}
Example 15
Source File: FlinkPravegaInputFormatITCase.java From flink-connectors with Apache License 2.0 | 4 votes |
/**
 * Verifies that the input format:
 *  - correctly reads all records in a given set of multiple Pravega streams
 *  - allows multiple executions
 */
@Test
public void testBatchInput() throws Exception {
    final int numElements1 = 100;
    final int numElements2 = 300;

    // set up the stream
    final String streamName1 = RandomStringUtils.randomAlphabetic(20);
    final String streamName2 = RandomStringUtils.randomAlphabetic(20);

    final Set<String> streams = new HashSet<>();
    streams.add(streamName1);
    streams.add(streamName2);

    SETUP_UTILS.createTestStream(streamName1, 3);
    SETUP_UTILS.createTestStream(streamName2, 5);

    try (
        final EventStreamWriter<Integer> eventWriter1 = SETUP_UTILS.getIntegerWriter(streamName1);
        final EventStreamWriter<Integer> eventWriter2 = SETUP_UTILS.getIntegerWriter(streamName2);

        // create the producer that writes to the stream
        final ThrottledIntegerWriter producer1 = new ThrottledIntegerWriter(
            eventWriter1,
            numElements1,
            numElements1 + 1,  // no need to block writer for a batch test
            0,
            false
        );

        final ThrottledIntegerWriter producer2 = new ThrottledIntegerWriter(
            eventWriter2,
            numElements2,
            numElements2 + 1,  // no need to block writer for a batch test
            0,
            false
        )
    ) {
        // write batch input
        producer1.start();
        producer2.start();
        producer1.sync();
        producer2.sync();

        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(3);

        // simple pipeline that reads from Pravega and collects the events
        DataSet<Integer> integers = env.createInput(
            FlinkPravegaInputFormat.<Integer>builder()
                .forStream(streamName1)
                .forStream(streamName2)
                .withPravegaConfig(SETUP_UTILS.getPravegaConfig())
                .withDeserializationSchema(new IntegerDeserializationSchema())
                .build(),
            BasicTypeInfo.INT_TYPE_INFO
        );

        // verify that all events were read
        Assert.assertEquals(numElements1 + numElements2, integers.collect().size());

        // this verifies that the input format allows multiple passes
        Assert.assertEquals(numElements1 + numElements2, integers.collect().size());
    }
}
Example 16
Source File: TPCHQuery3Parquet.java From parquet-flinktacular with Apache License 2.0 | 4 votes |
private static DataSet<Tuple2<Void, OrderTable>> getOrdersDataSet(ExecutionEnvironment env) throws IOException {
    Job job = Job.getInstance();
    ParquetInputFormat.setReadSupportClass(job, ThriftReadSupport.class);
    job.getConfiguration().set("parquet.thrift.column.filter", "ID;CUSTKEY;ORDERDATE;SHIP_PRIORITY");

    HadoopInputFormat hadoopInputFormat =
        new HadoopInputFormat(new ParquetThriftInputFormat(), Void.class, OrderTable.class, job);

    ParquetThriftInputFormat.addInputPath(job, new Path(ordersPath));

    // Filter all Orders with o_orderdate < 12.03.1995
    ParquetThriftInputFormat.setUnboundRecordFilter(job, OrderFilter.class);

    DataSet<Tuple2<Void, OrderTable>> data = env.createInput(hadoopInputFormat);

    return data;
}
Example 17
Source File: TPCHQuery3Parquet.java From parquet-flinktacular with Apache License 2.0 | 4 votes |
private static DataSet<Tuple2<Void, LineitemTable>> getLineitemDataSet(ExecutionEnvironment env) throws IOException {
    Job job = Job.getInstance();
    ParquetInputFormat.setReadSupportClass(job, ThriftReadSupport.class);
    job.getConfiguration().set("parquet.thrift.column.filter", "ORDERKEY;EXTENDEDPRICE;DISCOUNT;SHIPDATE");

    HadoopInputFormat hadoopInputFormat =
        new HadoopInputFormat(new ParquetThriftInputFormat(), Void.class, LineitemTable.class, job);

    // Filter all Lineitems with l_shipdate > 12.03.1995
    ParquetThriftInputFormat.setUnboundRecordFilter(job, LineitemFilter.class);

    ParquetThriftInputFormat.addInputPath(job, new Path(lineitemPath));

    DataSet<Tuple2<Void, LineitemTable>> data = env.createInput(hadoopInputFormat);

    return data;
}
Example 18
Source File: AvroExternalJarProgram.java From Flink-CEPplus with Apache License 2.0 | 3 votes |
public static void main(String[] args) throws Exception {
    String inputPath = args[0];

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<MyUser> input = env.createInput(new AvroInputFormat<MyUser>(new Path(inputPath), MyUser.class));

    DataSet<Tuple2<String, MyUser>> result = input.map(new NameExtractor()).groupBy(0).reduce(new NameGrouper());

    result.output(new DiscardingOutputFormat<Tuple2<String, MyUser>>());
    env.execute();
}
Example 19
Source File: AvroExternalJarProgram.java From flink with Apache License 2.0 | 3 votes |
public static void main(String[] args) throws Exception {
    String inputPath = args[0];

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<MyUser> input = env.createInput(new AvroInputFormat<MyUser>(new Path(inputPath), MyUser.class));

    DataSet<Tuple2<String, MyUser>> result = input.map(new NameExtractor()).groupBy(0).reduce(new NameGrouper());

    result.output(new DiscardingOutputFormat<Tuple2<String, MyUser>>());
    env.execute();
}