org.apache.flink.api.java.operators.DataSource Java Examples
The following examples show how to use
org.apache.flink.api.java.operators.DataSource.
The examples are taken from open-source projects; the originating project and source file are noted above each example.
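Before the examples, a quick orientation: in Flink's DataSet API, input methods on ExecutionEnvironment such as fromElements, readTextFile, and fromCollection return a DataSource, the DataSet subclass that represents a job's input operator (readCsvFile goes through a CsvReader builder that also ends in a DataSource). A minimal, self-contained sketch (class name illustrative):

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DataSource;

public class DataSourceQuickStart {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // fromElements returns a DataSource<String>; like any DataSet it can be transformed.
        DataSource<String> source = env.fromElements("hadoop", "spark", "flink");

        // print() is an eager action that triggers execution of the program.
        source.map(new MapFunction<String, String>() {
            @Override
            public String map(String value) {
                return value.toUpperCase();
            }
        }).print();
    }
}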
Example #1
Source File: JavaDistributeApp.java From 163-bigdate-note with GNU General Public License v3.0

public static void main(String[] args) throws Exception {
    ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    String filePath = "file:\\D:\\imooc\\新一代大数据计算引擎 Flink从入门到实战-v\\input\\hello.txt";
    // 1. Register a local file in the distributed cache
    environment.registerCachedFile(filePath, "java-cf");
    DataSource<String> data = environment.fromElements("hadoop", "spark", "flink", "pyspark", "storm");
    data.map(new RichMapFunction<String, String>() {
        List<String> list = new ArrayList<>();

        @Override
        public void open(Configuration parameters) throws Exception {
            File file = getRuntimeContext().getDistributedCache().getFile("java-cf");
            List<String> lines = FileUtils.readLines(file);
            for (String line : lines) {
                System.out.println("line: " + line);
            }
        }

        @Override
        public String map(String value) throws Exception {
            return value;
        }
    }).print();
}
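A note on the pattern above: registerCachedFile makes the file available on every TaskManager that executes the job, and RichMapFunction#open runs once per parallel task instance before any map call, so the cached file is fetched and read there rather than once per record.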
Example #2
Source File: CSVReaderTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void testFieldTypes() throws Exception {
    CsvReader reader = getCsvReader();
    DataSource<Item> items = reader.tupleType(Item.class);
    TypeInformation<?> info = items.getType();

    if (!info.isTupleType()) {
        Assert.fail();
    } else {
        TupleTypeInfo<?> tinfo = (TupleTypeInfo<?>) info;
        Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0));
        Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1));
        Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2));
        Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(3));
    }

    CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) items.getInputFormat();
    Assert.assertArrayEquals(
        new Class<?>[]{Integer.class, String.class, Double.class, String.class},
        inputFormat.getFieldTypes());
}
Example #3
Source File: ExecutionEnvironment.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/** * Creates a new data set that contains the given elements. The framework will determine the type according to the * based type user supplied. The elements should be the same or be the subclass to the based type. * The sequence of elements must not be empty. * Note that this operation will result in a non-parallel data source, i.e. a data source with * a parallelism of one. * * @param type The base class type for every element in the collection. * @param data The elements to make up the data set. * @return A DataSet representing the given list of elements. */ @SafeVarargs public final <X> DataSource<X> fromElements(Class<X> type, X... data) { if (data == null) { throw new IllegalArgumentException("The data must not be null."); } if (data.length == 0) { throw new IllegalArgumentException("The number of elements must not be zero."); } TypeInformation<X> typeInfo; try { typeInfo = TypeExtractor.getForClass(type); } catch (Exception e) { throw new RuntimeException("Could not create TypeInformation for type " + type.getName() + "; please specify the TypeInformation manually via " + "ExecutionEnvironment#fromElements(Collection, TypeInformation)", e); } return fromCollection(Arrays.asList(data), typeInfo, Utils.getCallLocationName()); }
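A minimal usage sketch for this overload (the values here are illustrative, not from the original source): the shared base class is passed explicitly, so elements of different subclasses can populate one data set.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

// Integer, Long, and Double are all subclasses of the supplied base type Number.
DataSource<Number> numbers = env.fromElements(Number.class, 1, 2L, 3.0);
numbers.print();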
Example #4
Source File: ExecutionEnvironment.java From flink with Apache License 2.0

/**
 * Creates a new data set that contains the given elements. The framework will determine the type
 * according to the base type supplied by the user. The elements must be of that base type or of a
 * subclass of it. The sequence of elements must not be empty.
 *
 * <p>Note that this operation will result in a non-parallel data source, i.e. a data source with
 * a parallelism of one.
 *
 * @param type The base class type for every element in the collection.
 * @param data The elements to make up the data set.
 * @return A DataSet representing the given list of elements.
 */
@SafeVarargs
public final <X> DataSource<X> fromElements(Class<X> type, X... data) {
    if (data == null) {
        throw new IllegalArgumentException("The data must not be null.");
    }
    if (data.length == 0) {
        throw new IllegalArgumentException("The number of elements must not be zero.");
    }

    TypeInformation<X> typeInfo;
    try {
        typeInfo = TypeExtractor.getForClass(type);
    }
    catch (Exception e) {
        throw new RuntimeException("Could not create TypeInformation for type " + type.getName()
                + "; please specify the TypeInformation manually via "
                + "ExecutionEnvironment#fromElements(Collection, TypeInformation)", e);
    }
    return fromCollection(Arrays.asList(data), typeInfo, Utils.getCallLocationName());
}
Example #5
Source File: CSVReaderTest.java From flink with Apache License 2.0

@Test
public void testSubClassWithPartialsInHierarchie() throws Exception {
    CsvReader reader = getCsvReader();
    DataSource<FinalItem> sitems = reader.tupleType(FinalItem.class);
    TypeInformation<?> info = sitems.getType();

    Assert.assertEquals(true, info.isTupleType());
    Assert.assertEquals(FinalItem.class, info.getTypeClass());

    @SuppressWarnings("unchecked")
    TupleTypeInfo<SubItem> tinfo = (TupleTypeInfo<SubItem>) info;
    Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0));
    Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1));
    Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2));
    Assert.assertEquals(ValueTypeInfo.class, tinfo.getTypeAt(3).getClass());
    Assert.assertEquals(ValueTypeInfo.class, tinfo.getTypeAt(4).getClass());
    Assert.assertEquals(StringValue.class, ((ValueTypeInfo<?>) tinfo.getTypeAt(3)).getTypeClass());
    Assert.assertEquals(LongValue.class, ((ValueTypeInfo<?>) tinfo.getTypeAt(4)).getTypeClass());

    CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) sitems.getInputFormat();
    Assert.assertArrayEquals(
        new Class<?>[] {Integer.class, String.class, Double.class, StringValue.class, LongValue.class},
        inputFormat.getFieldTypes());
}
Example #6
Source File: Main.java From flink-learning with Apache License 2.0

public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);

    DataSource<String> dataSource = env.fromElements(WORDS);

    dataSource
            .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                @Override
                public void flatMap(String line, Collector<Tuple2<String, Integer>> out) throws Exception {
                    String[] words = line.split("\\W+");
                    for (String word : words) {
                        out.collect(new Tuple2<>(word, 1));
                    }
                }
            })
            .groupBy(0)
            .sum(1)
            .print();

    long count = dataSource.count();
    System.out.println(count);
}
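Worth noting in this example: print() and count() are both eager operations in the DataSet API, and each one triggers a separate job execution, so this program runs the pipeline twice: once for the word count and once to count the source elements.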
Example #7
Source File: JavaTableSQLAPI.java From 163-bigdate-note with GNU General Public License v3.0

public static void main(String[] args) throws Exception {
    ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    BatchTableEnvironment tableEnvironment = BatchTableEnvironment.getTableEnvironment(environment);
    String filepath = "file:\\D:\\imooc\\新一代大数据计算引擎 Flink从入门到实战-v\\input\\sales.csv";

    // CSV => DataSet
    DataSource<Sales> csv = environment.readCsvFile(filepath)
            .ignoreFirstLine()
            .pojoType(Sales.class, "transactionId", "customerId", "itemId", "amountPaid");
    //csv.print();

    Table sales = tableEnvironment.fromDataSet(csv);
    tableEnvironment.registerTable("sales", sales);
    Table resultTable = tableEnvironment.sqlQuery(
            "select customerId, sum(amountPaid) money from sales group by customerId");
    DataSet<Row> result = tableEnvironment.toDataSet(resultTable, Row.class);
    result.print();
}
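The flow here is the classic batch Table API round trip: readCsvFile(...).pojoType(...) produces a typed DataSet, fromDataSet exposes the POJO fields as table columns, sqlQuery runs SQL over the registered table, and toDataSet converts the result back into a DataSet<Row> for printing. (BatchTableEnvironment.getTableEnvironment is the factory method used by older Flink releases.)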
Example #8
Source File: CSVReaderTest.java From flink with Apache License 2.0

@Test
public void testSubClass() throws Exception {
    CsvReader reader = getCsvReader();
    DataSource<SubItem> sitems = reader.tupleType(SubItem.class);
    TypeInformation<?> info = sitems.getType();

    Assert.assertEquals(true, info.isTupleType());
    Assert.assertEquals(SubItem.class, info.getTypeClass());

    @SuppressWarnings("unchecked")
    TupleTypeInfo<SubItem> tinfo = (TupleTypeInfo<SubItem>) info;
    Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0));
    Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1));
    Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2));
    Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(3));

    CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) sitems.getInputFormat();
    Assert.assertArrayEquals(
        new Class<?>[]{Integer.class, String.class, Double.class, String.class},
        inputFormat.getFieldTypes());
}
Example #9
Source File: CsvReader.java From Flink-CEPplus with Apache License 2.0

/**
 * Configures the reader to read the CSV data and parse it to the given type. The type must be a
 * subclass of {@link Tuple}. The type information for the fields is obtained from the type class.
 * The type consequently needs to specify all generic field types of the tuple.
 *
 * @param targetType The class of the target type, needs to be a subclass of Tuple.
 * @return The DataSet representing the parsed CSV data.
 */
public <T extends Tuple> DataSource<T> tupleType(Class<T> targetType) {
    Preconditions.checkNotNull(targetType, "The target type class must not be null.");
    if (!Tuple.class.isAssignableFrom(targetType)) {
        throw new IllegalArgumentException("The target type must be a subclass of " + Tuple.class.getName());
    }

    @SuppressWarnings("unchecked")
    TupleTypeInfo<T> typeInfo = (TupleTypeInfo<T>) TypeExtractor.createTypeInfo(targetType);
    CsvInputFormat<T> inputFormat = new TupleCsvInputFormat<T>(
            path, this.lineDelimiter, this.fieldDelimiter, typeInfo, this.includedMask);

    Class<?>[] classes = new Class<?>[typeInfo.getArity()];
    for (int i = 0; i < typeInfo.getArity(); i++) {
        classes[i] = typeInfo.getTypeAt(i).getTypeClass();
    }

    configureInputFormat(inputFormat);
    return new DataSource<T>(executionContext, inputFormat, typeInfo, Utils.getCallLocationName());
}
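For plain tuple types there is also the types(...) shortcut on CsvReader, which builds the same kind of TupleCsvInputFormat under the hood. A small sketch with an illustrative path and delimiter:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

DataSource<Tuple3<Integer, String, Double>> items = env
        .readCsvFile("/path/to/items.csv")   // illustrative path
        .fieldDelimiter("|")
        .types(Integer.class, String.class, Double.class);

items.print();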
Example #10
Source File: GraphGeneratorUtils.java From Flink-CEPplus with Apache License 2.0

/**
 * Generates {@link Vertex Vertices} with sequential, numerical labels.
 *
 * @param env the Flink execution environment.
 * @param parallelism operator parallelism
 * @param vertexCount number of sequential vertex labels
 * @return {@link DataSet} of sequentially labeled {@link Vertex vertices}
 */
public static DataSet<Vertex<LongValue, NullValue>> vertexSequence(
        ExecutionEnvironment env, int parallelism, long vertexCount) {
    Preconditions.checkArgument(vertexCount >= 0, "Vertex count must be non-negative");

    if (vertexCount == 0) {
        return env
            .fromCollection(Collections.emptyList(),
                TypeInformation.of(new TypeHint<Vertex<LongValue, NullValue>>(){}))
            .setParallelism(parallelism)
            .name("Empty vertex set");
    } else {
        LongValueSequenceIterator iterator = new LongValueSequenceIterator(0, vertexCount - 1);

        DataSource<LongValue> vertexLabels = env
            .fromParallelCollection(iterator, LongValue.class)
            .setParallelism(parallelism)
            .name("Vertex indices");

        return vertexLabels
            .map(new CreateVertex())
            .setParallelism(parallelism)
            .name("Vertex sequence");
    }
}
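The interesting part of this generator is env.fromParallelCollection: unlike fromElements, which always yields a parallelism-one source, it takes a SplittableIterator whose value range is divided among the parallel source subtasks. A minimal sketch using the generic NumberSequenceIterator from org.apache.flink.util:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

// The sequence 0..999 is split across all parallel subtasks of the source.
DataSource<Long> ids = env.fromParallelCollection(new NumberSequenceIterator(0L, 999L), Long.class);
ids.print();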
Example #11
Source File: ExecutionEnvironment.java From flink with Apache License 2.0

/**
 * Creates a new data set that contains the given elements. The elements must all be of the same
 * type, for example, all {@link String}s or all {@link Integer}s. The sequence of elements must
 * not be empty.
 *
 * <p>The framework will try to determine the exact type from the collection elements. In case of
 * generic elements, it may be necessary to manually supply the type information via
 * {@link #fromCollection(Collection, TypeInformation)}.
 *
 * <p>Note that this operation will result in a non-parallel data source, i.e. a data source with
 * a parallelism of one.
 *
 * @param data The elements to make up the data set.
 * @return A DataSet representing the given list of elements.
 */
@SafeVarargs
public final <X> DataSource<X> fromElements(X... data) {
    if (data == null) {
        throw new IllegalArgumentException("The data must not be null.");
    }
    if (data.length == 0) {
        throw new IllegalArgumentException("The number of elements must not be zero.");
    }

    TypeInformation<X> typeInfo;
    try {
        typeInfo = TypeExtractor.getForObject(data[0]);
    }
    catch (Exception e) {
        throw new RuntimeException("Could not create TypeInformation for type "
                + data[0].getClass().getName()
                + "; please specify the TypeInformation manually via "
                + "ExecutionEnvironment#fromElements(Collection, TypeInformation)", e);
    }
    return fromCollection(Arrays.asList(data), typeInfo, Utils.getCallLocationName());
}
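As the javadoc above notes, extracting the type from the first element can be insufficient for generic elements; the collection-based variant with explicit TypeInformation is the documented fallback. A sketch:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

// Supplying the TypeInformation explicitly captures the full generic type Tuple2<String, Integer>.
DataSource<Tuple2<String, Integer>> pairs = env.fromCollection(
        Arrays.asList(Tuple2.of("a", 1), Tuple2.of("b", 2)),
        TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {}));

pairs.print();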
Example #12
Source File: BootstrapTransformationTest.java From flink with Apache License 2.0

@Test
public void testOperatorSpecificMaxParallelismRespected() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    DataSource<Integer> input = env.fromElements(0);

    BootstrapTransformation<Integer> transformation = OperatorTransformation
        .bootstrapWith(input)
        .setMaxParallelism(1)
        .transform(new ExampleStateBootstrapFunction());

    int maxParallelism = transformation.getMaxParallelism(4);
    DataSet<TaggedOperatorSubtaskState> result = transformation.writeOperatorSubtaskStates(
        OperatorIDGenerator.fromUid("uid"),
        new MemoryStateBackend(),
        new Path(),
        maxParallelism
    );

    Assert.assertEquals(
        "The parallelism of a data set should be constrained by the savepoint max parallelism",
        1,
        getParallelism(result));
}
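Context for this test: BootstrapTransformation belongs to Flink's State Processor API, which uses a DataSet as the input from which new operator state is written into a savepoint. The assertion checks that the max parallelism set on the individual operator (1) constrains the state-writing DataSet even though the environment parallelism is 4.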
Example #13
Source File: CSVReaderTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void testSubClass() throws Exception {
    CsvReader reader = getCsvReader();
    DataSource<SubItem> sitems = reader.tupleType(SubItem.class);
    TypeInformation<?> info = sitems.getType();

    Assert.assertEquals(true, info.isTupleType());
    Assert.assertEquals(SubItem.class, info.getTypeClass());

    @SuppressWarnings("unchecked")
    TupleTypeInfo<SubItem> tinfo = (TupleTypeInfo<SubItem>) info;
    Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0));
    Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1));
    Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2));
    Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(3));

    CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) sitems.getInputFormat();
    Assert.assertArrayEquals(
        new Class<?>[]{Integer.class, String.class, Double.class, String.class},
        inputFormat.getFieldTypes());
}
Example #14
Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void checkSinglePartitionedGroupedSource2() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class);

    data.getSplitDataProperties()
        .splitsPartitionedBy(0)
        .splitsGroupedBy(1, 0);

    data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(0, 1)));
    Assert.assertTrue(lprops.getOrdering() == null);
}
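This and the following PropertyDataSourceTest cases all share one pattern: getSplitDataProperties() lets a DataSource declare that its input splits already arrive partitioned, grouped, or ordered on certain fields. The optimizer records these as global and local properties of the source (hence the ANY_PARTITIONING and grouped-field assertions) and can then omit otherwise-redundant shuffles and sorts; each test compiles a trivial plan and inspects the source node to confirm the declared properties were picked up.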
Example #15
Source File: PropertyDataSourceTest.java From flink with Apache License 2.0

@Test
public void checkSinglePartitionedOrderedSource6() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

    data.getSplitDataProperties()
        .splitsPartitionedBy("f1.intField")
        .splitsOrderedBy("f1", new Order[]{Order.DESCENDING});

    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(2)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(1, 2, 3)));
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #16
Source File: PartitionOperatorTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void testRangePartitionByComplexKeyWithOrders() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    final DataSource<Tuple2<Tuple2<Integer, Integer>, Integer>> ds = env.fromElements(
        new Tuple2<>(new Tuple2<>(1, 1), 1),
        new Tuple2<>(new Tuple2<>(2, 2), 2),
        new Tuple2<>(new Tuple2<>(2, 2), 2)
    );
    ds.partitionByRange(0, 1).withOrders(Order.ASCENDING, Order.DESCENDING);
}
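Two details in this example: partitionByRange determines its range boundaries by sampling the data at runtime, and withOrders supplies one Order per partition key, here for the composite key made of the nested Tuple2 at position 0 and the Integer at position 1. The test only verifies that such a composite key with per-key orders is accepted.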
Example #17
Source File: PropertyDataSourceTest.java From flink with Apache License 2.0

@Test
public void checkSinglePartitionedGroupedSource5() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

    data.getSplitDataProperties()
        .splitsPartitionedBy("f2")
        .splitsGroupedBy("f2");

    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(4)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(4)));
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #18
Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void checkSinglePartitionedGroupedSource1() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class);

    data.getSplitDataProperties()
        .splitsPartitionedBy(0)
        .splitsGroupedBy(0);

    data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(0)));
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #19
Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void checkSinglePartitionedGroupedSource6() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

    data.getSplitDataProperties()
        .splitsPartitionedBy("f1.intField")
        .splitsGroupedBy("f0; f1.intField");

    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(2)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(0, 2)));
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #20
Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void checkSinglePartitionedGroupedSource7() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

    data.getSplitDataProperties()
        .splitsPartitionedBy("f1.intField")
        .splitsGroupedBy("f1");

    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(2)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(1, 2, 3)));
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #21
Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void checkSinglePartitionedGroupedSource8() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

    data.getSplitDataProperties()
        .splitsPartitionedBy("f1")
        .splitsGroupedBy("f1.stringField");

    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1, 2, 3)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(lprops.getGroupedFields() == null);
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #22
Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void checkSinglePartitionedOrderedSource2() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class);

    data.getSplitDataProperties()
        .splitsPartitionedBy(1)
        .splitsOrderedBy(new int[]{1, 0}, new Order[]{Order.ASCENDING, Order.DESCENDING});

    data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue((new FieldSet(lprops.getGroupedFields().toArray())).equals(new FieldSet(1, 0)));
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #23
Source File: PropertyDataSourceTest.java From flink with Apache License 2.0

@Test
public void checkSinglePartitionedOrderedSource4() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class);

    data.getSplitDataProperties()
        .splitsPartitionedBy(0, 1)
        .splitsOrderedBy(new int[]{1}, new Order[]{Order.DESCENDING});

    data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0, 1)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(lprops.getGroupedFields() == null);
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #24
Source File: PropertyDataSourceTest.java From flink with Apache License 2.0

@Test
public void checkSinglePartitionedSource2() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class);

    data.getSplitDataProperties()
        .splitsPartitionedBy(1, 0);

    data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0, 1)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(lprops.getGroupedFields() == null);
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #25
Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void checkSinglePartitionedGroupedSource5() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

    data.getSplitDataProperties()
        .splitsPartitionedBy("f2")
        .splitsGroupedBy("f2");

    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(4)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(4)));
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #26
Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void checkSinglePartitionedSource2() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class);

    data.getSplitDataProperties()
        .splitsPartitionedBy(1, 0);

    data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0, 1)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(lprops.getGroupedFields() == null);
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #27
Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void checkSinglePartitionedOrderedSource7() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

    data.getSplitDataProperties()
        .splitsPartitionedBy("f1")
        .splitsOrderedBy("f1.stringField", new Order[]{Order.ASCENDING});

    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1, 2, 3)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(lprops.getGroupedFields() == null);
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #28
Source File: ParquetTableSourceTest.java From flink with Apache License 2.0

@Test
public void testFieldsProjection() throws Exception {
    ParquetTableSource parquetTableSource = createNestedTestParquetTableSource(testPath);
    ParquetTableSource projected = (ParquetTableSource) parquetTableSource.projectFields(new int[] {2, 4, 6});

    // ensure a new reference is returned
    assertNotSame(projected, parquetTableSource);

    // ensure table schema is the same
    assertEquals(parquetTableSource.getTableSchema(), projected.getTableSchema());

    // ensure that table source description differs
    assertNotEquals(parquetTableSource.explainSource(), projected.explainSource());

    String[] fieldNames = ((RowTypeInfo) NESTED_ROW_TYPE).getFieldNames();
    TypeInformation[] fieldTypes = ((RowTypeInfo) NESTED_ROW_TYPE).getFieldTypes();
    assertEquals(
        Types.ROW_NAMED(
            new String[] {fieldNames[2], fieldNames[4], fieldNames[6]},
            fieldTypes[2], fieldTypes[4], fieldTypes[6]
        ),
        projected.getReturnType()
    );

    // ensure ParquetInputFormat is configured with selected fields
    DataSet<Row> data = projected.getDataSet(ExecutionEnvironment.createLocalEnvironment());
    InputFormat<Row, ?> inputFormat = ((DataSource<Row>) data).getInputFormat();
    assertTrue(inputFormat instanceof ParquetRowInputFormat);
    ParquetRowInputFormat parquetIF = (ParquetRowInputFormat) inputFormat;
    assertArrayEquals(new String[] {fieldNames[2], fieldNames[4], fieldNames[6]}, parquetIF.getFieldNames());
    assertArrayEquals(new TypeInformation<?>[] {fieldTypes[2], fieldTypes[4], fieldTypes[6]}, parquetIF.getFieldTypes());
}
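This projection test verifies pushdown end to end: projectFields returns a new ParquetTableSource (same table schema, different description and return type), and when the source is translated into a DataSet, the underlying ParquetRowInputFormat is configured with only the selected field names and types, so the untouched columns are never read.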