org.apache.flink.api.java.operators.DataSource Java Examples
The following examples show how to use
org.apache.flink.api.java.operators.DataSource.
The examples are taken from open-source projects; the originating project and source file are noted above each example.
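Before the examples, a quick orientation: in Flink's DataSet API, input methods on ExecutionEnvironment such as fromElements, readTextFile, and fromCollection return a DataSource, the DataSet subclass that represents a job's input operator (readCsvFile goes through a CsvReader builder that also ends in a DataSource). A minimal, self-contained sketch (class name illustrative):

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DataSource;

public class DataSourceQuickStart {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // fromElements returns a DataSource<String>; like any DataSet it can be transformed.
        DataSource<String> source = env.fromElements("hadoop", "spark", "flink");

        // print() is an eager action that triggers execution of the program.
        source.map(new MapFunction<String, String>() {
            @Override
            public String map(String value) {
                return value.toUpperCase();
            }
        }).print();
    }
}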
Example #1
Source File: JavaDistributeApp.java From 163-bigdate-note with GNU General Public License v3.0

public static void main(String[] args) throws Exception {
    ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    String filePath = "file:\\D:\\imooc\\新一代大数据计算引擎 Flink从入门到实战-v\\input\\hello.txt";
    // 1. Register a local file in the distributed cache
    environment.registerCachedFile(filePath, "java-cf");
    DataSource<String> data = environment.fromElements("hadoop", "spark", "flink", "pyspark", "storm");
    data.map(new RichMapFunction<String, String>() {
        List<String> list = new ArrayList<>();

        @Override
        public void open(Configuration parameters) throws Exception {
            File file = getRuntimeContext().getDistributedCache().getFile("java-cf");
            List<String> lines = FileUtils.readLines(file);
            for (String line : lines) {
                System.out.println("line: " + line);
            }
        }

        @Override
        public String map(String value) throws Exception {
            return value;
        }
    }).print();
}
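A note on the pattern above: registerCachedFile makes the file available on every TaskManager that executes the job, and RichMapFunction#open runs once per parallel task instance before any map call, so the cached file is fetched and read there rather than once per record.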
Example #2
Source File: CSVReaderTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void testFieldTypes() throws Exception {
    CsvReader reader = getCsvReader();
    DataSource<Item> items = reader.tupleType(Item.class);
    TypeInformation<?> info = items.getType();

    if (!info.isTupleType()) {
        Assert.fail();
    } else {
        TupleTypeInfo<?> tinfo = (TupleTypeInfo<?>) info;
        Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0));
        Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1));
        Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2));
        Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(3));
    }

    CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) items.getInputFormat();
    Assert.assertArrayEquals(
        new Class<?>[]{Integer.class, String.class, Double.class, String.class},
        inputFormat.getFieldTypes());
}
Example #3
Source File: ExecutionEnvironment.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/** * Creates a new data set that contains the given elements. The framework will determine the type according to the * based type user supplied. The elements should be the same or be the subclass to the based type. * The sequence of elements must not be empty. * Note that this operation will result in a non-parallel data source, i.e. a data source with * a parallelism of one. * * @param type The base class type for every element in the collection. * @param data The elements to make up the data set. * @return A DataSet representing the given list of elements. */ @SafeVarargs public final <X> DataSource<X> fromElements(Class<X> type, X... data) { if (data == null) { throw new IllegalArgumentException("The data must not be null."); } if (data.length == 0) { throw new IllegalArgumentException("The number of elements must not be zero."); } TypeInformation<X> typeInfo; try { typeInfo = TypeExtractor.getForClass(type); } catch (Exception e) { throw new RuntimeException("Could not create TypeInformation for type " + type.getName() + "; please specify the TypeInformation manually via " + "ExecutionEnvironment#fromElements(Collection, TypeInformation)", e); } return fromCollection(Arrays.asList(data), typeInfo, Utils.getCallLocationName()); }
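A minimal usage sketch for this overload (the values here are illustrative, not from the original source): the shared base class is passed explicitly, so elements of different subclasses can populate one data set.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

// Integer, Long, and Double are all subclasses of the supplied base type Number.
DataSource<Number> numbers = env.fromElements(Number.class, 1, 2L, 3.0);
numbers.print();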
Example #4
Source File: ExecutionEnvironment.java From flink with Apache License 2.0

/**
 * Creates a new data set that contains the given elements. The framework will determine the type
 * according to the base type supplied by the user. The elements must be of that base type or of a
 * subclass of it. The sequence of elements must not be empty.
 *
 * <p>Note that this operation will result in a non-parallel data source, i.e. a data source with
 * a parallelism of one.
 *
 * @param type The base class type for every element in the collection.
 * @param data The elements to make up the data set.
 * @return A DataSet representing the given list of elements.
 */
@SafeVarargs
public final <X> DataSource<X> fromElements(Class<X> type, X... data) {
    if (data == null) {
        throw new IllegalArgumentException("The data must not be null.");
    }
    if (data.length == 0) {
        throw new IllegalArgumentException("The number of elements must not be zero.");
    }

    TypeInformation<X> typeInfo;
    try {
        typeInfo = TypeExtractor.getForClass(type);
    }
    catch (Exception e) {
        throw new RuntimeException("Could not create TypeInformation for type " + type.getName()
                + "; please specify the TypeInformation manually via "
                + "ExecutionEnvironment#fromElements(Collection, TypeInformation)", e);
    }
    return fromCollection(Arrays.asList(data), typeInfo, Utils.getCallLocationName());
}
Example #5
Source File: CSVReaderTest.java From flink with Apache License 2.0

@Test
public void testSubClassWithPartialsInHierarchie() throws Exception {
    CsvReader reader = getCsvReader();
    DataSource<FinalItem> sitems = reader.tupleType(FinalItem.class);
    TypeInformation<?> info = sitems.getType();

    Assert.assertEquals(true, info.isTupleType());
    Assert.assertEquals(FinalItem.class, info.getTypeClass());

    @SuppressWarnings("unchecked")
    TupleTypeInfo<SubItem> tinfo = (TupleTypeInfo<SubItem>) info;
    Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0));
    Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1));
    Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2));
    Assert.assertEquals(ValueTypeInfo.class, tinfo.getTypeAt(3).getClass());
    Assert.assertEquals(ValueTypeInfo.class, tinfo.getTypeAt(4).getClass());
    Assert.assertEquals(StringValue.class, ((ValueTypeInfo<?>) tinfo.getTypeAt(3)).getTypeClass());
    Assert.assertEquals(LongValue.class, ((ValueTypeInfo<?>) tinfo.getTypeAt(4)).getTypeClass());

    CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) sitems.getInputFormat();
    Assert.assertArrayEquals(
        new Class<?>[] {Integer.class, String.class, Double.class, StringValue.class, LongValue.class},
        inputFormat.getFieldTypes());
}
Example #6
Source File: Main.java From flink-learning with Apache License 2.0

public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);

    DataSource<String> dataSource = env.fromElements(WORDS);

    dataSource
            .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                @Override
                public void flatMap(String line, Collector<Tuple2<String, Integer>> out) throws Exception {
                    String[] words = line.split("\\W+");
                    for (String word : words) {
                        out.collect(new Tuple2<>(word, 1));
                    }
                }
            })
            .groupBy(0)
            .sum(1)
            .print();

    long count = dataSource.count();
    System.out.println(count);
}
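Worth noting in this example: print() and count() are both eager operations in the DataSet API, and each one triggers a separate job execution, so this program runs the pipeline twice: once for the word count and once to count the source elements.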
Example #7
Source File: JavaTableSQLAPI.java From 163-bigdate-note with GNU General Public License v3.0

public static void main(String[] args) throws Exception {
    ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    BatchTableEnvironment tableEnvironment = BatchTableEnvironment.getTableEnvironment(environment);
    String filepath = "file:\\D:\\imooc\\新一代大数据计算引擎 Flink从入门到实战-v\\input\\sales.csv";

    // CSV => DataSet
    DataSource<Sales> csv = environment.readCsvFile(filepath)
            .ignoreFirstLine()
            .pojoType(Sales.class, "transactionId", "customerId", "itemId", "amountPaid");
    //csv.print();

    Table sales = tableEnvironment.fromDataSet(csv);
    tableEnvironment.registerTable("sales", sales);
    Table resultTable = tableEnvironment.sqlQuery(
            "select customerId, sum(amountPaid) money from sales group by customerId");
    DataSet<Row> result = tableEnvironment.toDataSet(resultTable, Row.class);
    result.print();
}
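The flow here is the classic batch Table API round trip: readCsvFile(...).pojoType(...) produces a typed DataSet, fromDataSet exposes the POJO fields as table columns, sqlQuery runs SQL over the registered table, and toDataSet converts the result back into a DataSet<Row> for printing. (BatchTableEnvironment.getTableEnvironment is the factory method used by older Flink releases.)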
Example #8
Source File: CSVReaderTest.java From flink with Apache License 2.0

@Test
public void testSubClass() throws Exception {
    CsvReader reader = getCsvReader();
    DataSource<SubItem> sitems = reader.tupleType(SubItem.class);
    TypeInformation<?> info = sitems.getType();

    Assert.assertEquals(true, info.isTupleType());
    Assert.assertEquals(SubItem.class, info.getTypeClass());

    @SuppressWarnings("unchecked")
    TupleTypeInfo<SubItem> tinfo = (TupleTypeInfo<SubItem>) info;
    Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0));
    Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1));
    Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2));
    Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(3));

    CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) sitems.getInputFormat();
    Assert.assertArrayEquals(
        new Class<?>[]{Integer.class, String.class, Double.class, String.class},
        inputFormat.getFieldTypes());
}
Example #9
Source File: CsvReader.java From Flink-CEPplus with Apache License 2.0

/**
 * Configures the reader to read the CSV data and parse it to the given type. The type must be a
 * subclass of {@link Tuple}. The type information for the fields is obtained from the type class.
 * The type consequently needs to specify all generic field types of the tuple.
 *
 * @param targetType The class of the target type, needs to be a subclass of Tuple.
 * @return The DataSet representing the parsed CSV data.
 */
public <T extends Tuple> DataSource<T> tupleType(Class<T> targetType) {
    Preconditions.checkNotNull(targetType, "The target type class must not be null.");
    if (!Tuple.class.isAssignableFrom(targetType)) {
        throw new IllegalArgumentException("The target type must be a subclass of " + Tuple.class.getName());
    }

    @SuppressWarnings("unchecked")
    TupleTypeInfo<T> typeInfo = (TupleTypeInfo<T>) TypeExtractor.createTypeInfo(targetType);
    CsvInputFormat<T> inputFormat = new TupleCsvInputFormat<T>(
            path, this.lineDelimiter, this.fieldDelimiter, typeInfo, this.includedMask);

    Class<?>[] classes = new Class<?>[typeInfo.getArity()];
    for (int i = 0; i < typeInfo.getArity(); i++) {
        classes[i] = typeInfo.getTypeAt(i).getTypeClass();
    }

    configureInputFormat(inputFormat);
    return new DataSource<T>(executionContext, inputFormat, typeInfo, Utils.getCallLocationName());
}
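For plain tuple types there is also the types(...) shortcut on CsvReader, which builds the same kind of TupleCsvInputFormat under the hood. A small sketch with an illustrative path and delimiter:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

DataSource<Tuple3<Integer, String, Double>> items = env
        .readCsvFile("/path/to/items.csv")   // illustrative path
        .fieldDelimiter("|")
        .types(Integer.class, String.class, Double.class);

items.print();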
Example #10
Source File: GraphGeneratorUtils.java From Flink-CEPplus with Apache License 2.0

/**
 * Generates {@link Vertex Vertices} with sequential, numerical labels.
 *
 * @param env the Flink execution environment.
 * @param parallelism operator parallelism
 * @param vertexCount number of sequential vertex labels
 * @return {@link DataSet} of sequentially labeled {@link Vertex vertices}
 */
public static DataSet<Vertex<LongValue, NullValue>> vertexSequence(
        ExecutionEnvironment env, int parallelism, long vertexCount) {
    Preconditions.checkArgument(vertexCount >= 0, "Vertex count must be non-negative");

    if (vertexCount == 0) {
        return env
            .fromCollection(Collections.emptyList(),
                TypeInformation.of(new TypeHint<Vertex<LongValue, NullValue>>(){}))
            .setParallelism(parallelism)
            .name("Empty vertex set");
    } else {
        LongValueSequenceIterator iterator = new LongValueSequenceIterator(0, vertexCount - 1);

        DataSource<LongValue> vertexLabels = env
            .fromParallelCollection(iterator, LongValue.class)
            .setParallelism(parallelism)
            .name("Vertex indices");

        return vertexLabels
            .map(new CreateVertex())
            .setParallelism(parallelism)
            .name("Vertex sequence");
    }
}
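The interesting part of this generator is env.fromParallelCollection: unlike fromElements, which always yields a parallelism-one source, it takes a SplittableIterator whose value range is divided among the parallel source subtasks. A minimal sketch using the generic NumberSequenceIterator from org.apache.flink.util:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

// The sequence 0..999 is split across all parallel subtasks of the source.
DataSource<Long> ids = env.fromParallelCollection(new NumberSequenceIterator(0L, 999L), Long.class);
ids.print();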
Example #11
Source File: ExecutionEnvironment.java From flink with Apache License 2.0

/**
 * Creates a new data set that contains the given elements. The elements must all be of the same
 * type, for example, all {@link String}s or all {@link Integer}s. The sequence of elements must
 * not be empty.
 *
 * <p>The framework will try to determine the exact type from the collection elements. In case of
 * generic elements, it may be necessary to manually supply the type information via
 * {@link #fromCollection(Collection, TypeInformation)}.
 *
 * <p>Note that this operation will result in a non-parallel data source, i.e. a data source with
 * a parallelism of one.
 *
 * @param data The elements to make up the data set.
 * @return A DataSet representing the given list of elements.
 */
@SafeVarargs
public final <X> DataSource<X> fromElements(X... data) {
    if (data == null) {
        throw new IllegalArgumentException("The data must not be null.");
    }
    if (data.length == 0) {
        throw new IllegalArgumentException("The number of elements must not be zero.");
    }

    TypeInformation<X> typeInfo;
    try {
        typeInfo = TypeExtractor.getForObject(data[0]);
    }
    catch (Exception e) {
        throw new RuntimeException("Could not create TypeInformation for type "
                + data[0].getClass().getName()
                + "; please specify the TypeInformation manually via "
                + "ExecutionEnvironment#fromElements(Collection, TypeInformation)", e);
    }
    return fromCollection(Arrays.asList(data), typeInfo, Utils.getCallLocationName());
}
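As the javadoc above notes, extracting the type from the first element can be insufficient for generic elements; the collection-based variant with explicit TypeInformation is the documented fallback. A sketch:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

// Supplying the TypeInformation explicitly captures the full generic type Tuple2<String, Integer>.
DataSource<Tuple2<String, Integer>> pairs = env.fromCollection(
        Arrays.asList(Tuple2.of("a", 1), Tuple2.of("b", 2)),
        TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {}));

pairs.print();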
Example #12
Source File: BootstrapTransformationTest.java From flink with Apache License 2.0

@Test
public void testOperatorSpecificMaxParallelismRespected() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    DataSource<Integer> input = env.fromElements(0);

    BootstrapTransformation<Integer> transformation = OperatorTransformation
        .bootstrapWith(input)
        .setMaxParallelism(1)
        .transform(new ExampleStateBootstrapFunction());

    int maxParallelism = transformation.getMaxParallelism(4);
    DataSet<TaggedOperatorSubtaskState> result = transformation.writeOperatorSubtaskStates(
        OperatorIDGenerator.fromUid("uid"),
        new MemoryStateBackend(),
        new Path(),
        maxParallelism
    );

    Assert.assertEquals(
        "The parallelism of a data set should be constrained by the savepoint max parallelism",
        1,
        getParallelism(result));
}
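Context for this test: BootstrapTransformation belongs to Flink's State Processor API, which uses a DataSet as the input from which new operator state is written into a savepoint. The assertion checks that the max parallelism set on the individual operator (1) constrains the state-writing DataSet even though the environment parallelism is 4.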
Example #13
Source File: CSVReaderTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void testSubClass() throws Exception {
    CsvReader reader = getCsvReader();
    DataSource<SubItem> sitems = reader.tupleType(SubItem.class);
    TypeInformation<?> info = sitems.getType();

    Assert.assertEquals(true, info.isTupleType());
    Assert.assertEquals(SubItem.class, info.getTypeClass());

    @SuppressWarnings("unchecked")
    TupleTypeInfo<SubItem> tinfo = (TupleTypeInfo<SubItem>) info;
    Assert.assertEquals(BasicTypeInfo.INT_TYPE_INFO, tinfo.getTypeAt(0));
    Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(1));
    Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tinfo.getTypeAt(2));
    Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tinfo.getTypeAt(3));

    CsvInputFormat<?> inputFormat = (CsvInputFormat<?>) sitems.getInputFormat();
    Assert.assertArrayEquals(
        new Class<?>[]{Integer.class, String.class, Double.class, String.class},
        inputFormat.getFieldTypes());
}
Example #14
Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void checkSinglePartitionedGroupedSource2() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class);

    data.getSplitDataProperties()
        .splitsPartitionedBy(0)
        .splitsGroupedBy(1, 0);

    data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(0, 1)));
    Assert.assertTrue(lprops.getOrdering() == null);
}
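This and the following PropertyDataSourceTest cases all share one pattern: getSplitDataProperties() lets a DataSource declare that its input splits already arrive partitioned, grouped, or ordered on certain fields. The optimizer records these as global and local properties of the source (hence the ANY_PARTITIONING and grouped-field assertions) and can then omit otherwise-redundant shuffles and sorts; each test compiles a trivial plan and inspects the source node to confirm the declared properties were picked up.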
Example #15
Source File: PropertyDataSourceTest.java From flink with Apache License 2.0

@Test
public void checkSinglePartitionedOrderedSource6() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

    data.getSplitDataProperties()
        .splitsPartitionedBy("f1.intField")
        .splitsOrderedBy("f1", new Order[]{Order.DESCENDING});

    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(2)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(1, 2, 3)));
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #16
Source File: PartitionOperatorTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void testRangePartitionByComplexKeyWithOrders() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    final DataSource<Tuple2<Tuple2<Integer, Integer>, Integer>> ds = env.fromElements(
        new Tuple2<>(new Tuple2<>(1, 1), 1),
        new Tuple2<>(new Tuple2<>(2, 2), 2),
        new Tuple2<>(new Tuple2<>(2, 2), 2)
    );
    ds.partitionByRange(0, 1).withOrders(Order.ASCENDING, Order.DESCENDING);
}
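Two details in this example: partitionByRange determines its range boundaries by sampling the data at runtime, and withOrders supplies one Order per partition key, here for the composite key made of the nested Tuple2 at position 0 and the Integer at position 1. The test only verifies that such a composite key with per-key orders is accepted.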
Example #17
Source File: PropertyDataSourceTest.java From flink with Apache License 2.0

@Test
public void checkSinglePartitionedGroupedSource5() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

    data.getSplitDataProperties()
        .splitsPartitionedBy("f2")
        .splitsGroupedBy("f2");

    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(4)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(4)));
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #18
Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void checkSinglePartitionedGroupedSource1() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class);

    data.getSplitDataProperties()
        .splitsPartitionedBy(0)
        .splitsGroupedBy(0);

    data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(0)));
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #19
Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void checkSinglePartitionedGroupedSource6() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

    data.getSplitDataProperties()
        .splitsPartitionedBy("f1.intField")
        .splitsGroupedBy("f0; f1.intField");

    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(2)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(0, 2)));
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #20
Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void checkSinglePartitionedGroupedSource7() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

    data.getSplitDataProperties()
        .splitsPartitionedBy("f1.intField")
        .splitsGroupedBy("f1");

    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(2)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(1, 2, 3)));
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #21
Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void checkSinglePartitionedGroupedSource8() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

    data.getSplitDataProperties()
        .splitsPartitionedBy("f1")
        .splitsGroupedBy("f1.stringField");

    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1, 2, 3)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(lprops.getGroupedFields() == null);
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #22
Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void checkSinglePartitionedOrderedSource2() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class);

    data.getSplitDataProperties()
        .splitsPartitionedBy(1)
        .splitsOrderedBy(new int[]{1, 0}, new Order[]{Order.ASCENDING, Order.DESCENDING});

    data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue((new FieldSet(lprops.getGroupedFields().toArray())).equals(new FieldSet(1, 0)));
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #23
Source File: PropertyDataSourceTest.java From flink with Apache License 2.0

@Test
public void checkSinglePartitionedOrderedSource4() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class);

    data.getSplitDataProperties()
        .splitsPartitionedBy(0, 1)
        .splitsOrderedBy(new int[]{1}, new Order[]{Order.DESCENDING});

    data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0, 1)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(lprops.getGroupedFields() == null);
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #24
Source File: PropertyDataSourceTest.java From flink with Apache License 2.0

@Test
public void checkSinglePartitionedSource2() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class);

    data.getSplitDataProperties()
        .splitsPartitionedBy(1, 0);

    data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0, 1)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(lprops.getGroupedFields() == null);
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #25
Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void checkSinglePartitionedGroupedSource5() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

    data.getSplitDataProperties()
        .splitsPartitionedBy("f2")
        .splitsGroupedBy("f2");

    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(4)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(4)));
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #26
Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void checkSinglePartitionedSource2() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class);

    data.getSplitDataProperties()
        .splitsPartitionedBy(1, 0);

    data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0, 1)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(lprops.getGroupedFields() == null);
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #27
Source File: PropertyDataSourceTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void checkSinglePartitionedOrderedSource7() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

    data.getSplitDataProperties()
        .splitsPartitionedBy("f1")
        .splitsOrderedBy("f1.stringField", new Order[]{Order.ASCENDING});

    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

    Plan plan = env.createProgramPlan();

    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();

    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1, 2, 3)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(lprops.getGroupedFields() == null);
    Assert.assertTrue(lprops.getOrdering() == null);
}
Example #28
Source File: ParquetTableSourceTest.java From flink with Apache License 2.0

@Test
public void testFieldsProjection() throws Exception {
    ParquetTableSource parquetTableSource = createNestedTestParquetTableSource(testPath);
    ParquetTableSource projected = (ParquetTableSource) parquetTableSource.projectFields(new int[] {2, 4, 6});

    // ensure a new reference is returned
    assertNotSame(projected, parquetTableSource);

    // ensure table schema is the same
    assertEquals(parquetTableSource.getTableSchema(), projected.getTableSchema());

    // ensure that table source description differs
    assertNotEquals(parquetTableSource.explainSource(), projected.explainSource());

    String[] fieldNames = ((RowTypeInfo) NESTED_ROW_TYPE).getFieldNames();
    TypeInformation[] fieldTypes = ((RowTypeInfo) NESTED_ROW_TYPE).getFieldTypes();
    assertEquals(
        Types.ROW_NAMED(
            new String[] {fieldNames[2], fieldNames[4], fieldNames[6]},
            fieldTypes[2], fieldTypes[4], fieldTypes[6]
        ),
        projected.getReturnType()
    );

    // ensure ParquetInputFormat is configured with selected fields
    DataSet<Row> data = projected.getDataSet(ExecutionEnvironment.createLocalEnvironment());
    InputFormat<Row, ?> inputFormat = ((DataSource<Row>) data).getInputFormat();
    assertTrue(inputFormat instanceof ParquetRowInputFormat);
    ParquetRowInputFormat parquetIF = (ParquetRowInputFormat) inputFormat;
    assertArrayEquals(new String[] {fieldNames[2], fieldNames[4], fieldNames[6]}, parquetIF.getFieldNames());
    assertArrayEquals(new TypeInformation<?>[] {fieldTypes[2], fieldTypes[4], fieldTypes[6]}, parquetIF.getFieldTypes());
}
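This projection test verifies pushdown end to end: projectFields returns a new ParquetTableSource (same table schema, different description and return type), and when the source is translated into a DataSet, the underlying ParquetRowInputFormat is configured with only the selected field names and types, so the untouched columns are never read.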