org.apache.flink.api.java.io.CollectionInputFormat Java Examples

The following examples show how to use org.apache.flink.api.java.io.CollectionInputFormat. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage in the sidebar.
Example #1
Source File: TestValuesTableFactory.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the scan runtime provider that serves this test table's pre-registered rows,
 * backed either by a {@code SourceFunction} or an {@code InputFormat} depending on the
 * configured {@code runtimeSource}.
 *
 * @param runtimeProviderContext context used to create the row serializer and converter
 * @return a provider producing the converted test rows
 * @throws IllegalArgumentException if {@code runtimeSource} names an unsupported kind
 */
@SuppressWarnings("unchecked")
@Override
public ScanRuntimeProvider getScanRuntimeProvider(ScanContext runtimeProviderContext) {
	final TypeSerializer<RowData> rowSerializer = (TypeSerializer<RowData>) runtimeProviderContext
		.createTypeInformation(physicalSchema.toRowDataType())
		.createSerializer(new ExecutionConfig());
	final DataStructureConverter rowConverter =
		runtimeProviderContext.createDataStructureConverter(physicalSchema.toRowDataType());
	rowConverter.open(RuntimeConverter.Context.create(TestValuesTableFactory.class.getClassLoader()));
	final Collection<RowData> rows = convertToRowData(data, projectedFields, rowConverter);

	switch (runtimeSource) {
		case "SourceFunction":
			try {
				return SourceFunctionProvider.of(
					new FromElementsFunction<>(rowSerializer, rows),
					bounded);
			} catch (IOException e) {
				// FromElementsFunction serializes its elements eagerly; surface failures unchecked.
				throw new RuntimeException(e);
			}
		case "InputFormat":
			return InputFormatProvider.of(new CollectionInputFormat<>(rows, rowSerializer));
		default:
			throw new IllegalArgumentException("Unsupported runtime source class: " + runtimeSource);
	}
}
 
Example #2
Source File: ExecutionEnvironment.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a non-parallel {@link DataSource} over the given collection, validating the
 * elements against the supplied type information first.
 *
 * @param data the elements backing the data source
 * @param type type information describing (and used to serialize) the elements
 * @param callLocationName description of the call site, used for operator naming
 * @return a DataSource reading from a {@link CollectionInputFormat}
 */
private <X> DataSource<X> fromCollection(Collection<X> data, TypeInformation<X> type, String callLocationName) {
	CollectionInputFormat.checkCollection(data, type.getTypeClass());
	final CollectionInputFormat<X> inputFormat = new CollectionInputFormat<>(data, type.createSerializer(config));
	return new DataSource<>(this, inputFormat, type, callLocationName);
}
 
Example #3
Source File: HiveTableSinkTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Returns an input format that serves this source's pre-defined rows, serialized with a
 * serializer derived from the row type information.
 */
@Override
public InputFormat<Row, ?> getInputFormat() {
	final ExecutionConfig executionConfig = new ExecutionConfig();
	return new CollectionInputFormat<>(data, rowTypeInfo.createSerializer(executionConfig));
}
 
Example #4
Source File: ExecutionEnvironment.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a non-parallel {@link DataSource} over the given collection, validating the
 * elements against the supplied type information first.
 *
 * @param data the elements backing the data source
 * @param type type information describing (and used to serialize) the elements
 * @param callLocationName description of the call site, used for operator naming
 * @return a DataSource reading from a {@link CollectionInputFormat}
 */
private <X> DataSource<X> fromCollection(Collection<X> data, TypeInformation<X> type, String callLocationName) {
	CollectionInputFormat.checkCollection(data, type.getTypeClass());
	final CollectionInputFormat<X> inputFormat = new CollectionInputFormat<>(data, type.createSerializer(config));
	return new DataSource<>(this, inputFormat, type, callLocationName);
}
 
Example #5
Source File: HiveTableSinkITCase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Returns an input format that serves this source's pre-defined rows, serialized with a
 * serializer derived from the row type information.
 */
@Override
public InputFormat<Row, ?> getInputFormat() {
	final ExecutionConfig executionConfig = new ExecutionConfig();
	return new CollectionInputFormat<>(data, rowTypeInfo.createSerializer(executionConfig));
}
 
Example #6
Source File: FileSystemTableSource.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the input format that reads this table's files via the configured
 * {@code FileSystemFormatFactory}, supplying schema, partition, projection, limit and
 * filter information through a {@code ReaderContext}.
 *
 * @return the reader input format, or an empty collection-based format when the table is
 *         partitioned but no partitions exist
 */
private InputFormat<RowData, ?> getInputFormat() {
	// When this table is partitioned but has no partitions, just return an empty source.
	// The serializer may be null here because the collection is empty and never serialized.
	if (!partitionKeys.isEmpty() && getOrFetchPartitions().isEmpty()) {
		return new CollectionInputFormat<>(new ArrayList<>(), null);
	}

	FileSystemFormatFactory formatFactory = createFormatFactory(properties);
	Configuration conf = new Configuration();
	properties.forEach(conf::setString);
	return formatFactory.createReader(new FileSystemFormatFactory.ReaderContext() {

		@Override
		public TableSchema getSchema() {
			return schema;
		}

		@Override
		public ReadableConfig getFormatOptions() {
			// Expose only the options scoped under "<factoryIdentifier>." to the format.
			return new DelegatingConfiguration(conf, formatFactory.factoryIdentifier() + ".");
		}

		@Override
		public List<String> getPartitionKeys() {
			return partitionKeys;
		}

		@Override
		public String getDefaultPartName() {
			return defaultPartName;
		}

		@Override
		public Path[] getPaths() {
			// Non-partitioned: read the table root; partitioned: one path per partition,
			// built from each partition's full linked spec.
			if (partitionKeys.isEmpty()) {
				return new Path[] {path};
			} else {
				return getOrFetchPartitions().stream()
						.map(FileSystemTableSource.this::toFullLinkedPartSpec)
						.map(PartitionPathUtils::generatePartitionPath)
						.map(n -> new Path(path, n))
						.toArray(Path[]::new);
			}
		}

		@Override
		public int[] getProjectFields() {
			return readFields();
		}

		@Override
		public long getPushedDownLimit() {
			// No pushed-down limit is represented as Long.MAX_VALUE (read everything).
			return limit == null ? Long.MAX_VALUE : limit;
		}

		@Override
		public List<Expression> getPushedDownFilters() {
			return filters == null ? Collections.emptyList() : filters;
		}
	});
}
 
Example #7
Source File: ExecutionEnvironment.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a non-parallel {@link DataSource} over the given collection, validating the
 * elements against the supplied type information first.
 *
 * @param data the elements backing the data source
 * @param type type information describing (and used to serialize) the elements
 * @param callLocationName description of the call site, used for operator naming
 * @return a DataSource reading from a {@link CollectionInputFormat}
 */
private <X> DataSource<X> fromCollection(Collection<X> data, TypeInformation<X> type, String callLocationName) {
	CollectionInputFormat.checkCollection(data, type.getTypeClass());
	final CollectionInputFormat<X> inputFormat = new CollectionInputFormat<>(data, type.createSerializer(config));
	return new DataSource<>(this, inputFormat, type, callLocationName);
}
 
Example #8
Source File: ExecutionEnvironment.java    From Flink-CEPplus with Apache License 2.0 3 votes vote down vote up
/**
 * Creates a DataSet from the given non-empty collection. The type of the data set is that
 * of the elements in the collection.
 *
 * <p>The framework will try and determine the exact type from the collection elements.
 * In case of generic elements, it may be necessary to manually supply the type information
 * via {@link #fromCollection(Collection, TypeInformation)}.
 *
 * <p>Note that this operation will result in a non-parallel data source, i.e. a data source with
 * a parallelism of one.
 *
 * @param data The collection of elements to create the data set from.
 * @return A DataSet representing the given collection.
 * @throws IllegalArgumentException If the collection is null or empty.
 *
 * @see #fromCollection(Collection, TypeInformation)
 */
public <X> DataSource<X> fromCollection(Collection<X> data) {
	if (data == null) {
		throw new IllegalArgumentException("The data must not be null.");
	}
	// Idiomatic emptiness check; the previous message ("The size of the collection must not
	// be empty.") was nonsensical.
	if (data.isEmpty()) {
		throw new IllegalArgumentException("The collection must not be empty.");
	}

	// Type extraction inspects the first element; generic element types may need the
	// explicit-TypeInformation overload instead.
	X firstValue = data.iterator().next();

	TypeInformation<X> type = TypeExtractor.getForObject(firstValue);
	CollectionInputFormat.checkCollection(data, type.getTypeClass());
	return new DataSource<>(this, new CollectionInputFormat<>(data, type.createSerializer(config)), type, Utils.getCallLocationName());
}
 
Example #9
Source File: ExecutionEnvironment.java    From flink with Apache License 2.0 3 votes vote down vote up
/**
 * Creates a DataSet from the given non-empty collection. The type of the data set is that
 * of the elements in the collection.
 *
 * <p>The framework will try and determine the exact type from the collection elements.
 * In case of generic elements, it may be necessary to manually supply the type information
 * via {@link #fromCollection(Collection, TypeInformation)}.
 *
 * <p>Note that this operation will result in a non-parallel data source, i.e. a data source with
 * a parallelism of one.
 *
 * @param data The collection of elements to create the data set from.
 * @return A DataSet representing the given collection.
 * @throws IllegalArgumentException If the collection is null or empty.
 *
 * @see #fromCollection(Collection, TypeInformation)
 */
public <X> DataSource<X> fromCollection(Collection<X> data) {
	if (data == null) {
		throw new IllegalArgumentException("The data must not be null.");
	}
	// Idiomatic emptiness check; the previous message ("The size of the collection must not
	// be empty.") was nonsensical.
	if (data.isEmpty()) {
		throw new IllegalArgumentException("The collection must not be empty.");
	}

	// Type extraction inspects the first element; generic element types may need the
	// explicit-TypeInformation overload instead.
	X firstValue = data.iterator().next();

	TypeInformation<X> type = TypeExtractor.getForObject(firstValue);
	CollectionInputFormat.checkCollection(data, type.getTypeClass());
	return new DataSource<>(this, new CollectionInputFormat<>(data, type.createSerializer(config)), type, Utils.getCallLocationName());
}
 
Example #10
Source File: ExecutionEnvironment.java    From flink with Apache License 2.0 3 votes vote down vote up
/**
 * Creates a DataSet from the given non-empty collection. The type of the data set is that
 * of the elements in the collection.
 *
 * <p>The framework will try and determine the exact type from the collection elements.
 * In case of generic elements, it may be necessary to manually supply the type information
 * via {@link #fromCollection(Collection, TypeInformation)}.
 *
 * <p>Note that this operation will result in a non-parallel data source, i.e. a data source with
 * a parallelism of one.
 *
 * @param data The collection of elements to create the data set from.
 * @return A DataSet representing the given collection.
 * @throws IllegalArgumentException If the collection is null or empty.
 *
 * @see #fromCollection(Collection, TypeInformation)
 */
public <X> DataSource<X> fromCollection(Collection<X> data) {
	if (data == null) {
		throw new IllegalArgumentException("The data must not be null.");
	}
	// Idiomatic emptiness check; the previous message ("The size of the collection must not
	// be empty.") was nonsensical.
	if (data.isEmpty()) {
		throw new IllegalArgumentException("The collection must not be empty.");
	}

	// Type extraction inspects the first element; generic element types may need the
	// explicit-TypeInformation overload instead.
	X firstValue = data.iterator().next();

	TypeInformation<X> type = TypeExtractor.getForObject(firstValue);
	CollectionInputFormat.checkCollection(data, type.getTypeClass());
	return new DataSource<>(this, new CollectionInputFormat<>(data, type.createSerializer(config)), type, Utils.getCallLocationName());
}