Java Code Examples for org.apache.flink.api.java.DataSet#getType()
The following examples show how to use
org.apache.flink.api.java.DataSet#getType() .
You can go to the original project or source file by following the links above each example.
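DataSet#getType() returns the TypeInformation that describes the elements of a DataSet; as the examples below show, operators typically call it to validate their input (for instance, checking isTupleType()) and to propagate the result type to downstream operators. As a primer, here is a minimal, self-contained sketch of calling it directly (the class name and sample data are made up for illustration; several snippets after the examples below reuse the words DataSet declared here):

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;

public class GetTypeExample {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Tuple2<String, Integer>> words = env.fromElements(
            Tuple2.of("flink", 1), Tuple2.of("dataset", 2));

        // TypeInformation captures arity, field types, and serializers.
        TypeInformation<Tuple2<String, Integer>> type = words.getType();
        System.out.println(type.isTupleType()); // true
        System.out.println(type.getArity());    // 2
    }
}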
Example 1
Source File: AggregateOperator.java From flink with Apache License 2.0
/**
 * Non grouped aggregation.
 */
public AggregateOperator(DataSet<IN> input, Aggregations function, int field, String aggregateLocationName) {
    super(Preconditions.checkNotNull(input), input.getType());
    Preconditions.checkNotNull(function);

    this.aggregateLocationName = aggregateLocationName;

    if (!input.getType().isTupleType()) {
        throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
    }

    TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getType();

    if (field < 0 || field >= inType.getArity()) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    AggregationFunctionFactory factory = function.getFactory();
    AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

    // this is the first aggregation operator after a regular data set (non grouped aggregation)
    this.aggregationFunctions.add(aggFunct);
    this.fields.add(field);
    this.grouping = null;
}
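User code does not call this constructor directly; it is reached through DataSet#aggregate (or the sum/min/max shortcuts), which hand over the input together with its type. A hedged usage sketch, reusing the words DataSet from the intro and assuming an import of org.apache.flink.api.java.aggregation.Aggregations:

// aggregate() builds an AggregateOperator, which calls input.getType()
// to verify that the input is a tuple type before aggregating field 1.
AggregateOperator<Tuple2<String, Integer>> summed = words.aggregate(Aggregations.SUM, 1);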
Example 2
Source File: DataSetUtils.java From flink with Apache License 2.0
/**
 * Summarize a DataSet of Tuples by collecting single pass statistics for all columns.
 *
 * <p>Example usage:
 * <pre>
 * {@code
 * DataSet<Tuple3<Double, String, Boolean>> input = // [...]
 * Tuple3<NumericColumnSummary, StringColumnSummary, BooleanColumnSummary> summary = DataSetUtils.summarize(input);
 *
 * summary.f0.getStandardDeviation();
 * summary.f1.getMaxLength();
 * }
 * </pre>
 *
 * @return the summary as a Tuple the same width as input rows
 */
public static <R extends Tuple, T extends Tuple> R summarize(DataSet<T> input) throws Exception {
    if (!input.getType().isTupleType()) {
        throw new IllegalArgumentException("summarize() is only implemented for DataSets of Tuples");
    }
    final TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getType();
    DataSet<TupleSummaryAggregator<R>> result = input.mapPartition(new MapPartitionFunction<T, TupleSummaryAggregator<R>>() {
        @Override
        public void mapPartition(Iterable<T> values, Collector<TupleSummaryAggregator<R>> out) throws Exception {
            TupleSummaryAggregator<R> aggregator = SummaryAggregatorFactory.create(inType);
            for (Tuple value : values) {
                aggregator.aggregate(value);
            }
            out.collect(aggregator);
        }
    }).reduce(new ReduceFunction<TupleSummaryAggregator<R>>() {
        @Override
        public TupleSummaryAggregator<R> reduce(TupleSummaryAggregator<R> agg1, TupleSummaryAggregator<R> agg2) throws Exception {
            agg1.combine(agg2);
            return agg1;
        }
    });
    return result.collect().get(0).result();
}
Example 3
Source File: ProjectOperator.java From flink with Apache License 2.0
public Projection(DataSet<T> ds, int[] fieldIndexes) {
    if (!(ds.getType() instanceof TupleTypeInfo)) {
        throw new UnsupportedOperationException("project() can only be applied to DataSets of Tuples.");
    }

    if (fieldIndexes.length == 0) {
        throw new IllegalArgumentException("project() needs to select at least one (1) field.");
    } else if (fieldIndexes.length > Tuple.MAX_ARITY - 1) {
        throw new IllegalArgumentException(
            "project() may select only up to (" + (Tuple.MAX_ARITY - 1) + ") fields.");
    }

    int maxFieldIndex = ds.getType().getArity();
    for (int fieldIndex : fieldIndexes) {
        Preconditions.checkElementIndex(fieldIndex, maxFieldIndex);
    }

    this.ds = ds;
    this.fieldIndexes = fieldIndexes;
}
Example 4
Source File: PartitionOperator.java From flink with Apache License 2.0
private <P> PartitionOperator(DataSet<T> input, PartitionMethod pMethod, Keys<T> pKeys, Partitioner<P> customPartitioner,
        TypeInformation<P> partitionerTypeInfo, DataDistribution distribution, String partitionLocationName) {
    super(input, input.getType());

    Preconditions.checkNotNull(pMethod);
    Preconditions.checkArgument(pKeys != null || pMethod == PartitionMethod.REBALANCE, "Partitioning requires keys");
    Preconditions.checkArgument(pMethod != PartitionMethod.CUSTOM || customPartitioner != null, "Custom partitioning requires a partitioner.");
    Preconditions.checkArgument(distribution == null || pMethod == PartitionMethod.RANGE, "Customized data distribution is only necessary for range partitioning.");

    if (distribution != null) {
        Preconditions.checkArgument(pKeys.getNumberOfKeyFields() <= distribution.getNumberOfFields(),
            "The distribution must provide at least as many fields as flat key fields are specified.");
        Preconditions.checkArgument(Arrays.equals(pKeys.getKeyFieldTypes(), Arrays.copyOfRange(distribution.getKeyTypes(), 0, pKeys.getNumberOfKeyFields())),
            "The types of the flat key fields must be equal to the types of the fields of the distribution.");
    }

    if (customPartitioner != null) {
        pKeys.validateCustomPartitioner(customPartitioner, partitionerTypeInfo);
    }

    this.pMethod = pMethod;
    this.pKeys = pKeys;
    this.partitionLocationName = partitionLocationName;
    this.customPartitioner = customPartitioner;
    this.distribution = distribution;
}
Example 5
Source File: PythonPlanBinder.java From Flink-CEPplus with Apache License 2.0
private <IN1, IN2, OUT> void createCoGroupOperation(PythonOperationInfo info, TypeInformation<OUT> type) {
    DataSet<IN1> op1 = sets.getDataSet(info.parentID);
    DataSet<IN2> op2 = sets.getDataSet(info.otherID);
    Keys.ExpressionKeys<IN1> key1 = new Keys.ExpressionKeys<>(info.keys1.toArray(new String[info.keys1.size()]), op1.getType());
    Keys.ExpressionKeys<IN2> key2 = new Keys.ExpressionKeys<>(info.keys2.toArray(new String[info.keys2.size()]), op2.getType());
    PythonCoGroup<IN1, IN2, OUT> pcg = new PythonCoGroup<>(operatorConfig, info.envID, info.setID, type);
    sets.add(info.setID, new CoGroupRawOperator<>(op1, op2, key1, key2, pcg, type, info.name).setParallelism(info.parallelism));
}
Example 6
Source File: SortPartitionOperator.java From flink with Apache License 2.0
private SortPartitionOperator(DataSet<T> dataSet, String sortLocationName) {
    super(dataSet, dataSet.getType());

    keys = new ArrayList<>();
    orders = new ArrayList<>();
    this.sortLocationName = sortLocationName;
}
Example 7
Source File: FlinkBatchTransformTranslators.java From flink-dataflow with Apache License 2.0
@Override
public void translateNode(GroupByKey.GroupByKeyOnly<K, V> transform, FlinkBatchTranslationContext context) {
    DataSet<KV<K, V>> inputDataSet = context.getInputDataSet(context.getInput(transform));
    GroupReduceFunction<KV<K, V>, KV<K, Iterable<V>>> groupReduceFunction = new FlinkKeyedListAggregationFunction<>();

    TypeInformation<KV<K, Iterable<V>>> typeInformation = context.getTypeInfo(context.getOutput(transform));

    Grouping<KV<K, V>> grouping = new UnsortedGrouping<>(inputDataSet,
        new Keys.ExpressionKeys<>(new String[]{"key"}, inputDataSet.getType()));

    GroupReduceOperator<KV<K, V>, KV<K, Iterable<V>>> outputDataSet =
        new GroupReduceOperator<>(grouping, typeInformation, groupReduceFunction, transform.getName());
    context.setOutputDataSet(context.getOutput(transform), outputDataSet);
}
Example 8
Source File: DeltaIteration.java From flink with Apache License 2.0
public DeltaIteration(ExecutionEnvironment context, TypeInformation<ST> type, DataSet<ST> solutionSet, DataSet<WT> workset, Keys<ST> keys, int maxIterations) {
    initialSolutionSet = solutionSet;
    initialWorkset = workset;
    solutionSetPlaceholder = new SolutionSetPlaceHolder<>(context, solutionSet.getType(), this);
    worksetPlaceholder = new WorksetPlaceHolder<>(context, workset.getType());
    this.keys = keys;
    this.maxIterations = maxIterations;
}
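DeltaIteration instances are created through DataSet#iterateDelta, which forwards the solution set and workset (and thus their getType() results) to this constructor. A sketch under stated assumptions: initialState is a hypothetical Tuple2<Long, Double> DataSet, and delta and nextWorkset stand in for data sets that would be derived inside the iteration body:

// iterateDelta() calls getType() on both inputs to type the
// solution-set and workset placeholders created above.
DeltaIteration<Tuple2<Long, Double>, Tuple2<Long, Double>> iteration =
    initialState.iterateDelta(initialState, 100, 0); // max 100 supersteps, keyed on field 0

// delta and nextWorkset are assumed to be built from
// iteration.getSolutionSet() and iteration.getWorkset().
DataSet<Tuple2<Long, Double>> result = iteration.closeWith(delta, nextWorkset);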
Example 9
Source File: DistinctOperator.java From Flink-CEPplus with Apache License 2.0
public DistinctOperator(DataSet<T> input, Keys<T> keys, String distinctLocationName) {
    super(input, input.getType());

    this.distinctLocationName = distinctLocationName;

    // if keys is null, distinction is done on all fields
    if (keys == null) {
        keys = new Keys.ExpressionKeys<>(input.getType());
    }

    this.keys = keys;
}
Example 10
Source File: SortPartitionOperator.java From Flink-CEPplus with Apache License 2.0
private SortPartitionOperator(DataSet<T> dataSet, String sortLocationName) {
    super(dataSet, dataSet.getType());

    keys = new ArrayList<>();
    orders = new ArrayList<>();
    this.sortLocationName = sortLocationName;
}
Example 11
Source File: DataSetUtils.java From flink with Apache License 2.0
/**
 * Generate a sample of the DataSet with a fixed number of elements.
 *
 * <p><strong>NOTE:</strong> Sample with fixed size is not as efficient as sample with fraction; use sample with
 * fraction unless you need exact precision.
 *
 * @param withReplacement Whether an element can be selected more than once.
 * @param numSamples The expected sample size.
 * @param seed Random number generator seed.
 * @return The sampled DataSet
 */
public static <T> DataSet<T> sampleWithSize(
        DataSet<T> input,
        final boolean withReplacement,
        final int numSamples,
        final long seed) {
    SampleInPartition<T> sampleInPartition = new SampleInPartition<>(withReplacement, numSamples, seed);
    MapPartitionOperator mapPartitionOperator = input.mapPartition(sampleInPartition);

    // There is no previous group, so the parallelism of GroupReduceOperator is always 1.
    String callLocation = Utils.getCallLocationName();
    SampleInCoordinator<T> sampleInCoordinator = new SampleInCoordinator<>(withReplacement, numSamples, seed);
    return new GroupReduceOperator<>(mapPartitionOperator, input.getType(), sampleInCoordinator, callLocation);
}
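Note how input.getType() supplies the element type of the final GroupReduceOperator, so the sample keeps the input's exact type. A usage sketch, reusing the words DataSet from the intro and assuming an import of org.apache.flink.api.java.utils.DataSetUtils (sample size and seed are arbitrary):

// Draw up to 10 elements without replacement; the result carries the
// same TypeInformation as the input, courtesy of input.getType().
DataSet<Tuple2<String, Integer>> sample =
    DataSetUtils.sampleWithSize(words, false, 10, 4242L);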
Example 12
Source File: ReduceOperator.java From flink with Apache License 2.0
/**
 * This constructor is used for the reduce-all case (in contrast to the reduce-per-group case).
 *
 * @param input the input data set
 * @param function the reduce function to apply
 */
public ReduceOperator(DataSet<IN> input, ReduceFunction<IN> function, String defaultName) {
    super(input, input.getType());

    this.function = function;
    this.grouper = null;
    this.defaultName = defaultName;
    this.hint = null;
}
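As with the aggregation example above, this constructor is reached through the public API, here DataSet#reduce on an ungrouped DataSet. A short sketch reusing the words DataSet from the intro:

// reduce() on an ungrouped DataSet builds a ReduceOperator with
// grouper == null; the result type is taken from input.getType().
DataSet<Tuple2<String, Integer>> total =
    words.reduce((a, b) -> Tuple2.of(a.f0, a.f1 + b.f1));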
Example 13
Source File: DataSetUtils.java From flink with Apache License 2.0
/**
 * Range-partitions a DataSet on the specified fields.
 */
public static <T> PartitionOperator<T> partitionByRange(DataSet<T> input, DataDistribution distribution, String... fields) {
    return new PartitionOperator<>(input, PartitionOperatorBase.PartitionMethod.RANGE,
        new Keys.ExpressionKeys<>(fields, input.getType()), distribution, Utils.getCallLocationName());
}
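The expression keys are resolved against input.getType(), so the field names must exist in the input's type. A hedged sketch reusing the words DataSet from the intro; myDistribution stands in for a hypothetical DataDistribution implementation:

// Range-partition on the first tuple field; "f0" is resolved
// against words.getType() via Keys.ExpressionKeys.
PartitionOperator<Tuple2<String, Integer>> partitioned =
    DataSetUtils.partitionByRange(words, myDistribution, "f0");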
Example 14
Source File: FilterOperator.java From flink with Apache License 2.0
public FilterOperator(DataSet<T> input, FilterFunction<T> function, String defaultName) {
    super(input, input.getType());

    this.function = function;
    this.defaultName = defaultName;
}
Example 15
Source File: CrossOperator.java From flink with Apache License 2.0
public CrossProjection(DataSet<I1> ds1, DataSet<I2> ds2, int[] firstFieldIndexes, int[] secondFieldIndexes, CrossHint hint) {
    this.ds1 = ds1;
    this.ds2 = ds2;
    this.hint = hint;

    boolean isFirstTuple;
    boolean isSecondTuple;

    if (ds1.getType() instanceof TupleTypeInfo) {
        numFieldsDs1 = ((TupleTypeInfo<?>) ds1.getType()).getArity();
        isFirstTuple = true;
    } else {
        numFieldsDs1 = 1;
        isFirstTuple = false;
    }
    if (ds2.getType() instanceof TupleTypeInfo) {
        numFieldsDs2 = ((TupleTypeInfo<?>) ds2.getType()).getArity();
        isSecondTuple = true;
    } else {
        numFieldsDs2 = 1;
        isSecondTuple = false;
    }

    boolean isTuple;
    boolean firstInput;

    if (firstFieldIndexes != null && secondFieldIndexes == null) {
        // index array for first input is provided
        firstInput = true;
        isTuple = isFirstTuple;
        this.fieldIndexes = firstFieldIndexes;

        if (this.fieldIndexes.length == 0) {
            // no indexes provided, treat tuple as regular object
            isTuple = false;
        }
    } else if (firstFieldIndexes == null && secondFieldIndexes != null) {
        // index array for second input is provided
        firstInput = false;
        isTuple = isSecondTuple;
        this.fieldIndexes = secondFieldIndexes;

        if (this.fieldIndexes.length == 0) {
            // no indexes provided, treat tuple as regular object
            isTuple = false;
        }
    } else if (firstFieldIndexes == null && secondFieldIndexes == null) {
        throw new IllegalArgumentException("You must provide at least one field index array.");
    } else {
        throw new IllegalArgumentException("You must provide at most one field index array.");
    }

    if (!isTuple && this.fieldIndexes.length != 0) {
        // field index provided for non-Tuple input
        throw new IllegalArgumentException("Input is not a Tuple. Call projectFirst() (or projectSecond()) without arguments to include it.");
    } else if (this.fieldIndexes.length > 22) {
        throw new IllegalArgumentException("You may select only up to twenty-two (22) fields.");
    }

    if (isTuple) {
        this.isFieldInFirst = new boolean[this.fieldIndexes.length];

        // check field indexes and adapt to position in tuple
        int maxFieldIndex = firstInput ? numFieldsDs1 : numFieldsDs2;
        for (int i = 0; i < this.fieldIndexes.length; i++) {
            Preconditions.checkElementIndex(this.fieldIndexes[i], maxFieldIndex);
            if (firstInput) {
                this.isFieldInFirst[i] = true;
            } else {
                this.isFieldInFirst[i] = false;
            }
        }
    } else {
        this.isFieldInFirst = new boolean[]{firstInput};
        this.fieldIndexes = new int[]{-1};
    }
}
Example 16
Source File: DataSetUtils.java From flink with Apache License 2.0
/**
 * Range-partitions a DataSet using the specified key selector function.
 */
public static <T, K extends Comparable<K>> PartitionOperator<T> partitionByRange(DataSet<T> input, DataDistribution distribution, KeySelector<T, K> keyExtractor) {
    final TypeInformation<K> keyType = TypeExtractor.getKeySelectorTypes(keyExtractor, input.getType());
    return new PartitionOperator<>(input, PartitionOperatorBase.PartitionMethod.RANGE,
        new Keys.SelectorFunctionKeys<>(input.clean(keyExtractor), input.getType(), keyType),
        distribution, Utils.getCallLocationName());
}
Example 17
Source File: EdgeList.java From flink with Apache License 2.0
/**
 * Check whether the edge type of the {@link DataSet} is {@link NullValue}.
 *
 * @param edges data set for introspection
 * @param <T> graph ID type
 * @param <ET> edge value type
 * @return whether the edge type of the {@link DataSet} is {@link NullValue}
 */
private static <T, ET> boolean hasNullValueEdges(DataSet<Edge<T, ET>> edges) {
    TypeInformation<?> genericTypeInfo = edges.getType();
    @SuppressWarnings("unchecked")
    TupleTypeInfo<Tuple3<T, T, ET>> tupleTypeInfo = (TupleTypeInfo<Tuple3<T, T, ET>>) genericTypeInfo;

    return tupleTypeInfo.getTypeAt(2).equals(ValueTypeInfo.NULL_VALUE_TYPE_INFO);
}