com.google.cloud.dataflow.sdk.values.TupleTag Java Examples

Source File: FlinkStreamingTransformTranslators.java From flink-dataflow with Apache License 2.0

5 votes

/**
 * Assigns an integer label to every output tag.
 *
 * <p>The main tag is always mapped to {@code MAIN_TAG_INDEX}; each secondary tag that is
 * not already present receives the next consecutive label, in the iteration order of
 * {@code secondaryTags}.
 *
 * @param mainTag tag of the main output
 * @param secondaryTags tags of the remaining outputs; a tag equal to {@code mainTag} keeps
 *        the main label
 * @return a new map from tag to label
 */
private Map<TupleTag<?>, Integer> transformTupleTagsToLabels(TupleTag<?> mainTag, Set<TupleTag<?>> secondaryTags) {
	Map<TupleTag<?>, Integer> labels = Maps.newHashMap();
	labels.put(mainTag, MAIN_TAG_INDEX);

	int nextLabel = MAIN_TAG_INDEX + 1;
	for (TupleTag<?> candidate : secondaryTags) {
		if (labels.containsKey(candidate)) {
			// already labelled (e.g. the main tag appears among the secondary tags)
			continue;
		}
		labels.put(candidate, nextLabel);
		nextLabel++;
	}
	return labels;
}

Source File: FlinkDoFnFunction.java From flink-dataflow with Apache License 2.0

5 votes

/**
 * Returns a {@link WindowingInternals} made up almost entirely of stubs: state and timer
 * internals are {@code null}, windowed output and view-data writes are silently discarded,
 * the only reported window is the global window, and side inputs are rejected.
 */
@Override
public WindowingInternals<IN, OUT> windowingInternals() {
	return new WindowingInternals<IN, OUT>() {
		// No state support here: callers receive null and must not dereference it.
		@Override
		public StateInternals stateInternals() {
			return null;
		}

		// Intentionally a no-op: the value passed in is dropped.
		@Override
		public void outputWindowedValue(OUT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {

		}

		// No timer support here: callers receive null and must not dereference it.
		@Override
		public TimerInternals timerInternals() {
			return null;
		}

		// Every element is reported as belonging to the single global window.
		@Override
		public Collection<? extends BoundedWindow> windows() {
			return ImmutableList.of(GlobalWindow.INSTANCE);
		}

		@Override
		public PaneInfo pane() {
			return PaneInfo.NO_FIRING;
		}

		// Intentionally a no-op: the view data is not stored anywhere by this implementation.
		@Override
		public <T> void writePCollectionViewData(TupleTag<?> tag, Iterable<WindowedValue<T>> data, Coder<T> elemCoder) throws IOException {
		}

		// Side inputs are not served through this object; any call fails immediately.
		@Override
		public <T> T sideInput(PCollectionView<T> view, BoundedWindow mainInputWindow) {
			throw new RuntimeException("sideInput() not implemented.");
		}
	};
}

Source File: FlinkParDoBoundMultiWrapper.java From flink-dataflow with Apache License 2.0

5 votes

/**
 * Emits {@code output} to the side output identified by {@code tag}.
 *
 * <p>The timestamp is validated against the input element first. The value is then wrapped
 * in a {@code RawUnionValue} carrying the label registered for the tag and collected with
 * the windows and pane of the input element. Tags without a registered label are dropped
 * silently.
 */
@Override
public <T> void sideOutputWithTimestampHelper(WindowedValue<IN> inElement, T output, Instant timestamp, Collector<WindowedValue<RawUnionValue>> collector, TupleTag<T> tag) {
	checkTimestamp(inElement, timestamp);

	final Integer label = outputLabels.get(tag);
	if (label == null) {
		// unknown tag: nothing to emit
		return;
	}

	RawUnionValue labelled = new RawUnionValue(label, output);
	collector.collect(makeWindowedValue(labelled, timestamp, inElement.getWindows(), inElement.getPane()));
}

Source File: FlinkMultiOutputDoFnFunction.java From flink-dataflow with Apache License 2.0

5 votes

/**
 * Emits {@code value} to the side output identified by {@code tag}, wrapped in a
 * {@code RawUnionValue} that carries the index registered for the tag in {@code outputMap}.
 * Tags with no registered index are dropped silently.
 */
@Override
@SuppressWarnings("unchecked")
public <T> void sideOutput(TupleTag<T> tag, T value) {
	final Integer unionIndex = outputMap.get(tag);
	if (unionIndex == null) {
		// tag was never registered as an output: discard the value
		return;
	}
	outCollector.collect(new RawUnionValue(unionIndex, value));
}

Source File: FlinkParDoBoundWrapper.java From flink-dataflow with Apache License 2.0

5 votes

/**
 * Builds the {@link WindowingInternals} exposed for a single input element.
 *
 * <p>Windows and pane are taken from {@code inElement}; windowed output is forwarded to
 * {@code collector}. State, timers, view-data writes and side inputs are not supported and
 * fail with an exception when requested.
 *
 * @param inElement the element currently being processed
 * @param collector receives every value passed to {@code outputWindowedValue}
 * @return a fresh {@code WindowingInternals} bound to the given element and collector
 */
@Override
public WindowingInternals<IN, OUT> windowingInternalsHelper(final WindowedValue<IN> inElement, final Collector<WindowedValue<OUT>> collector) {
	return new WindowingInternals<IN, OUT>() {
		// NOTE(review): NullPointerException is an unusual way to signal an unsupported
		// operation; it is kept so that the thrown type stays the same for existing callers.
		@Override
		public StateInternals stateInternals() {
			throw new NullPointerException("StateInternals are not available for ParDo.Bound().");
		}

		@Override
		public void outputWindowedValue(OUT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
			collector.collect(makeWindowedValue(output, timestamp, windows, pane));
		}

		// Message now names the actual type (TimerInternals); it previously read "TimeInternals".
		@Override
		public TimerInternals timerInternals() {
			throw new NullPointerException("TimerInternals are not available for ParDo.Bound().");
		}

		@Override
		public Collection<? extends BoundedWindow> windows() {
			return inElement.getWindows();
		}

		@Override
		public PaneInfo pane() {
			return inElement.getPane();
		}

		@Override
		public <T> void writePCollectionViewData(TupleTag<?> tag, Iterable<WindowedValue<T>> data, Coder<T> elemCoder) throws IOException {
			throw new RuntimeException("writePCollectionViewData() not supported in Streaming mode.");
		}

		@Override
		public <T> T sideInput(PCollectionView<T> view, BoundedWindow mainInputWindow) {
			throw new RuntimeException("sideInput() not implemented.");
		}
	};
}

Source File: FlinkBatchTransformTranslators.java From flink-dataflow with Apache License 2.0

5 votes

/**
 * Translates a {@code CoGroupByKey} into a Flink {@code CoGroupOperator} over two inputs.
 *
 * <p>Both input data sets are keyed on their {@code "key"} field and combined by a
 * {@code FlinkCoGroupKeyedListAggregator} built from the result schema and the two tuple
 * tags. The resulting data set is registered as the output of the transform.
 *
 * <p>NOTE(review): only the keyed collections at positions 0 and 1 are read; fewer than two
 * inputs fail on the list access and any further inputs are not looked at. Confirm that
 * callers guarantee exactly two inputs.
 */
@Override
public void translateNode(CoGroupByKey<K> transform, FlinkBatchTranslationContext context) {
	KeyedPCollectionTuple<K> keyedInputs = context.getInput(transform);

	CoGbkResultSchema resultSchema = keyedInputs.getCoGbkResultSchema();
	List<KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?>> taggedInputs = keyedInputs.getKeyedCollections();

	// the two sides of the co-group
	KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?> leftTagged = taggedInputs.get(0);
	KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?> rightTagged = taggedInputs.get(1);

	TupleTag<?> leftTag = leftTagged.getTupleTag();
	TupleTag<?> rightTag = rightTagged.getTupleTag();

	PCollection<? extends KV<K, ?>> leftCollection = leftTagged.getCollection();
	PCollection<? extends KV<K, ?>> rightCollection = rightTagged.getCollection();

	DataSet<KV<K,V1>> leftDataSet = context.getInputDataSet(leftCollection);
	DataSet<KV<K,V2>> rightDataSet = context.getInputDataSet(rightCollection);

	TypeInformation<KV<K,CoGbkResult>> resultTypeInfo = context.getOutputTypeInfo();

	FlinkCoGroupKeyedListAggregator<K,V1,V2> coGroupFunction =
			new FlinkCoGroupKeyedListAggregator<>(resultSchema, leftTag, rightTag);

	// both sides are grouped on the "key" field of their KV elements
	Keys.ExpressionKeys<KV<K,V1>> leftKeys = new Keys.ExpressionKeys<>(new String[]{"key"}, leftDataSet.getType());
	Keys.ExpressionKeys<KV<K,V2>> rightKeys = new Keys.ExpressionKeys<>(new String[]{"key"}, rightDataSet.getType());

	DataSet<KV<K, CoGbkResult>> coGrouped = new CoGroupOperator<>(
			leftDataSet, rightDataSet,
			leftKeys, rightKeys,
			coGroupFunction, resultTypeInfo, null, transform.getName());
	context.setOutputDataSet(context.getOutput(transform), coGrouped);
}

Source File: ParDoMultiOutputITCase.java From flink-dataflow with Apache License 2.0

4 votes

/**
 * Runs a batch pipeline with a multi-output ParDo and writes only the "marked words"
 * side output to {@code resultPath}.
 */
@Override
protected void testProgram() throws Exception {
	Pipeline p = FlinkTestPipeline.createForBatch();

	PCollection<String> words = p.apply(Create.of("Hello", "Whatupmyman", "hey", "SPECIALthere", "MAAA", "MAAFOOO"));

	// Select words whose length is at or below a cut off,
	// plus the lengths of words that are above the cut off.
	// Also select words starting with "MAA".
	final int wordLengthCutOff = 3;
	// Create tags to use for the main and side outputs.
	final TupleTag<String> wordsBelowCutOffTag = new TupleTag<String>(){};
	final TupleTag<Integer> wordLengthsAboveCutOffTag = new TupleTag<Integer>(){};
	final TupleTag<String> markedWordsTag = new TupleTag<String>(){};

	PCollectionTuple results =
			words.apply(ParDo
					.withOutputTags(wordsBelowCutOffTag, TupleTagList.of(wordLengthsAboveCutOffTag)
							.and(markedWordsTag))
					.of(new DoFn<String, String>() {
						// This tag is NOT passed to withOutputTags above, so output sent to it
						// goes to a side output that was never registered.
						final TupleTag<String> specialWordsTag = new TupleTag<String>() {
						};

						public void processElement(ProcessContext c) {
							String word = c.element();
							if (word.length() <= wordLengthCutOff) {
								c.output(word);
							} else {
								c.sideOutput(wordLengthsAboveCutOffTag, word.length());
							}
							if (word.startsWith("MAA")) {
								c.sideOutput(markedWordsTag, word);
							}

							if (word.startsWith("SPECIAL")) {
								c.sideOutput(specialWordsTag, word);
							}
						}
					}));

	// Extract the PCollection results, by tag.
	// Only markedWords is consumed below; the other two are fetched but not used further.
	PCollection<String> wordsBelowCutOff = results.get(wordsBelowCutOffTag);
	PCollection<Integer> wordLengthsAboveCutOff = results.get
			(wordLengthsAboveCutOffTag);
	PCollection<String> markedWords = results.get(markedWordsTag);

	markedWords.apply(TextIO.Write.to(resultPath));

	p.run();
}

Source File: FileToState.java From policyscanner with Apache License 2.0

4 votes

/**
 * Creates the transform step with the tag stored as its error output tag.
 *
 * @param tag tag to keep in {@code errorOutputTag}
 */
public FileToState(TupleTag<GCPResourceErrorInfo> tag) {
  this.errorOutputTag = tag;
}

Source File: FlinkDoFnFunction.java From flink-dataflow with Apache License 2.0

4 votes

/**
 * Forwards to {@code sideOutput(tag, output)}; the supplied {@code timestamp} is not used.
 */
@Override
public <T> void sideOutputWithTimestamp(TupleTag<T> tag, T output, Instant timestamp) {
	sideOutput(tag, output);
}

Source File: FlinkDoFnFunction.java From flink-dataflow with Apache License 2.0

4 votes

/**
 * Deliberately does nothing: every value sent to a side output is discarded.
 */
@Override
public <T> void sideOutput(TupleTag<T> tag, T output) {
	// ignore the side output, this can happen when a user does not register
	// side outputs but then outputs using a freshly created TupleTag.
}

Source File: FlinkCoGroupKeyedListAggregator.java From flink-dataflow with Apache License 2.0

4 votes

/**
 * Stores the result schema and the tuple tags of the two co-grouped inputs.
 *
 * @param schema schema kept in {@code this.schema}
 * @param tupleTag1 tag kept for the first input
 * @param tupleTag2 tag kept for the second input
 */
public FlinkCoGroupKeyedListAggregator(CoGbkResultSchema schema, TupleTag<?> tupleTag1, TupleTag<?> tupleTag2) {
	// input tags first, then the schema they belong to
	this.tupleTag1 = tupleTag1;
	this.tupleTag2 = tupleTag2;
	this.schema = schema;
}

Source File: FlinkMultiOutputDoFnFunction.java From flink-dataflow with Apache License 2.0

4 votes

/**
 * Forwards to {@code sideOutput(tag, output)}; the supplied {@code timestamp} is not used.
 */
@Override
public <T> void sideOutputWithTimestamp(TupleTag<T> tag, T output, Instant timestamp) {
	sideOutput(tag, output);
}

Source File: FlinkMultiOutputDoFnFunction.java From flink-dataflow with Apache License 2.0

4 votes

/**
 * Stores the user function, the pipeline options and the tag-to-index mapping.
 *
 * @param doFn the function kept in {@code this.doFn}
 * @param options pipeline options kept in {@code this.options}
 * @param outputMap mapping from output tag to integer index; the reference is stored as
 *        given, without copying
 */
public FlinkMultiOutputDoFnFunction(DoFn<IN, OUT> doFn, PipelineOptions options, Map<TupleTag<?>, Integer> outputMap) {
	this.outputMap = outputMap;
	this.options = options;
	this.doFn = doFn;
}

Source File: FlinkBatchTransformTranslators.java From flink-dataflow with Apache License 2.0

4 votes

/**
 * Translates a multi-output {@code ParDo} for batch execution.
 *
 * <p>All outputs are first produced as one data set of {@code RawUnionValue}s by a
 * {@code FlinkMultiOutputDoFnFunction}; each declared output is then carved out of that
 * data set by a {@code FlinkMultiOutputPruningFunction} selecting its union index.
 *
 * <p>The union index of a tag and the position of its coder inside the {@code UnionCoder}
 * must agree. The coder list is therefore built from the indices in {@code outputMap}
 * rather than from the iteration order of the output map, which would only match if the
 * main output happened to be iterated first.
 *
 * @param transform the multi-output ParDo to translate
 * @param context translation context supplying input data sets, type information and
 *        pipeline options, and receiving the output data sets
 */
@Override
public void translateNode(ParDo.BoundMulti<IN, OUT> transform, FlinkBatchTranslationContext context) {
	DataSet<IN> inputDataSet = context.getInputDataSet(context.getInput(transform));

	final DoFn<IN, OUT> doFn = transform.getFn();

	Map<TupleTag<?>, PCollection<?>> outputs = context.getOutput(transform).getAll();

	Map<TupleTag<?>, Integer> outputMap = Maps.newHashMap();
	// put the main output at index 0, FlinkMultiOutputDoFnFunction also expects this
	outputMap.put(transform.getMainOutputTag(), 0);
	int count = 1;
	for (TupleTag<?> tag: outputs.keySet()) {
		if (!outputMap.containsKey(tag)) {
			outputMap.put(tag, count++);
		}
	}

	// collect all output Coders, placing each at the union index assigned to its tag,
	// and create a UnionCoder for our tagged outputs
	Coder<?>[] codersByIndex = new Coder<?>[count];
	for (Map.Entry<TupleTag<?>, PCollection<?>> output: outputs.entrySet()) {
		codersByIndex[outputMap.get(output.getKey())] = output.getValue().getCoder();
	}
	List<Coder<?>> outputCoders = Lists.newArrayList(codersByIndex);

	UnionCoder unionCoder = UnionCoder.of(outputCoders);

	@SuppressWarnings("unchecked")
	TypeInformation<RawUnionValue> typeInformation = new CoderTypeInformation<>(unionCoder);

	@SuppressWarnings("unchecked")
	FlinkMultiOutputDoFnFunction<IN, OUT> doFnWrapper = new FlinkMultiOutputDoFnFunction(doFn, context.getPipelineOptions(), outputMap);
	MapPartitionOperator<IN, RawUnionValue> outputDataSet = new MapPartitionOperator<>(inputDataSet, typeInformation, doFnWrapper, transform.getName());

	transformSideInputs(transform.getSideInputs(), outputDataSet, context);

	// one pruning operator per declared output, each keeping only its own union index
	for (Map.Entry<TupleTag<?>, PCollection<?>> output: outputs.entrySet()) {
		TypeInformation<Object> outputType = context.getTypeInfo(output.getValue());
		int outputTag = outputMap.get(output.getKey());
		FlinkMultiOutputPruningFunction<Object> pruningFunction = new FlinkMultiOutputPruningFunction<>(outputTag);
		FlatMapOperator<RawUnionValue, Object> pruningOperator = new
				FlatMapOperator<>(outputDataSet, outputType,
				pruningFunction, output.getValue().getName());
		context.setOutputDataSet(output.getValue(), pruningOperator);

	}
}

Source File: FlinkParDoBoundWrapper.java From flink-dataflow with Apache License 2.0

4 votes

/**
 * Always fails: this wrapper does not offer side outputs, so any attempt to emit to one is
 * reported to the caller instead of being dropped.
 *
 * @throws RuntimeException on every call
 */
@Override
public <T> void sideOutputWithTimestampHelper(WindowedValue<IN> inElement, T output, Instant timestamp, Collector<WindowedValue<OUT>> outCollector, TupleTag<T> tag) {
	// Side outputs are rejected rather than ignored; this can be reached when a user does
	// not register side outputs but then outputs using a freshly created TupleTag.
	throw new RuntimeException("sideOutput() not available in ParDo.Bound().");
}

Source File: FlinkAbstractParDoWrapper.java From flink-dataflow with Apache License 2.0

4 votes

/**
 * Subclass hook that decides what happens to a value sent to a side output.
 *
 * @param inElement the input element being processed when the side output was emitted
 * @param output the value to emit
 * @param timestamp the timestamp to attach to the emitted value
 * @param outCollector the collector that receives emitted values
 * @param tag the tag identifying the target side output
 */
public abstract <T> void sideOutputWithTimestampHelper(
WindowedValue<IN> inElement,
T output,
Instant timestamp,
Collector<WindowedValue<OUTFL>> outCollector,
TupleTag<T> tag);

Source File: FlinkAbstractParDoWrapper.java From flink-dataflow with Apache License 2.0

4 votes

/**
 * Delegates to {@code sideOutputWithTimestampHelper}, supplying the {@code element} and
 * {@code collector} held by this object alongside the caller's arguments.
 */
@Override
public <T> void sideOutputWithTimestamp(TupleTag<T> tag, T output, Instant timestamp) {
	sideOutputWithTimestampHelper(element, output, timestamp, collector, tag);
}

Source File: FlinkAbstractParDoWrapper.java From flink-dataflow with Apache License 2.0

4 votes

/**
 * Emits {@code output} to the given side output, stamped with the timestamp of the element
 * currently held in {@code this.element}.
 */
@Override
public <T> void sideOutput(TupleTag<T> tag, T output) {
	// reuse the timestamp of the input element for the emitted value
	final Instant inputTimestamp = this.element.getTimestamp();
	sideOutputWithTimestamp(tag, output, inputTimestamp);
}

Source File: FlinkParDoBoundMultiWrapper.java From flink-dataflow with Apache License 2.0

4 votes

/**
 * Creates the wrapper, passing options, windowing strategy and function to the superclass
 * and storing the main tag and the tag-to-label mapping.
 *
 * @param options forwarded to the superclass constructor
 * @param windowingStrategy forwarded to the superclass constructor
 * @param doFn forwarded to the superclass constructor
 * @param mainTag tag of the main output; must not be {@code null}
 * @param tagsToLabels mapping from output tag to integer label; must not be {@code null}
 */
public FlinkParDoBoundMultiWrapper(PipelineOptions options, WindowingStrategy<?, ?> windowingStrategy, DoFn<IN, OUT> doFn, TupleTag<?> mainTag, Map<TupleTag<?>, Integer> tagsToLabels) {
	super(options, windowingStrategy, doFn);
	// both arguments are rejected immediately when null, via Preconditions.checkNotNull
	this.mainTag = Preconditions.checkNotNull(mainTag);
	this.outputLabels = Preconditions.checkNotNull(tagsToLabels);
}

Source File: DistributeWorkDataDoFn.java From data-timeseries-java with Apache License 2.0

4 votes

/**
 * Stores the work packet configuration and the output tag.
 *
 * @param workPacketView configuration kept in {@code this.workPacketView}
 * @param tag tag kept in {@code this.tag}
 */
public DistributeWorkDataDoFn(WorkPacketConfig workPacketView, TupleTag<Integer> tag) {
  this.tag = tag;
  this.workPacketView = workPacketView;
}

Source File: CreateWorkPacketsDoFn.java From data-timeseries-java with Apache License 2.0

4 votes

/**
 * Stores the work packet configuration and the counter tag.
 *
 * @param workPacketView configuration kept in {@code this.workPacketView}
 * @param counter tag kept in {@code this.counter}
 */
public CreateWorkPacketsDoFn(WorkPacketConfig workPacketView, TupleTag<Integer> counter) {
  this.counter = counter;
  this.workPacketView = workPacketView;
}

Source File: ExtractState.java From policyscanner with Apache License 2.0

4 votes

/**
 * Creates the transform step with the tag stored as its error output tag.
 *
 * @param tag tag to keep in {@code errorOutputTag}
 */
public ExtractState(TupleTag<GCPResourceErrorInfo> tag) {
  this.errorOutputTag = tag;
}

com.google.cloud.dataflow.sdk.values.TupleTag Java Examples