com.google.cloud.dataflow.sdk.values.TupleTag Java Examples
The following examples show how to use
com.google.cloud.dataflow.sdk.values.TupleTag.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FlinkStreamingTransformTranslators.java From flink-dataflow with Apache License 2.0 | 5 votes |
/**
 * Assigns an integer label to each output tag.
 *
 * <p>The main tag is mapped to {@code MAIN_TAG_INDEX}. Every secondary tag that is not already
 * in the map receives the next consecutive integer, following the iteration order of
 * {@code secondaryTags}.
 *
 * @param mainTag tag of the main output
 * @param secondaryTags remaining tags; any tag already labelled (e.g. the main tag) is skipped
 * @return a newly created map from tag to label
 */
private Map<TupleTag<?>, Integer> transformTupleTagsToLabels(
    TupleTag<?> mainTag, Set<TupleTag<?>> secondaryTags) {
  Map<TupleTag<?>, Integer> labels = Maps.newHashMap();
  labels.put(mainTag, MAIN_TAG_INDEX);

  int nextLabel = MAIN_TAG_INDEX + 1;
  for (TupleTag<?> candidate : secondaryTags) {
    if (labels.containsKey(candidate)) {
      // Already labelled; do not consume a label number.
      continue;
    }
    labels.put(candidate, nextLabel);
    nextLabel++;
  }
  return labels;
}
Example #2
Source File: FlinkDoFnFunction.java From flink-dataflow with Apache License 2.0 | 5 votes |
/**
 * Returns a stub {@link WindowingInternals}.
 *
 * <p>State and timer internals are {@code null}, windowed output and view-data writes are
 * no-ops, the only window reported is {@code GlobalWindow.INSTANCE}, the pane is
 * {@code PaneInfo.NO_FIRING}, and side-input lookup always throws.
 */
@Override
public WindowingInternals<IN, OUT> windowingInternals() {
  WindowingInternals<IN, OUT> internals = new WindowingInternals<IN, OUT>() {

    @Override
    public Collection<? extends BoundedWindow> windows() {
      return ImmutableList.of(GlobalWindow.INSTANCE);
    }

    @Override
    public PaneInfo pane() {
      return PaneInfo.NO_FIRING;
    }

    @Override
    public StateInternals stateInternals() {
      return null;
    }

    @Override
    public TimerInternals timerInternals() {
      return null;
    }

    @Override
    public void outputWindowedValue(
        OUT output,
        Instant timestamp,
        Collection<? extends BoundedWindow> windows,
        PaneInfo pane) {
      // Intentionally does nothing.
    }

    @Override
    public <T> void writePCollectionViewData(
        TupleTag<?> tag, Iterable<WindowedValue<T>> data, Coder<T> elemCoder)
        throws IOException {
      // Intentionally does nothing.
    }

    @Override
    public <T> T sideInput(PCollectionView<T> view, BoundedWindow mainInputWindow) {
      throw new RuntimeException("sideInput() not implemented.");
    }
  };
  return internals;
}
Example #3
Source File: FlinkParDoBoundMultiWrapper.java From flink-dataflow with Apache License 2.0 | 5 votes |
/**
 * Emits a side output as a {@code RawUnionValue} carrying the label registered for {@code tag}.
 *
 * <p>The timestamp is validated with {@code checkTimestamp} first. If {@code outputLabels} has
 * no entry for {@code tag}, nothing is emitted.
 *
 * @param inElement the input element; its windows and pane are reused for the emitted value
 * @param output the value to emit
 * @param timestamp timestamp to attach to the emitted value
 * @param collector sink receiving the windowed union value
 * @param tag tag identifying the target output
 */
@Override
public <T> void sideOutputWithTimestampHelper(
    WindowedValue<IN> inElement,
    T output,
    Instant timestamp,
    Collector<WindowedValue<RawUnionValue>> collector,
    TupleTag<T> tag) {
  checkTimestamp(inElement, timestamp);

  Integer label = outputLabels.get(tag);
  if (label == null) {
    // No label registered for this tag: drop the value.
    return;
  }

  RawUnionValue tagged = new RawUnionValue(label, output);
  collector.collect(
      makeWindowedValue(tagged, timestamp, inElement.getWindows(), inElement.getPane()));
}
Example #4
Source File: FlinkMultiOutputDoFnFunction.java From flink-dataflow with Apache License 2.0 | 5 votes |
/**
 * Emits {@code value} wrapped in a {@code RawUnionValue} labelled with the index that
 * {@code outputMap} holds for {@code tag}; does nothing when the tag has no index.
 *
 * @param tag tag identifying the target output
 * @param value the value to emit
 */
@Override
@SuppressWarnings("unchecked")
public <T> void sideOutput(TupleTag<T> tag, T value) {
  Integer label = outputMap.get(tag);
  if (label == null) {
    return;
  }
  RawUnionValue tagged = new RawUnionValue(label, value);
  outCollector.collect(tagged);
}
Example #5
Source File: FlinkParDoBoundWrapper.java From flink-dataflow with Apache License 2.0 | 5 votes |
/**
 * Builds the {@link WindowingInternals} for a single input element.
 *
 * <p>Windows and pane are taken from {@code inElement}; windowed output is forwarded to
 * {@code collector}. State internals, timer internals, view-data writes and side inputs are
 * all rejected with an exception.
 *
 * @param inElement the element currently being processed
 * @param collector sink for values produced through {@code outputWindowedValue}
 * @return the per-element windowing internals
 */
@Override
public WindowingInternals<IN, OUT> windowingInternalsHelper(
    final WindowedValue<IN> inElement, final Collector<WindowedValue<OUT>> collector) {
  WindowingInternals<IN, OUT> internals = new WindowingInternals<IN, OUT>() {

    @Override
    public Collection<? extends BoundedWindow> windows() {
      return inElement.getWindows();
    }

    @Override
    public PaneInfo pane() {
      return inElement.getPane();
    }

    @Override
    public void outputWindowedValue(
        OUT output,
        Instant timestamp,
        Collection<? extends BoundedWindow> windows,
        PaneInfo pane) {
      WindowedValue<OUT> windowed = makeWindowedValue(output, timestamp, windows, pane);
      collector.collect(windowed);
    }

    @Override
    public StateInternals stateInternals() {
      throw new NullPointerException("StateInternals are not available for ParDo.Bound().");
    }

    @Override
    public TimerInternals timerInternals() {
      throw new NullPointerException("TimeInternals are not available for ParDo.Bound().");
    }

    @Override
    public <T> void writePCollectionViewData(
        TupleTag<?> tag, Iterable<WindowedValue<T>> data, Coder<T> elemCoder)
        throws IOException {
      throw new RuntimeException("writePCollectionViewData() not supported in Streaming mode.");
    }

    @Override
    public <T> T sideInput(PCollectionView<T> view, BoundedWindow mainInputWindow) {
      throw new RuntimeException("sideInput() not implemented.");
    }
  };
  return internals;
}
Example #6
Source File: FlinkBatchTransformTranslators.java From flink-dataflow with Apache License 2.0 | 5 votes |
/**
 * Translates a {@code CoGroupByKey} over exactly two keyed inputs into a Flink
 * {@code CoGroupOperator}.
 *
 * <p>Both inputs are co-grouped on their {@code "key"} field and combined by a
 * {@code FlinkCoGroupKeyedListAggregator} built from the result schema and the two input tags.
 *
 * @param transform the transform being translated
 * @param context translation context supplying the input data sets and receiving the output
 * @throws UnsupportedOperationException if the transform does not have exactly two inputs;
 *     only the first two collections are wired into the operator, so any further input
 *     would otherwise be silently ignored
 */
@Override
public void translateNode(CoGroupByKey<K> transform, FlinkBatchTranslationContext context) {
  KeyedPCollectionTuple<K> input = context.getInput(transform);

  CoGbkResultSchema schema = input.getCoGbkResultSchema();
  List<KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?>> keyedCollections =
      input.getKeyedCollections();

  // The operator below is strictly binary. Fail fast with a descriptive message instead of
  // dropping extra inputs or surfacing a bare IndexOutOfBoundsException.
  if (keyedCollections.size() != 2) {
    throw new UnsupportedOperationException(
        "CoGroupByKey translation supports exactly two inputs, but got "
            + keyedCollections.size() + ".");
  }

  KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?> taggedCollection1 = keyedCollections.get(0);
  KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?> taggedCollection2 = keyedCollections.get(1);

  TupleTag<?> tupleTag1 = taggedCollection1.getTupleTag();
  TupleTag<?> tupleTag2 = taggedCollection2.getTupleTag();

  PCollection<? extends KV<K, ?>> collection1 = taggedCollection1.getCollection();
  PCollection<? extends KV<K, ?>> collection2 = taggedCollection2.getCollection();

  DataSet<KV<K,V1>> inputDataSet1 = context.getInputDataSet(collection1);
  DataSet<KV<K,V2>> inputDataSet2 = context.getInputDataSet(collection2);

  TypeInformation<KV<K,CoGbkResult>> typeInfo = context.getOutputTypeInfo();

  FlinkCoGroupKeyedListAggregator<K,V1,V2> aggregator =
      new FlinkCoGroupKeyedListAggregator<>(schema, tupleTag1, tupleTag2);

  // Both sides are grouped on the "key" field of their KV elements.
  Keys.ExpressionKeys<KV<K,V1>> keySelector1 =
      new Keys.ExpressionKeys<>(new String[]{"key"}, inputDataSet1.getType());
  Keys.ExpressionKeys<KV<K,V2>> keySelector2 =
      new Keys.ExpressionKeys<>(new String[]{"key"}, inputDataSet2.getType());

  DataSet<KV<K, CoGbkResult>> out = new CoGroupOperator<>(
      inputDataSet1, inputDataSet2,
      keySelector1, keySelector2,
      aggregator, typeInfo, null, transform.getName());
  context.setOutputDataSet(context.getOutput(transform), out);
}
Example #7
Source File: ParDoMultiOutputITCase.java From flink-dataflow with Apache License 2.0 | 4 votes |
@Override protected void testProgram() throws Exception { Pipeline p = FlinkTestPipeline.createForBatch(); PCollection<String> words = p.apply(Create.of("Hello", "Whatupmyman", "hey", "SPECIALthere", "MAAA", "MAAFOOO")); // Select words whose length is below a cut off, // plus the lengths of words that are above the cut off. // Also select words starting with "MARKER". final int wordLengthCutOff = 3; // Create tags to use for the main and side outputs. final TupleTag<String> wordsBelowCutOffTag = new TupleTag<String>(){}; final TupleTag<Integer> wordLengthsAboveCutOffTag = new TupleTag<Integer>(){}; final TupleTag<String> markedWordsTag = new TupleTag<String>(){}; PCollectionTuple results = words.apply(ParDo .withOutputTags(wordsBelowCutOffTag, TupleTagList.of(wordLengthsAboveCutOffTag) .and(markedWordsTag)) .of(new DoFn<String, String>() { final TupleTag<String> specialWordsTag = new TupleTag<String>() { }; public void processElement(ProcessContext c) { String word = c.element(); if (word.length() <= wordLengthCutOff) { c.output(word); } else { c.sideOutput(wordLengthsAboveCutOffTag, word.length()); } if (word.startsWith("MAA")) { c.sideOutput(markedWordsTag, word); } if (word.startsWith("SPECIAL")) { c.sideOutput(specialWordsTag, word); } } })); // Extract the PCollection results, by tag. PCollection<String> wordsBelowCutOff = results.get(wordsBelowCutOffTag); PCollection<Integer> wordLengthsAboveCutOff = results.get (wordLengthsAboveCutOffTag); PCollection<String> markedWords = results.get(markedWordsTag); markedWords.apply(TextIO.Write.to(resultPath)); p.run(); }
Example #8
Source File: FileToState.java From policyscanner with Apache License 2.0 | 4 votes |
/**
 * Creates the transform step with the tag it stores as its error output tag.
 *
 * @param tag tag stored in {@code errorOutputTag}
 */
public FileToState(TupleTag<GCPResourceErrorInfo> tag) {
  this.errorOutputTag = tag;
}
Example #9
Source File: FlinkDoFnFunction.java From flink-dataflow with Apache License 2.0 | 4 votes |
/**
 * Forwards to {@code sideOutput(tag, output)}; the {@code timestamp} argument is not used.
 *
 * @param tag tag identifying the target output
 * @param output the value to emit
 * @param timestamp ignored
 */
@Override
public <T> void sideOutputWithTimestamp(
    TupleTag<T> tag,
    T output,
    Instant timestamp) {
  sideOutput(tag, output);
}
Example #10
Source File: FlinkDoFnFunction.java From flink-dataflow with Apache License 2.0 | 4 votes |
@Override public <T> void sideOutput(TupleTag<T> tag, T output) { // ignore the side output, this can happen when a user does not register // side outputs but then outputs using a freshly created TupleTag. }
Example #11
Source File: FlinkCoGroupKeyedListAggregator.java From flink-dataflow with Apache License 2.0 | 4 votes |
/**
 * Stores the result schema and the tags of the two co-grouped inputs.
 *
 * @param schema the {@code CoGbkResultSchema} to keep
 * @param tupleTag1 tag of the first input
 * @param tupleTag2 tag of the second input
 */
public FlinkCoGroupKeyedListAggregator(
    CoGbkResultSchema schema,
    TupleTag<?> tupleTag1,
    TupleTag<?> tupleTag2) {
  this.tupleTag1 = tupleTag1;
  this.tupleTag2 = tupleTag2;
  this.schema = schema;
}
Example #12
Source File: FlinkMultiOutputDoFnFunction.java From flink-dataflow with Apache License 2.0 | 4 votes |
/**
 * Emits the side output via {@code sideOutput(tag, output)}, dropping the supplied timestamp.
 *
 * @param tag tag identifying the target output
 * @param output the value to emit
 * @param timestamp ignored
 */
@Override
public <T> void sideOutputWithTimestamp(
    TupleTag<T> tag,
    T output,
    Instant timestamp) {
  sideOutput(tag, output);
}
Example #13
Source File: FlinkMultiOutputDoFnFunction.java From flink-dataflow with Apache License 2.0 | 4 votes |
/**
 * Stores the wrapped function, the pipeline options and the tag-to-index map.
 *
 * @param doFn the user function to wrap
 * @param options pipeline options to keep
 * @param outputMap map from output tag to integer index
 */
public FlinkMultiOutputDoFnFunction(
    DoFn<IN, OUT> doFn,
    PipelineOptions options,
    Map<TupleTag<?>, Integer> outputMap) {
  this.outputMap = outputMap;
  this.options = options;
  this.doFn = doFn;
}
Example #14
Source File: FlinkBatchTransformTranslators.java From flink-dataflow with Apache License 2.0 | 4 votes |
@Override public void translateNode(ParDo.BoundMulti<IN, OUT> transform, FlinkBatchTranslationContext context) { DataSet<IN> inputDataSet = context.getInputDataSet(context.getInput(transform)); final DoFn<IN, OUT> doFn = transform.getFn(); Map<TupleTag<?>, PCollection<?>> outputs = context.getOutput(transform).getAll(); Map<TupleTag<?>, Integer> outputMap = Maps.newHashMap(); // put the main output at index 0, FlinkMultiOutputDoFnFunction also expects this outputMap.put(transform.getMainOutputTag(), 0); int count = 1; for (TupleTag<?> tag: outputs.keySet()) { if (!outputMap.containsKey(tag)) { outputMap.put(tag, count++); } } // collect all output Coders and create a UnionCoder for our tagged outputs List<Coder<?>> outputCoders = Lists.newArrayList(); for (PCollection<?> coll: outputs.values()) { outputCoders.add(coll.getCoder()); } UnionCoder unionCoder = UnionCoder.of(outputCoders); @SuppressWarnings("unchecked") TypeInformation<RawUnionValue> typeInformation = new CoderTypeInformation<>(unionCoder); @SuppressWarnings("unchecked") FlinkMultiOutputDoFnFunction<IN, OUT> doFnWrapper = new FlinkMultiOutputDoFnFunction(doFn, context.getPipelineOptions(), outputMap); MapPartitionOperator<IN, RawUnionValue> outputDataSet = new MapPartitionOperator<>(inputDataSet, typeInformation, doFnWrapper, transform.getName()); transformSideInputs(transform.getSideInputs(), outputDataSet, context); for (Map.Entry<TupleTag<?>, PCollection<?>> output: outputs.entrySet()) { TypeInformation<Object> outputType = context.getTypeInfo(output.getValue()); int outputTag = outputMap.get(output.getKey()); FlinkMultiOutputPruningFunction<Object> pruningFunction = new FlinkMultiOutputPruningFunction<>(outputTag); FlatMapOperator<RawUnionValue, Object> pruningOperator = new FlatMapOperator<>(outputDataSet, outputType, pruningFunction, output.getValue().getName()); context.setOutputDataSet(output.getValue(), pruningOperator); } }
Example #15
Source File: FlinkParDoBoundWrapper.java From flink-dataflow with Apache License 2.0 | 4 votes |
@Override public <T> void sideOutputWithTimestampHelper(WindowedValue<IN> inElement, T output, Instant timestamp, Collector<WindowedValue<OUT>> outCollector, TupleTag<T> tag) { // ignore the side output, this can happen when a user does not register // side outputs but then outputs using a freshly created TupleTag. throw new RuntimeException("sideOutput() not not available in ParDo.Bound()."); }
Example #16
Source File: FlinkAbstractParDoWrapper.java From flink-dataflow with Apache License 2.0 | 4 votes |
/**
 * Hook through which subclasses decide how a side output is emitted.
 *
 * @param inElement the input element associated with the side output
 * @param output the value to emit
 * @param timestamp the timestamp requested for the emitted value
 * @param outCollector the collector available for emitting results
 * @param tag tag identifying the target output
 */
public abstract <T> void sideOutputWithTimestampHelper( WindowedValue<IN> inElement, T output, Instant timestamp, Collector<WindowedValue<OUTFL>> outCollector, TupleTag<T> tag);
Example #17
Source File: FlinkAbstractParDoWrapper.java From flink-dataflow with Apache License 2.0 | 4 votes |
/**
 * Hands the side output to {@code sideOutputWithTimestampHelper}, together with the current
 * {@code element} and {@code collector}.
 *
 * @param tag tag identifying the target output
 * @param output the value to emit
 * @param timestamp timestamp to pass along
 */
@Override
public <T> void sideOutputWithTimestamp(
    TupleTag<T> tag,
    T output,
    Instant timestamp) {
  sideOutputWithTimestampHelper(
      element,
      output,
      timestamp,
      collector,
      tag);
}
Example #18
Source File: FlinkAbstractParDoWrapper.java From flink-dataflow with Apache License 2.0 | 4 votes |
/**
 * Emits a side output stamped with the timestamp of the current {@code element}.
 *
 * @param tag tag identifying the target output
 * @param output the value to emit
 */
@Override
public <T> void sideOutput(TupleTag<T> tag, T output) {
  Instant elementTimestamp = this.element.getTimestamp();
  sideOutputWithTimestamp(tag, output, elementTimestamp);
}
Example #19
Source File: FlinkParDoBoundMultiWrapper.java From flink-dataflow with Apache License 2.0 | 4 votes |
/**
 * Creates the wrapper, rejecting a {@code null} main tag or label map.
 *
 * @param options pipeline options, passed to the superclass
 * @param windowingStrategy windowing strategy, passed to the superclass
 * @param doFn the user function, passed to the superclass
 * @param mainTag tag of the main output; must not be {@code null}
 * @param tagsToLabels map from output tag to label; must not be {@code null}
 */
public FlinkParDoBoundMultiWrapper(
    PipelineOptions options,
    WindowingStrategy<?, ?> windowingStrategy,
    DoFn<IN, OUT> doFn,
    TupleTag<?> mainTag,
    Map<TupleTag<?>, Integer> tagsToLabels) {
  super(options, windowingStrategy, doFn);

  // Validate-then-assign, in the same order as before so the first failing check is unchanged.
  Preconditions.checkNotNull(mainTag);
  this.mainTag = mainTag;

  Preconditions.checkNotNull(tagsToLabels);
  this.outputLabels = tagsToLabels;
}
Example #20
Source File: DistributeWorkDataDoFn.java From data-timeseries-java with Apache License 2.0 | 4 votes |
/**
 * Stores the work-packet configuration and the integer-valued tag.
 *
 * @param workPacketView configuration kept in {@code workPacketView}
 * @param tag tag kept in {@code tag}
 */
public DistributeWorkDataDoFn(
    WorkPacketConfig workPacketView,
    TupleTag<Integer> tag) {
  this.tag = tag;
  this.workPacketView = workPacketView;
}
Example #21
Source File: CreateWorkPacketsDoFn.java From data-timeseries-java with Apache License 2.0 | 4 votes |
/**
 * Stores the work-packet configuration and the counter tag.
 *
 * @param workPacketView configuration kept in {@code workPacketView}
 * @param counter tag kept in {@code counter}
 */
public CreateWorkPacketsDoFn(
    WorkPacketConfig workPacketView,
    TupleTag<Integer> counter) {
  this.counter = counter;
  this.workPacketView = workPacketView;
}
Example #22
Source File: ExtractState.java From policyscanner with Apache License 2.0 | 4 votes |
/**
 * Creates the transform step with the tag it stores as its error output tag.
 *
 * @param tag tag stored in {@code errorOutputTag}
 */
public ExtractState(TupleTag<GCPResourceErrorInfo> tag) {
  this.errorOutputTag = tag;
}