Java Code Examples for org.apache.beam.sdk.transforms.join.KeyedPCollectionTuple#empty()
The following examples show how to use
org.apache.beam.sdk.transforms.join.KeyedPCollectionTuple#empty().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestExpansionService.java From beam with Apache License 2.0 | 5 votes |
/**
 * Assembles the keyed input tuple from the named input collections.
 *
 * <p>Begins with an empty {@link KeyedPCollectionTuple} created on the given pipeline, then adds
 * each map entry to it under a {@link TupleTag} whose id is the entry's key.
 *
 * @param p pipeline on which the empty tuple is created
 * @param inputs input collections, keyed by the name used as the tag id
 * @return the tuple holding one tagged entry per element of {@code inputs}
 */
// Raw types are intentional: the map only exposes PCollection<?>, so the element type of each
// collection cannot be expressed (or checked) here.
@SuppressWarnings({"rawtypes", "unchecked"})
@Override
public KeyedPCollectionTuple<Long> createInput(Pipeline p, Map<String, PCollection<?>> inputs) {
  KeyedPCollectionTuple tuple = KeyedPCollectionTuple.empty(p);
  for (Map.Entry<String, PCollection<?>> namedInput : inputs.entrySet()) {
    TupleTag tag = new TupleTag(namedInput.getKey());
    PCollection<?> collection = namedInput.getValue();
    // and(...) returns the extended tuple, so the result has to be re-assigned.
    tuple = tuple.and(tag, collection);
  }
  return tuple;
}
Example 2
Source File: CoGroup.java From beam with Apache License 2.0 | 4 votes |
private static JoinInformation from( PCollectionTuple input, Function<String, FieldAccessDescriptor> getFieldAccessDescriptor, Function<String, Boolean> getIsSideInput) { KeyedPCollectionTuple<Row> keyedPCollectionTuple = KeyedPCollectionTuple.empty(input.getPipeline()); List<String> sortedTags = input.getAll().keySet().stream() .map(TupleTag::getId) .sorted() .collect(Collectors.toList()); // Keep this in a TreeMap so that it's sorted. This way we get a deterministic output // schema. TreeMap<String, Schema> componentSchemas = Maps.newTreeMap(); Map<Integer, SerializableFunction<Object, Row>> toRows = Maps.newHashMap(); Map<String, PCollectionView<Map<Row, Iterable<Row>>>> sideInputs = Maps.newHashMap(); Map<Integer, String> tagToKeyedTag = Maps.newHashMap(); Schema keySchema = null; for (Map.Entry<TupleTag<?>, PCollection<?>> entry : input.getAll().entrySet()) { String tag = entry.getKey().getId(); int tagIndex = sortedTags.indexOf(tag); PCollection<?> pc = entry.getValue(); Schema schema = pc.getSchema(); componentSchemas.put(tag, schema); toRows.put(tagIndex, (SerializableFunction<Object, Row>) pc.getToRowFunction()); FieldAccessDescriptor fieldAccessDescriptor = getFieldAccessDescriptor.apply(tag); if (fieldAccessDescriptor == null) { throw new IllegalStateException("No fields were set for input " + tag); } // Resolve the key schema, keeping the fields in the order specified by the user. // Otherwise, if different field names are specified for different PCollections, they // might not match up. // The key schema contains the field names from the first PCollection specified. FieldAccessDescriptor resolved = fieldAccessDescriptor.resolve(schema); Schema currentKeySchema = SelectHelpers.getOutputSchema(schema, resolved); if (keySchema == null) { keySchema = currentKeySchema; } else { keySchema = SchemaUtils.mergeWideningNullable(keySchema, currentKeySchema); } // Create a new tag for the output. 
TupleTag randomTag = new TupleTag<>(); String keyedTag = tag + "_" + randomTag; tagToKeyedTag.put(tagIndex, keyedTag); PCollection<KV<Row, Row>> keyedPCollection = extractKey(pc, schema, keySchema, resolved, tag); if (getIsSideInput.apply(tag)) { sideInputs.put( keyedTag, keyedPCollection.apply("computeSideInputView" + tag, View.asMultimap())); } else { keyedPCollectionTuple = keyedPCollectionTuple.and(keyedTag, keyedPCollection); } } return new JoinInformation( keyedPCollectionTuple, sideInputs, keySchema, componentSchemas, toRows, sortedTags, tagToKeyedTag); }