org.apache.beam.sdk.coders.Coder Java Examples
The following examples show how to use org.apache.beam.sdk.coders.Coder.
The original project and source file are noted above each example.
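Before the project-specific examples, here is a minimal, self-contained sketch of the basic Coder contract — encoding a value to bytes and decoding it back — using the SDK's built-in StringUtf8Coder and CoderUtils helpers. It is illustrative only and not taken from any of the projects below.

import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.util.CoderUtils;

public class CoderRoundTrip {
  public static void main(String[] args) throws Exception {
    // Every Coder<T> can turn a T into bytes and back; runners rely on this
    // to shuffle and persist PCollection elements.
    Coder<String> coder = StringUtf8Coder.of();
    byte[] encoded = CoderUtils.encodeToByteArray(coder, "hello beam");
    String decoded = CoderUtils.decodeFromByteArray(coder, encoded);
    System.out.println(decoded); // prints "hello beam"
  }
}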
Example #1
Source File: GroupCombineFunctions.java From beam with Apache License 2.0
/**
 * An implementation of {@link
 * org.apache.beam.runners.core.GroupByKeyViaGroupByKeyOnly.GroupByKeyOnly} for the Spark runner.
 */
public static <K, V> JavaRDD<KV<K, Iterable<WindowedValue<V>>>> groupByKeyOnly(
    JavaRDD<WindowedValue<KV<K, V>>> rdd,
    Coder<K> keyCoder,
    WindowedValueCoder<V> wvCoder,
    @Nullable Partitioner partitioner) {
  // we use coders to convert objects in the PCollection to byte arrays, so they
  // can be transferred over the network for the shuffle.
  JavaPairRDD<ByteArray, byte[]> pairRDD =
      rdd.map(new ReifyTimestampsAndWindowsFunction<>())
          .mapToPair(TranslationUtils.toPairFunction())
          .mapToPair(CoderHelpers.toByteFunction(keyCoder, wvCoder));

  // If no partitioner is passed, the default group by key operation is called
  JavaPairRDD<ByteArray, Iterable<byte[]>> groupedRDD =
      (partitioner != null) ? pairRDD.groupByKey(partitioner) : pairRDD.groupByKey();

  return groupedRDD
      .mapToPair(CoderHelpers.fromByteFunctionIterable(keyCoder, wvCoder))
      .map(new TranslationUtils.FromPairFunction<>());
}
Example #2
Source File: SplittableParDo.java From beam with Apache License 2.0
/**
 * Creates the transform for a {@link ParDo}-compatible {@link AppliedPTransform}.
 *
 * <p>The input may generally be a deserialized transform so it may not actually be a {@link
 * ParDo}. Instead {@link ParDoTranslation} will be used to extract fields.
 */
@SuppressWarnings({"unchecked", "rawtypes"})
public static <InputT, OutputT> SplittableParDo<InputT, OutputT, ?, ?> forAppliedParDo(
    AppliedPTransform<PCollection<InputT>, PCollectionTuple, ?> parDo) {
  checkArgument(parDo != null, "parDo must not be null");

  try {
    Map<TupleTag<?>, Coder<?>> outputTagsToCoders = Maps.newHashMap();
    for (Map.Entry<TupleTag<?>, PValue> entry : parDo.getOutputs().entrySet()) {
      outputTagsToCoders.put(entry.getKey(), ((PCollection) entry.getValue()).getCoder());
    }
    return new SplittableParDo(
        ParDoTranslation.getDoFn(parDo),
        ParDoTranslation.getSideInputs(parDo),
        ParDoTranslation.getMainOutputTag(parDo),
        ParDoTranslation.getAdditionalOutputTags(parDo),
        outputTagsToCoders);
  } catch (IOException exc) {
    throw new RuntimeException(exc);
  }
}
Example #3
Source File: FnApiDoFnRunner.java From beam with Apache License 2.0
private void startBundle() {
  // Register as a consumer for each timer.
  timerHandlers = new HashMap<>();
  for (Map.Entry<String, KV<TimeDomain, Coder<Timer<Object>>>> timerFamilyInfo :
      timerFamilyInfos.entrySet()) {
    String localName = timerFamilyInfo.getKey();
    TimeDomain timeDomain = timerFamilyInfo.getValue().getKey();
    Coder<Timer<Object>> timerCoder = timerFamilyInfo.getValue().getValue();
    timerHandlers.put(
        localName,
        beamFnTimerClient.register(
            LogicalEndpoint.timer(processBundleInstructionId.get(), pTransformId, localName),
            timerCoder,
            (FnDataReceiver<Timer<Object>>)
                timer -> processTimer(localName, timeDomain, timer)));
  }

  doFnInvoker.invokeStartBundle(startBundleArgumentProvider);
}
Example #4
Source File: HBaseMutationCoder.java From beam with Apache License 2.0
@Override
public <T> Coder<T> coderFor(
    TypeDescriptor<T> typeDescriptor, List<? extends Coder<?>> componentCoders)
    throws CannotProvideCoderException {
  if (!typeDescriptor.isSubtypeOf(HBASE_MUTATION_TYPE_DESCRIPTOR)) {
    throw new CannotProvideCoderException(
        String.format(
            "Cannot provide %s because %s is not a subclass of %s",
            HBaseMutationCoder.class.getSimpleName(),
            typeDescriptor,
            Mutation.class.getName()));
  }

  try {
    @SuppressWarnings("unchecked")
    Coder<T> coder = (Coder<T>) HBaseMutationCoder.of();
    return coder;
  } catch (IllegalArgumentException e) {
    throw new CannotProvideCoderException(e);
  }
}
Example #5
Source File: FlinkStreamingSideInputHandlerFactory.java From beam with Apache License 2.0
@Override
public <V, W extends BoundedWindow> IterableSideInputHandler<V, W> forIterableSideInput(
    String transformId, String sideInputId, Coder<V> elementCoder, Coder<W> windowCoder) {
  PCollectionView collectionNode =
      sideInputToCollection.get(
          SideInputId.newBuilder().setTransformId(transformId).setLocalName(sideInputId).build());
  checkArgument(collectionNode != null, "No side input for %s/%s", transformId, sideInputId);

  return new IterableSideInputHandler<V, W>() {
    @Override
    public Iterable<V> get(W window) {
      return checkNotNull(
          (Iterable<V>) runnerHandler.getIterable(collectionNode, window),
          "Element processed by SDK before side input is ready");
    }

    @Override
    public Coder<V> elementCoder() {
      return elementCoder;
    }
  };
}
Example #6
Source File: GroupingShuffleReader.java From beam with Apache License 2.0
public GroupingShuffleReader(
    PipelineOptions options,
    byte[] shuffleReaderConfig,
    @Nullable String startShufflePosition,
    @Nullable String stopShufflePosition,
    Coder<WindowedValue<KV<K, Iterable<V>>>> coder,
    BatchModeExecutionContext executionContext,
    DataflowOperationContext operationContext,
    ShuffleReadCounterFactory shuffleReadCounterFactory,
    boolean valuesAreSorted)
    throws Exception {
  this.options = options;
  this.shuffleReaderConfig = shuffleReaderConfig;
  this.startShufflePosition = startShufflePosition;
  this.stopShufflePosition = stopShufflePosition;
  this.executionContext = executionContext;
  this.operationContext = operationContext;
  this.shuffleReadCounterFactory = shuffleReadCounterFactory;
  initCoder(coder, valuesAreSorted);
  // We cannot initialize perOperationPerDatasetBytesCounter here, as it
  // depends on shuffleReaderConfig, which isn't populated yet.
}
Example #7
Source File: FlinkBroadcastStateInternals.java From beam with Apache License 2.0
AbstractBroadcastState(
    OperatorStateBackend flinkStateBackend,
    String name,
    StateNamespace namespace,
    Coder<T> coder) {
  this.name = name;
  this.namespace = namespace;
  this.flinkStateBackend = flinkStateBackend;

  CoderTypeInformation<Map<String, T>> typeInfo =
      new CoderTypeInformation<>(MapCoder.of(StringUtf8Coder.of(), coder));

  flinkStateDescriptor =
      new ListStateDescriptor<>(name, typeInfo.createSerializer(new ExecutionConfig()));
}
Example #8
Source File: StateSpecs.java From beam with Apache License 2.0
/**
 * <b><i>For internal use only; no backwards-compatibility guarantees.</i></b>
 *
 * <p>Create a state spec for values that use a {@link CombineFn} to automatically merge multiple
 * {@code InputT}s into a single {@code OutputT}.
 *
 * <p>This determines the {@code Coder<AccumT>} from the given {@code Coder<InputT>}, and should
 * only be used to initialize static values.
 */
@Internal
public static <InputT, AccumT, OutputT>
    StateSpec<CombiningState<InputT, AccumT, OutputT>> combiningFromInputInternal(
        Coder<InputT> inputCoder, CombineFn<InputT, AccumT, OutputT> combineFn) {
  try {
    Coder<AccumT> accumCoder = combineFn.getAccumulatorCoder(STANDARD_REGISTRY, inputCoder);
    return combiningInternal(accumCoder, combineFn);
  } catch (CannotProvideCoderException e) {
    throw new IllegalArgumentException(
        "Unable to determine accumulator coder for "
            + combineFn.getClass().getSimpleName()
            + " from "
            + inputCoder,
        e);
  }
}
Example #9
Source File: GroupByKeyAndWindowDoFnTransform.java From incubator-nemo with Apache License 2.0
/**
 * GroupByKey constructor.
 *
 * @param outputCoders      output coders
 * @param mainOutputTag     main output tag
 * @param windowingStrategy windowing strategy
 * @param options           pipeline options
 * @param reduceFn          reduce function
 * @param displayData       display data.
 */
public GroupByKeyAndWindowDoFnTransform(final Map<TupleTag<?>, Coder<?>> outputCoders,
                                        final TupleTag<KV<K, Iterable<InputT>>> mainOutputTag,
                                        final WindowingStrategy<?, ?> windowingStrategy,
                                        final PipelineOptions options,
                                        final SystemReduceFn reduceFn,
                                        final DisplayData displayData) {
  super(null, /* doFn */
    null, /* inputCoder */
    outputCoders,
    mainOutputTag,
    Collections.emptyList(), /* GBK does not have additional outputs */
    windowingStrategy,
    Collections.emptyMap(), /* GBK does not have additional side inputs */
    options,
    displayData,
    DoFnSchemaInformation.create(),
    Collections.emptyMap());
  this.keyToValues = new HashMap<>();
  this.reduceFn = reduceFn;
  this.prevOutputWatermark = new Watermark(Long.MIN_VALUE);
  this.keyAndWatermarkHoldMap = new HashMap<>();
}
Example #10
Source File: IsmFormat.java From beam with Apache License 2.0
/** Returns an IsmRecordCoder with the specified key component coders, value coder. */
public static <V> IsmRecordCoder<V> of(
    int numberOfShardKeyCoders,
    int numberOfMetadataShardKeyCoders,
    List<Coder<?>> keyComponentCoders,
    Coder<V> valueCoder) {
  checkNotNull(keyComponentCoders);
  checkArgument(keyComponentCoders.size() > 0);
  checkArgument(numberOfShardKeyCoders > 0);
  checkArgument(numberOfShardKeyCoders <= keyComponentCoders.size());
  checkArgument(numberOfMetadataShardKeyCoders <= keyComponentCoders.size());
  return new IsmRecordCoder<>(
      numberOfShardKeyCoders, numberOfMetadataShardKeyCoders, keyComponentCoders, valueCoder);
}
Example #11
Source File: FlinkBroadcastStateInternals.java From beam with Apache License 2.0
FlinkKeyedCombiningState(
    OperatorStateBackend flinkStateBackend,
    StateTag<CombiningState<InputT, AccumT, OutputT>> address,
    Combine.CombineFn<InputT, AccumT, OutputT> combineFn,
    StateNamespace namespace,
    Coder<AccumT> accumCoder,
    FlinkBroadcastStateInternals<K2> flinkStateInternals) {
  super(flinkStateBackend, address.getId(), namespace, accumCoder);
  this.namespace = namespace;
  this.address = address;
  this.combineFn = combineFn;
  this.flinkStateInternals = flinkStateInternals;
}
Example #12
Source File: FlinkBatchTranslationContext.java From beam with Apache License 2.0
<T> TypeInformation<WindowedValue<T>> getTypeInfo(
    Coder<T> coder, WindowingStrategy<?, ?> windowingStrategy) {
  WindowedValue.FullWindowedValueCoder<T> windowedValueCoder =
      WindowedValue.getFullCoder(coder, windowingStrategy.getWindowFn().windowCoder());
  return new CoderTypeInformation<>(windowedValueCoder);
}
Example #13
Source File: ProcessBundleDescriptorsTest.java From beam with Apache License 2.0
private static void ensureLengthPrefixed(
    RunnerApi.Coder coder,
    RunnerApi.Coder originalCoder,
    Map<String, RunnerApi.Coder> pbsCoderMap) {
  assertThat(coder.getSpec().getUrn(), is(ModelCoders.LENGTH_PREFIX_CODER_URN));
  // Check that the wrapped coder is unchanged
  String lengthPrefixedWrappedCoderId = coder.getComponentCoderIds(0);
  assertThat(pbsCoderMap.get(lengthPrefixedWrappedCoderId), is(originalCoder));
}
Example #14
Source File: SamzaTimerInternalsFactory.java From beam with Apache License 2.0
private SamzaTimerInternalsFactory(
    Coder<K> keyCoder,
    Scheduler<KeyedTimerData<K>> timerRegistry,
    String timerStateId,
    SamzaStoreStateInternals.Factory<?> nonKeyedStateInternalsFactory,
    Coder<BoundedWindow> windowCoder,
    IsBounded isBounded) {
  this.keyCoder = keyCoder;
  this.timerRegistry = timerRegistry;
  this.eventTimeTimers = new TreeSet<>();
  this.state = new SamzaTimerState(timerStateId, nonKeyedStateInternalsFactory, windowCoder);
  this.isBounded = isBounded;
}
Example #15
Source File: RunnerPCollectionView.java From beam with Apache License 2.0
/** Create a new {@link RunnerPCollectionView} from the provided components. */
public RunnerPCollectionView(
    @Nullable PCollection<?> pCollection,
    TupleTag<Iterable<WindowedValue<?>>> tag,
    ViewFn<Iterable<WindowedValue<?>>, T> viewFn,
    WindowMappingFn<?> windowMappingFn,
    @Nullable WindowingStrategy<?, ?> windowingStrategy,
    @Nullable Coder<?> coder) {
  this.pCollection = pCollection;
  this.tag = tag;
  this.viewFn = viewFn;
  this.windowMappingFn = windowMappingFn;
  this.windowingStrategy = windowingStrategy;
  this.coder = coder;
}
Example #16
Source File: WindmillSink.java From beam with Apache License 2.0
WindmillSink(
    String destinationName,
    Coder<WindowedValue<T>> coder,
    StreamingModeExecutionContext context) {
  this.writer = new WindmillStreamWriter(destinationName);
  FullWindowedValueCoder<T> inputCoder = (FullWindowedValueCoder<T>) coder;
  this.valueCoder = inputCoder.getValueCoder();
  this.windowsCoder = inputCoder.getWindowsCoder();
  this.context = context;
}
Example #17
Source File: CloudObjectTranslators.java From beam with Apache License 2.0
public static CloudObjectTranslator<NullableCoder> nullable() {
  return new CloudObjectTranslator<NullableCoder>() {
    @Override
    public CloudObject toCloudObject(NullableCoder target, SdkComponents sdkComponents) {
      CloudObject base = CloudObject.forClass(NullableCoder.class);
      return addComponents(
          base, Collections.<Coder<?>>singletonList(target.getValueCoder()), sdkComponents);
    }

    @Override
    public NullableCoder<?> fromCloudObject(CloudObject cloudObject) {
      List<Coder<?>> componentList = getComponents(cloudObject);
      checkArgument(
          componentList.size() == 1,
          "Expected 1 component for %s, got %s",
          NullableCoder.class.getSimpleName(),
          componentList.size());
      return NullableCoder.of(componentList.get(0));
    }

    @Override
    public Class<? extends NullableCoder> getSupportedClass() {
      return NullableCoder.class;
    }

    @Override
    public String cloudObjectClassName() {
      return CloudObject.forClass(NullableCoder.class).getClassName();
    }
  };
}
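For context on what the translator above serializes: NullableCoder wraps a single value coder and prepends a null flag to each element, so its only component coder is the wrapped one. A minimal illustrative sketch, using the SDK's StringUtf8Coder and CoderUtils (not taken from the project above):

// NullableCoder has exactly one component: the coder for non-null values.
NullableCoder<String> nullable = NullableCoder.of(StringUtf8Coder.of());
Coder<String> inner = nullable.getValueCoder();
byte[] bytes = CoderUtils.encodeToByteArray(nullable, null); // null is encodable here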
Example #18
Source File: StateSpecs.java From beam with Apache License 2.0
@SuppressWarnings("unchecked") @Override public void offerCoders(Coder[] coders) { if (this.accumCoder == null && coders[2] != null) { this.accumCoder = (Coder<AccumT>) coders[2]; } }
Example #19
Source File: CalculateCoverage.java From dataflow-java with Apache License 2.0
public static void registerPipelineCoders(Pipeline p) {
  CoderRegistry cr = p.getCoderRegistry();
  cr.registerCoderForClass(Annotation.class,
      (Coder<Annotation>) GenericJsonCoder.of(Annotation.class));
  cr.registerCoderForClass(AnnotationSet.class,
      (Coder<AnnotationSet>) GenericJsonCoder.of(AnnotationSet.class));
  cr.registerCoderForClass(BatchCreateAnnotationsRequest.class,
      (Coder<BatchCreateAnnotationsRequest>) GenericJsonCoder
          .of(BatchCreateAnnotationsRequest.class));
  cr.registerCoderForClass(PosRgsMq.class,
      (Coder<PosRgsMq>) GenericJsonCoder.of(PosRgsMq.class));
  cr.registerCoderForClass(Position.class,
      (Coder<Position>) GenericJsonCoder.of(Position.class));
}
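The same registration pattern works for any custom type. A hedged sketch — MyEvent is a hypothetical placeholder class, and SerializableCoder is the SDK's catch-all coder for Serializable types:

// Assumes MyEvent implements java.io.Serializable (hypothetical type).
Pipeline p = Pipeline.create(options);
p.getCoderRegistry()
    .registerCoderForClass(MyEvent.class, SerializableCoder.of(MyEvent.class));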
Example #20
Source File: FnApiWindowMappingFnTest.java From beam with Apache License 2.0
@Override
public <T> InboundDataClient receive(
    LogicalEndpoint inputLocation, Coder<T> coder, FnDataReceiver<T> consumer) {
  this.inboundReceiver = (FnDataReceiver) consumer;
  this.inboundDataClient = CompletableFutureInboundDataClient.create();
  return inboundDataClient;
}
Example #21
Source File: StreamingGroupAlsoByWindowsDoFns.java From beam with Apache License 2.0
public static <K, InputT, AccumT, OutputT, W extends BoundedWindow>
    GroupAlsoByWindowFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> create(
        final WindowingStrategy<?, W> windowingStrategy,
        StateInternalsFactory<K> stateInternalsFactory,
        final AppliedCombineFn<K, InputT, AccumT, OutputT> combineFn,
        final Coder<K> keyCoder) {
  Preconditions.checkNotNull(combineFn);
  return StreamingGroupAlsoByWindowViaWindowSetFn.create(
      windowingStrategy, stateInternalsFactory, SystemReduceFn.combining(keyCoder, combineFn));
}
Example #22
Source File: SchemaCoderHelpers.java From beam with Apache License 2.0
/** Returns the coder used for a given primitive type. */
public static <T> Coder<T> coderForFieldType(FieldType fieldType) {
  Coder<T> coder;
  switch (fieldType.getTypeName()) {
    case ROW:
      coder = (Coder<T>) SchemaCoder.of(fieldType.getRowSchema());
      break;
    case ARRAY:
      coder = (Coder<T>) ListCoder.of(coderForFieldType(fieldType.getCollectionElementType()));
      break;
    case ITERABLE:
      coder =
          (Coder<T>) IterableCoder.of(coderForFieldType(fieldType.getCollectionElementType()));
      break;
    case MAP:
      coder =
          (Coder<T>)
              MapCoder.of(
                  coderForFieldType(fieldType.getMapKeyType()),
                  coderForFieldType(fieldType.getMapValueType()));
      break;
    case LOGICAL_TYPE:
      coder =
          new LogicalTypeCoder(
              fieldType.getLogicalType(),
              coderForFieldType(fieldType.getLogicalType().getBaseType()));
      break;
    default:
      coder = (Coder<T>) CODER_MAP.get(fieldType.getTypeName());
  }
  Preconditions.checkNotNull(coder, "Unexpected field type " + fieldType.getTypeName());
  if (fieldType.getNullable()) {
    coder = NullableCoder.of(coder);
  }
  return coder;
}
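A related, user-facing way to get a coder for schema'd data is RowCoder, which composes per-field coders much like the helper above. A minimal sketch with a made-up two-field schema:

// Hypothetical schema for illustration; RowCoder derives field coders from it.
Schema schema =
    Schema.builder().addStringField("name").addInt32Field("age").build();
Coder<Row> rowCoder = RowCoder.of(schema);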
Example #23
Source File: LengthPrefixUnknownCodersTest.java From beam with Apache License 2.0
/** Test replacing unknown coders with {@code LengthPrefixCoder<ByteArray>} */
@Test
public void testLengthPrefixAndReplaceUnknownCoder() throws Exception {
  Coder<WindowedValue<KV<String, Integer>>> windowedValueCoder =
      WindowedValue.getFullCoder(
          KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()), GlobalWindow.Coder.INSTANCE);

  Map<String, Object> lengthPrefixedCoderCloudObject =
      forCodec(CloudObjects.asCloudObject(windowedValueCoder, /*sdkComponents=*/ null), true);

  assertEqualsAsJson(
      CloudObjects.asCloudObject(prefixedAndReplacedWindowedValueCoder, /*sdkComponents=*/ null),
      lengthPrefixedCoderCloudObject);
}
Example #24
Source File: GroupByKey.java From beam with Apache License 2.0
/**
 * Returns the {@code Coder} of the input to this transform, which should be a {@code KvCoder}.
 */
@SuppressWarnings("unchecked")
static <K, V> KvCoder<K, V> getInputKvCoder(Coder<KV<K, V>> inputCoder) {
  if (!(inputCoder instanceof KvCoder)) {
    throw new IllegalStateException("GroupByKey requires its input to use KvCoder");
  }
  return (KvCoder<K, V>) inputCoder;
}
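To see what the cast above unlocks, note that a KvCoder is built from, and decomposes back into, its key and value component coders — the key coder is what grouping transforms use to compare encoded key bytes. An illustrative sketch using the SDK's built-in coders:

KvCoder<String, Integer> kvCoder = KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of());
Coder<String> keyCoder = kvCoder.getKeyCoder();     // used to group by encoded key bytes
Coder<Integer> valueCoder = kvCoder.getValueCoder();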
Example #25
Source File: TestStreamTranslation.java From beam with Apache License 2.0
static <T> RunnerApi.TestStreamPayload.Event eventToProto(
    TestStream.Event<T> event, Coder<T> coder) throws IOException {
  switch (event.getType()) {
    case WATERMARK:
      return RunnerApi.TestStreamPayload.Event.newBuilder()
          .setWatermarkEvent(
              RunnerApi.TestStreamPayload.Event.AdvanceWatermark.newBuilder()
                  .setNewWatermark(
                      ((TestStream.WatermarkEvent<T>) event).getWatermark().getMillis()))
          .build();

    case PROCESSING_TIME:
      return RunnerApi.TestStreamPayload.Event.newBuilder()
          .setProcessingTimeEvent(
              RunnerApi.TestStreamPayload.Event.AdvanceProcessingTime.newBuilder()
                  .setAdvanceDuration(
                      ((TestStream.ProcessingTimeEvent<T>) event)
                          .getProcessingTimeAdvance()
                          .getMillis()))
          .build();

    case ELEMENT:
      RunnerApi.TestStreamPayload.Event.AddElements.Builder builder =
          RunnerApi.TestStreamPayload.Event.AddElements.newBuilder();
      for (TimestampedValue<T> element : ((TestStream.ElementEvent<T>) event).getElements()) {
        builder.addElements(
            RunnerApi.TestStreamPayload.TimestampedElement.newBuilder()
                .setTimestamp(element.getTimestamp().getMillis())
                .setEncodedElement(
                    ByteString.copyFrom(
                        CoderUtils.encodeToByteArray(coder, element.getValue()))));
      }
      return RunnerApi.TestStreamPayload.Event.newBuilder().setElementEvent(builder).build();

    default:
      throw new IllegalArgumentException(
          String.format(
              "Unsupported type of %s: %s",
              TestStream.Event.class.getCanonicalName(), event.getType()));
  }
}
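For context, the events serialized above come from the TestStream builder, which takes the element coder up front so element events can be encoded as shown. A minimal illustrative sketch (timestamps use org.joda.time.Instant):

TestStream<Integer> stream =
    TestStream.create(VarIntCoder.of())   // coder used to encode each element event
        .addElements(1, 2, 3)
        .advanceWatermarkTo(new Instant(1000L))
        .advanceWatermarkToInfinity();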
Example #26
Source File: CombineFns.java From beam with Apache License 2.0
@Override
public Coder<Object[]> getAccumulatorCoder(CoderRegistry registry, Coder<DataT> dataCoder)
    throws CannotProvideCoderException {
  List<Coder<Object>> coders = Lists.newArrayList();
  for (int i = 0; i < combineFnCount; ++i) {
    Coder<Object> inputCoder =
        combineInputCoders.get(i).isPresent()
            ? combineInputCoders.get(i).get()
            : registry.getOutputCoder(extractInputFns.get(i), dataCoder);
    coders.add(combineFns.get(i).getAccumulatorCoder(registry, inputCoder));
  }
  return new ComposedAccumulatorCoder(coders);
}
Example #27
Source File: Create.java From beam with Apache License 2.0
/**
 * Create a new source with the specified bytes. The new source owns the input element bytes,
 * which must not be modified after this constructor is called.
 */
private CreateSource(List<byte[]> elementBytes, long totalSize, Coder<T> coder) {
  super(0, elementBytes.size(), 1);
  this.allElementsBytes = ImmutableList.copyOf(elementBytes);
  this.totalSize = totalSize;
  this.coder = coder;
}
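This source backs the public Create transform, which is also where users typically pin a coder explicitly when inference is not possible. A minimal sketch, assuming p is an existing Pipeline:

PCollection<String> letters =
    p.apply(Create.of("a", "b", "c").withCoder(StringUtf8Coder.of()));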
Example #28
Source File: CoderTranslation.java From beam with Apache License 2.0
private static Map<Class<? extends Coder>, CoderTranslator<? extends Coder>> loadTranslators() {
  ImmutableMap.Builder<Class<? extends Coder>, CoderTranslator<? extends Coder>> translators =
      ImmutableMap.builder();
  for (CoderTranslatorRegistrar coderTranslatorRegistrar :
      ServiceLoader.load(CoderTranslatorRegistrar.class)) {
    translators.putAll(coderTranslatorRegistrar.getCoderTranslators());
  }
  return translators.build();
}
Example #29
Source File: CoderHelpers.java From beam with Apache License 2.0
/**
 * Utility method for serializing an object using the specified coder.
 *
 * @param value Value to serialize.
 * @param coder Coder to serialize with.
 * @param <T> type of value that is serialized
 * @return Byte array representing serialized object.
 */
public static <T> byte[] toByteArray(T value, Coder<T> coder) {
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  try {
    coder.encode(value, baos);
  } catch (IOException e) {
    throw new IllegalStateException("Error encoding value: " + value, e);
  }
  return baos.toByteArray();
}
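The inverse direction follows the same pattern. Below is a sketch of the matching deserialization helper — named fromByteArray here by analogy; check the actual class for the exact signature:

public static <T> T fromByteArray(byte[] serialized, Coder<T> coder) {
  ByteArrayInputStream bais = new ByteArrayInputStream(serialized);
  try {
    // Coder.decode reads exactly one encoded element from the stream.
    return coder.decode(bais);
  } catch (IOException e) {
    throw new IllegalStateException("Error decoding bytes for coder: " + coder, e);
  }
}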
Example #30
Source File: GroupIntoBatches.java From beam with Apache License 2.0
GroupIntoBatchesDoFn(
    long batchSize,
    Duration allowedLateness,
    Coder<K> inputKeyCoder,
    Coder<InputT> inputValueCoder) {
  this.batchSize = batchSize;
  this.allowedLateness = allowedLateness;
  this.batchSpec = StateSpecs.bag(inputValueCoder);
  this.numElementsInBatchSpec =
      StateSpecs.combining(
          new Combine.BinaryCombineLongFn() {
            @Override
            public long identity() {
              return 0L;
            }

            @Override
            public long apply(long left, long right) {
              return left + right;
            }
          });
  this.keySpec = StateSpecs.value(inputKeyCoder);
  // prefetch every 20% of batchSize elements. Do not prefetch if batchSize is too little
  this.prefetchFrequency = ((batchSize / 5) <= 1) ? Long.MAX_VALUE : (batchSize / 5);
}
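Users normally reach this DoFn through the public transform rather than constructing it directly. A minimal usage sketch, assuming keyedInput is an existing PCollection<KV<String, Integer>>:

// GroupIntoBatches.ofSize(n) buffers up to n values per key before emitting a batch;
// the state specs above hold the buffered values, the count, and the key.
PCollection<KV<String, Iterable<Integer>>> batched =
    keyedInput.apply(GroupIntoBatches.<String, Integer>ofSize(100));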