org.apache.beam.sdk.util.CoderUtils Java Examples
The following examples show how to use
org.apache.beam.sdk.util.CoderUtils.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: IsmSideInputReader.java From beam with Apache License 2.0 | 6 votes |
/** * Finds the metadata associated with the specific key components. Returns null if the metadata * does not exist. */ private <V, T> T findMetadata( List<IsmReader<WindowedValue<V>>> readers, List<?> keyComponents, Coder<T> metadataCoder) throws IOException { // Find a set of reader iterators that have the requested key components. List<IsmReader<WindowedValue<V>>.IsmPrefixReaderIterator> readerIterators = findAndStartReaders(readers, keyComponents); if (readerIterators.isEmpty()) { return null; } // We expect at most one such reader iterator to have been returned. IsmReader<WindowedValue<V>>.IsmPrefixReaderIterator readerIterator = Iterables.getOnlyElement(readerIterators); // Decode the metadata return CoderUtils.decodeFromByteArray( metadataCoder, readerIterator.getCurrent().getValue().getMetadata()); }
Example #2
Source File: TDigestQuantilesTest.java From beam with Apache License 2.0 | 6 votes |
private <T> boolean encodeDecodeEquals(MergingDigest tDigest) throws IOException { MergingDigest decoded = CoderUtils.clone(new MergingDigestCoder(), tDigest); boolean equal = true; // the only way to compare the two sketches is to compare them centroid by centroid. // Indeed, the means are doubles but are encoded as float and cast during decoding. // This entails a small approximation that makes the centroids different after decoding. Iterator<Centroid> it1 = decoded.centroids().iterator(); Iterator<Centroid> it2 = tDigest.centroids().iterator(); for (int i = 0; i < decoded.centroids().size(); i++) { Centroid c1 = it1.next(); Centroid c2 = it2.next(); if ((float) c1.mean() != (float) c2.mean() || c1.count() != c2.count()) { equal = false; break; } } return equal; }
Example #3
Source File: TestStreamTest.java From beam with Apache License 2.0 | 6 votes |
/** Checks that a {@link TestStream} is unchanged by an encode/decode round trip. */
@Test
public void testTestStreamCoder() throws Exception {
  TestStream.TestStreamCoder<String> streamCoder =
      TestStream.TestStreamCoder.of(StringUtf8Coder.of());

  TestStream<String> original =
      TestStream.create(StringUtf8Coder.of())
          .addElements("hey")
          .advanceWatermarkTo(Instant.ofEpochMilli(22521600))
          .advanceProcessingTime(Duration.millis(42))
          .addElements("hey", "joe")
          .advanceWatermarkToInfinity();

  // Encode, then immediately decode with the same coder.
  TestStream<String> roundTripped =
      CoderUtils.decodeFromByteArray(
          streamCoder, CoderUtils.encodeToByteArray(streamCoder, original));

  assertThat(roundTripped, is(original));
}
Example #4
Source File: SortValues.java From beam with Apache License 2.0 | 6 votes |
/**
 * Encodes every secondary-key/value pair of the current element, feeds the encoded pairs to a
 * {@link BufferedExternalSorter}, and outputs the element's key paired with the sorter's result
 * wrapped in a {@code DecodingIterable}.
 *
 * @param c the process context supplying the element and receiving the output
 * @throws RuntimeException wrapping any {@link IOException} raised while encoding or sorting
 */
@ProcessElement
public void processElement(ProcessContext c) {
  Iterable<KV<SecondaryKeyT, ValueT>> unsorted = c.element().getValue();
  try {
    Sorter sorter = BufferedExternalSorter.create(sorterOptions);
    for (KV<SecondaryKeyT, ValueT> entry : unsorted) {
      // The sorter works on raw bytes, so both halves of the pair are encoded up front.
      byte[] encodedKey = CoderUtils.encodeToByteArray(keyCoder, entry.getKey());
      byte[] encodedValue = CoderUtils.encodeToByteArray(valueCoder, entry.getValue());
      sorter.add(KV.of(encodedKey, encodedValue));
    }
    c.output(KV.of(c.element().getKey(), new DecodingIterable(sorter.sort())));
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
Example #5
Source File: PubsubIOTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Encodes each input with {@code coder}, wraps the bytes in an {@link IncomingMessage}, and
 * installs a pull-based {@link PubsubTestClient} factory serving those messages.
 *
 * @param inputs the values to publish through the test client
 * @param coder the coder used to turn each value into the message payload
 * @throws RuntimeException wrapping a {@link CoderException} if an input cannot be encoded
 */
private <T> void setupTestClient(List<T> inputs, Coder<T> coder) {
  List<IncomingMessage> messages =
      inputs.stream()
          .map(
              input -> {
                byte[] payload;
                try {
                  payload = CoderUtils.encodeToByteArray(coder, input);
                } catch (CoderException e) {
                  throw new RuntimeException(e);
                }
                com.google.pubsub.v1.PubsubMessage pubsubMessage =
                    com.google.pubsub.v1.PubsubMessage.newBuilder()
                        .setData(ByteString.copyFrom(payload))
                        .build();
                return IncomingMessage.of(
                    pubsubMessage,
                    1234L,
                    0,
                    UUID.randomUUID().toString(),
                    UUID.randomUUID().toString());
              })
          .collect(Collectors.toList());
  clientFactory = PubsubTestClient.createFactoryForPull(CLOCK, SUBSCRIPTION, 60, messages);
}
Example #6
Source File: TextIOReadTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Writes {@code expected} to a temporary file, one element per line, then reads the file back
 * with {@link TextIO} and asserts that the pipeline output contains exactly those elements.
 *
 * @param expected the lines to write and expect back, in any order
 * @throws Exception if the temporary file cannot be created or written, or encoding fails
 */
private void runTestRead(String[] expected) throws Exception {
  File tmpFile = tempFolder.newFile();
  String filename = tmpFile.getPath();

  // Each line is decoded from UTF-8 bytes below, so the stream must also write UTF-8. Without an
  // explicit encoding PrintStream uses the platform default charset and can corrupt non-ASCII
  // elements.
  try (PrintStream writer =
      new PrintStream(new FileOutputStream(tmpFile), false, Charsets.UTF_8.name())) {
    for (String elem : expected) {
      byte[] encodedElem = CoderUtils.encodeToByteArray(StringUtf8Coder.of(), elem);
      String line = new String(encodedElem, Charsets.UTF_8);
      writer.println(line);
    }
  }

  TextIO.Read read = TextIO.read().from(filename);
  PCollection<String> output = p.apply(read);

  PAssert.that(output).containsInAnyOrder(expected);
  p.run();
}
Example #7
Source File: ByteToWindowFunction.java From twister2 with Apache License 2.0 | 6 votes |
/**
 * Decodes the key bytes with {@code keyCoder} and each value byte array with {@code wvCoder},
 * returning them as a single key/values pair.
 *
 * <p>If decoding the key raises a {@link CoderException}, the stack trace is printed and the
 * returned pair holds a {@code null} key and a {@code null} value.
 *
 * @param input the encoded key together with an iterator over the encoded values
 * @return the decoded key paired with the list of decoded windowed values
 */
@Override
public KV<K, Iterable<WindowedValue<V>>> map(Tuple<byte[], Iterator<byte[]>> input) {
  K decodedKey = null;
  Iterable<WindowedValue<V>> decodedValues = null;
  try {
    decodedKey = CoderUtils.decodeFromByteArray(keyCoder, input.getKey());

    // Adapt the one-shot iterator into a sequential stream so the values can be mapped.
    Spliterator<byte[]> encodedValues =
        Spliterators.spliteratorUnknownSize(input.getValue(), Spliterator.ORDERED);
    decodedValues =
        StreamSupport.stream(encodedValues, false)
            .map(bytes -> TranslationUtils.fromByteArray(bytes, wvCoder))
            .collect(Collectors.toList());
  } catch (CoderException e) {
    e.printStackTrace();
  }
  return KV.of(decodedKey, decodedValues);
}
Example #8
Source File: ByteToWindowFunction.java From beam with Apache License 2.0 | 6 votes |
@Override public KV<K, Iterable<WindowedValue<V>>> map(Tuple<byte[], Iterator<byte[]>> input) { K key = null; Iterable<WindowedValue<V>> value = null; try { key = CoderUtils.decodeFromByteArray(keyCoder, input.getKey()); // TODO need to replace this with a custom iterator value = StreamSupport.stream( Spliterators.spliteratorUnknownSize(input.getValue(), Spliterator.ORDERED), false) .map(bytes -> TranslationUtils.fromByteArray(bytes, wvCoder)) .collect(Collectors.toList()); } catch (CoderException e) { LOG.info(e.getMessage()); } return KV.of(key, value); }
Example #9
Source File: IsmSideInputReaderTest.java From beam with Apache License 2.0 | 6 votes |
/** Each windowed value is expected to be within the same window. */ <K, V> List<IsmRecord<WindowedValue<V>>> forMapMetadata( Coder<K> keyCoder, Collection<K> keys, BoundedWindow window) throws Exception { List<IsmRecord<WindowedValue<V>>> rval = new ArrayList<>(); // Add the size metadata record rval.add( IsmRecord.<WindowedValue<V>>meta( ImmutableList.of(IsmFormat.getMetadataKey(), window, 0L), CoderUtils.encodeToByteArray(VarLongCoder.of(), (long) keys.size()))); // Add the positional entries for each key long i = 1L; for (K key : keys) { rval.add( IsmRecord.<WindowedValue<V>>meta( ImmutableList.of(IsmFormat.getMetadataKey(), window, i), CoderUtils.encodeToByteArray(keyCoder, key))); i += 1L; } return rval; }
Example #10
Source File: GroupingShuffleReader.java From beam with Apache License 2.0 | 6 votes |
/**
 * Moves to the next group of the shuffle, if any.
 *
 * <p>On success the group's key is decoded with the parent reader's key coder, registered on the
 * execution context, and {@code current} is set to the key paired with the group's values.
 *
 * @return {@code true} if a new group is available; {@code false} (with {@code current} cleared)
 *     once the groups are exhausted
 * @throws IOException if advancing the groups or decoding the key fails
 */
@Override
public boolean advance() throws IOException {
  // Only the call that advances the underlying groups runs inside the read state.
  try (Closeable ignored = tracker.enterState(readState)) {
    boolean hasNextGroup = groups.advance();
    if (!hasNextGroup) {
      current = null;
      return false;
    }
  }

  K key = CoderUtils.decodeFromByteArray(parentReader.keyCoder, groups.getCurrent().key);
  parentReader.executionContext.setKey(key);

  KV<K, Reiterable<V>> keyedValues =
      KV.<K, Reiterable<V>>of(key, new ValuesIterable(groups.getCurrent().values));
  current = new ValueInEmptyWindows<>(keyedValues);
  return true;
}
Example #11
Source File: PCollectionViews.java From beam with Apache License 2.0 | 6 votes |
/** * Returns the default value that was specified. * * <p>For internal use only. * * @throws NoSuchElementException if no default was specified. */ public T getDefaultValue() { if (!hasDefault) { throw new NoSuchElementException("Empty PCollection accessed as a singleton view."); } // Lazily decode the default value once synchronized (this) { if (encodedDefaultValue != null) { try { defaultValue = CoderUtils.decodeFromByteArray(valueCoder, encodedDefaultValue); // Clear the encoded default value to free the reference once we have the object // version. Also, this will guarantee that the value will only be decoded once. encodedDefaultValue = null; } catch (IOException e) { throw new RuntimeException("Unexpected IOException: ", e); } } return defaultValue; } }
Example #12
Source File: ExecutableStageDoFnOperatorTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Creates a state request builder addressing bag user state for the given key and user state id,
 * using transform id {@code "transform"} and the encoded {@link GlobalWindow} as the window.
 *
 * @param key the state key bytes
 * @param userStateId the user state id to address
 * @return a request builder with its state key populated
 * @throws Exception if the global window cannot be encoded
 */
private static BeamFnApi.StateRequest.Builder stateRequest(ByteString key, String userStateId)
    throws Exception {
  ByteString encodedWindow =
      ByteString.copyFrom(
          CoderUtils.encodeToByteArray(GlobalWindow.Coder.INSTANCE, GlobalWindow.INSTANCE));

  BeamFnApi.StateKey.BagUserState bagUserState =
      BeamFnApi.StateKey.BagUserState.newBuilder()
          .setTransformId("transform")
          .setKey(key)
          .setUserStateId(userStateId)
          .setWindow(encodedWindow)
          .build();

  BeamFnApi.StateKey.Builder stateKey =
      BeamFnApi.StateKey.newBuilder().setBagUserState(bagUserState);
  return BeamFnApi.StateRequest.newBuilder().setStateKey(stateKey);
}
Example #13
Source File: PublishResultCodersTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks that the header-less full publish result coder round-trips a simple result, keeps the
 * request id and HTTP status code of a full result, and produces empty HTTP headers.
 */
@Test
public void testFullPublishResultWithoutHeadersDecodeEncodeEquals() throws Exception {
  CoderProperties.coderDecodeEncodeEqual(
      PublishResultCoders.fullPublishResultWithoutHeaders(),
      new PublishResult().withMessageId(UUID.randomUUID().toString()));

  PublishResult original = buildFullPublishResult();
  PublishResult roundTripped =
      CoderUtils.clone(PublishResultCoders.fullPublishResultWithoutHeaders(), original);

  // Response metadata survives the round trip.
  assertThat(
      roundTripped.getSdkResponseMetadata().getRequestId(),
      equalTo(original.getSdkResponseMetadata().getRequestId()));
  // The status code survives too, while the clone's headers come back empty.
  assertThat(
      roundTripped.getSdkHttpMetadata().getHttpStatusCode(),
      equalTo(original.getSdkHttpMetadata().getHttpStatusCode()));
  assertThat(roundTripped.getSdkHttpMetadata().getHttpHeaders().isEmpty(), equalTo(true));
}
Example #14
Source File: PairWithConstantKeyDoFnFactory.java From beam with Apache License 2.0 | 6 votes |
/**
 * Creates a {@link PairWithConstantKeyParDoFn} whose key is decoded from {@code cloudUserFn}.
 *
 * <p>The coder is rebuilt from the {@code ENCODING} property of {@code cloudUserFn}, and the key
 * bytes are read from its {@code ENCODED_KEY} property. The remaining parameters are not used by
 * this factory.
 *
 * @return a ParDoFn constructed with the decoded key
 * @throws Exception if the coder cannot be rebuilt or the key cannot be decoded
 */
@Override
public ParDoFn create(
    PipelineOptions options,
    CloudObject cloudUserFn,
    List<SideInputInfo> sideInputInfos,
    TupleTag<?> mainOutputTag,
    Map<TupleTag<?>, Integer> outputTupleTagsToReceiverIndices,
    DataflowExecutionContext<?> executionContext,
    DataflowOperationContext operationContext)
    throws Exception {
  CloudObject encodingSpec =
      CloudObject.fromSpec(Structs.getObject(cloudUserFn, PropertyNames.ENCODING));
  Coder<?> coder = CloudObjects.coderFromCloudObject(encodingSpec);

  byte[] encodedKey = Structs.getBytes(cloudUserFn, WorkerPropertyNames.ENCODED_KEY);
  Object key = CoderUtils.decodeFromByteArray(coder, encodedKey);
  return new PairWithConstantKeyParDoFn(key);
}
Example #15
Source File: StateNamespaces.java From beam with Apache License 2.0 | 6 votes |
@Override public String stringKey() { try { // equivalent to String.format("/%s/%s/", ...) return "/" + CoderUtils.encodeToBase64(windowCoder, window) + // Use base 36 so that can address 36 triggers in a single byte and still be human // readable. "/" + Integer.toString(triggerIndex, TRIGGER_RADIX).toUpperCase() + "/"; } catch (CoderException e) { throw new RuntimeException("Unable to generate string key from window " + window, e); } }
Example #16
Source File: PAssert.java From beam with Apache License 2.0 | 6 votes |
/**
 * Expands into the chain that prepares the actual elements for assertion: filter the actuals,
 * gather all panes, extract the pane, flatten the iterables, rewindow, clone every element
 * through the collection's coder, and finally apply {@code actualView}.
 *
 * @param input the pipeline start; not referenced directly because the chain starts from
 *     {@code actual}
 * @return the view produced by applying {@code actualView}
 */
@Override
public PCollectionView<ActualT> expand(PBegin input) {
  final Coder<T> elementCoder = actual.getCoder();

  // Outputs a coder round-tripped copy of each element.
  DoFn<T, T> cloneThroughCoder =
      new DoFn<T, T>() {
        @ProcessElement
        public void processElement(ProcessContext context) throws CoderException {
          context.output(CoderUtils.clone(elementCoder, context.element()));
        }
      };

  return actual
      .apply("FilterActuals", rewindowActuals.prepareActuals())
      .apply("GatherPanes", GatherAllPanes.globally())
      .apply("ExtractPane", MapElements.via(extractPane))
      .setCoder(IterableCoder.of(actual.getCoder()))
      .apply(Flatten.iterables())
      .apply("RewindowActuals", rewindowActuals.windowActuals())
      .apply(ParDo.of(cloneThroughCoder))
      .apply(actualView);
}
Example #17
Source File: PublishResultCodersTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks that the full publish result coder round-trips a simple result and keeps the request
 * id, HTTP status code and HTTP headers of a full result.
 */
@Test
public void testFullPublishResultIncludingHeadersDecodeEncodeEquals() throws Exception {
  CoderProperties.coderDecodeEncodeEqual(
      PublishResultCoders.fullPublishResult(),
      new PublishResult().withMessageId(UUID.randomUUID().toString()));

  PublishResult original = buildFullPublishResult();
  PublishResult roundTripped = CoderUtils.clone(PublishResultCoders.fullPublishResult(), original);

  // Response metadata survives the round trip.
  assertThat(
      roundTripped.getSdkResponseMetadata().getRequestId(),
      equalTo(original.getSdkResponseMetadata().getRequestId()));
  // So do the HTTP status code and the HTTP headers.
  assertThat(
      roundTripped.getSdkHttpMetadata().getHttpStatusCode(),
      equalTo(original.getSdkHttpMetadata().getHttpStatusCode()));
  assertThat(
      roundTripped.getSdkHttpMetadata().getHttpHeaders(),
      equalTo(original.getSdkHttpMetadata().getHttpHeaders()));
}
Example #18
Source File: MapToTupleFunction.java From beam with Apache License 2.0 | 6 votes |
/**
 * Converts a windowed key/value pair into a tuple of encoded bytes: the key encoded with
 * {@code keyCoder}, and the value re-wrapped with the input's timestamp, windows and pane and
 * then encoded with {@code wvCoder}.
 *
 * <p>If encoding raises a {@link CoderException}, its message is logged at info level and
 * {@code null} is returned.
 *
 * @param input the windowed key/value pair to encode
 * @return the encoded key and encoded windowed value, or {@code null} on a coder failure
 */
@Override
public Tuple<byte[], byte[]> map(WindowedValue<KV<K, V>> input) {
  // Attach the input's windowing information to the value itself.
  WindowedValue<V> windowedInner =
      WindowedValue.of(
          input.getValue().getValue(),
          input.getTimestamp(),
          input.getWindows(),
          input.getPane());
  WindowedValue<KV<K, WindowedValue<V>>> rewrapped =
      WindowedValue.of(
          KV.of(input.getValue().getKey(), windowedInner),
          input.getTimestamp(),
          input.getWindows(),
          input.getPane());

  try {
    return new Tuple<>(
        CoderUtils.encodeToByteArray(keyCoder, rewrapped.getValue().getKey()),
        CoderUtils.encodeToByteArray(wvCoder, rewrapped.getValue().getValue()));
  } catch (CoderException e) {
    LOG.info(e.getMessage());
  }
  return null;
}
Example #19
Source File: LengthPrefixCoderTest.java From beam with Apache License 2.0 | 6 votes |
@Test public void testMultiCoderCycle() throws Exception { LengthPrefixCoder<Long> lengthPrefixedValueCoder = LengthPrefixCoder.of(BigEndianLongCoder.of()); LengthPrefixCoder<byte[]> lengthPrefixedBytesCoder = LengthPrefixCoder.of(ByteArrayCoder.of()); // [0x08, 0, 0, 0, 0, 0, 0, 0, 0x16] byte[] userEncoded = CoderUtils.encodeToByteArray(lengthPrefixedValueCoder, 22L); // [0, 0, 0, 0, 0, 0, 0, 0x16] byte[] decodedToBytes = CoderUtils.decodeFromByteArray(lengthPrefixedBytesCoder, userEncoded); // [0x08, 0, 0, 0, 0, 0, 0, 0, 0x16] byte[] reencodedBytes = CoderUtils.encodeToByteArray(lengthPrefixedBytesCoder, decodedToBytes); long userDecoded = CoderUtils.decodeFromByteArray(lengthPrefixedValueCoder, reencodedBytes); assertFalse( "Length-prefix decoding to bytes should drop the length", Arrays.equals(userEncoded, decodedToBytes)); assertArrayEquals(userEncoded, reencodedBytes); assertEquals(22L, userDecoded); }
Example #20
Source File: DAGBuilder.java From beam with Apache License 2.0 | 5 votes |
@Override public Object applyEx(byte[] b) throws Exception { if (coder == null) { return "ALL"; } else { WindowedValue<KV<K, V>> windowedValue = CoderUtils.decodeFromByteArray(coder, b); // todo: decoding twice.... KvCoder<K, V> kvCoder = (KvCoder<K, V>) coder.getValueCoder(); return CoderUtils.encodeToByteArray( kvCoder.getKeyCoder(), windowedValue.getValue().getKey()); } }
Example #21
Source File: ByteArrayCoderTest.java From beam with Apache License 2.0 | 5 votes |
@Test public void testEncodeThenMutate() throws Exception { byte[] input = {0x7, 0x3, 0xA, 0xf}; byte[] encoded = CoderUtils.encodeToByteArray(TEST_CODER, input); input[1] = 0x9; byte[] decoded = CoderUtils.decodeFromByteArray(TEST_CODER, encoded); // now that I have mutated the input, the output should NOT match assertThat(input, not(equalTo(decoded))); }
Example #22
Source File: AvroCoderTest.java From beam with Apache License 2.0 | 5 votes |
/** * Tests that {@link AvroCoder} works around issues in Avro where cache classes might be from the * wrong ClassLoader, causing confusing "Cannot cast X to X" error messages. */ @Test public void testTwoClassLoaders() throws Exception { ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader(); ClassLoader loader1 = new InterceptingUrlClassLoader(contextClassLoader, AvroCoderTestPojo.class.getName()); ClassLoader loader2 = new InterceptingUrlClassLoader(contextClassLoader, AvroCoderTestPojo.class.getName()); Class<?> pojoClass1 = loader1.loadClass(AvroCoderTestPojo.class.getName()); Class<?> pojoClass2 = loader2.loadClass(AvroCoderTestPojo.class.getName()); Object pojo1 = InstanceBuilder.ofType(pojoClass1).withArg(String.class, "hello").build(); Object pojo2 = InstanceBuilder.ofType(pojoClass2).withArg(String.class, "goodbye").build(); // Confirm incompatibility try { pojoClass2.cast(pojo1); fail("Expected ClassCastException; without it, this test is vacuous"); } catch (ClassCastException e) { // g2g } // The first coder is expected to populate the Avro SpecificData cache // The second coder is expected to be corrupted if the caching is done wrong. AvroCoder<Object> avroCoder1 = (AvroCoder) AvroCoder.of(pojoClass1); AvroCoder<Object> avroCoder2 = (AvroCoder) AvroCoder.of(pojoClass2); Object cloned1 = CoderUtils.clone(avroCoder1, pojo1); Object cloned2 = CoderUtils.clone(avroCoder2, pojo2); // Confirming that the uncorrupted coder is fine pojoClass1.cast(cloned1); // Confirmed to fail prior to the fix pojoClass2.cast(cloned2); }
Example #23
Source File: KvCoderTest.java From beam with Apache License 2.0 | 5 votes |
/** Checks that encoding {@code null} fails with a {@link CoderException} naming a null KV. */
@Test
public void encodeNullThrowsCoderException() throws Exception {
  String expectedMessage = "cannot encode a null KV";

  // Expectations must be registered before the call that is expected to throw.
  thrown.expect(CoderException.class);
  thrown.expectMessage(expectedMessage);

  CoderUtils.encodeToBase64(TEST_CODER, null);
}
Example #24
Source File: HadoopFormatIO.java From beam with Apache License 2.0 | 5 votes |
/** * Beam expects immutable objects, but the Hadoop InputFormats tend to re-use the same object * when returning them. Hence, mutable objects returned by Hadoop InputFormats are cloned. */ private <T> T cloneIfPossiblyMutable(T input, Coder<T> coder) throws CoderException, ClassCastException { // If the input object is not of known immutable type, clone the object. if (!isKnownImmutable(input)) { input = CoderUtils.clone(coder, input); } return input; }
Example #25
Source File: BigDecimalCoderTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks, for every test value, that the byte size reported to the observer equals the length of
 * the value encoded in the nested context.
 */
@Test
public void testGetEncodedElementByteSize() throws Exception {
  TestElementByteSizeObserver sizeObserver = new TestElementByteSizeObserver();
  for (BigDecimal value : TEST_VALUES) {
    TEST_CODER.registerByteSizeObserver(value, sizeObserver);
    sizeObserver.advance();

    long observedSize = sizeObserver.getSumAndReset();
    long encodedSize =
        CoderUtils.encodeToByteArray(TEST_CODER, value, Coder.Context.NESTED).length;
    assertThat(observedSize, equalTo(encodedSize));
  }
}
Example #26
Source File: UnboundedReadFromBoundedSourceTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks that a checkpoint built from two {@code null} arguments round-trips through
 * {@link CheckpointCoder} with both its residual elements and residual source still null.
 */
@Test
public void testCheckpointCoderNulls() throws Exception {
  CheckpointCoder<String> checkpointCoder = new CheckpointCoder<>(StringUtf8Coder.of());
  Checkpoint<String> empty = new Checkpoint<>(null, null);

  byte[] encoded = CoderUtils.encodeToByteArray(checkpointCoder, empty);
  Checkpoint<String> roundTripped = CoderUtils.decodeFromByteArray(checkpointCoder, encoded);

  assertNull(roundTripped.getResidualElements());
  assertNull(roundTripped.getResidualSource());
}
Example #27
Source File: TextualIntegerCoderTest.java From beam with Apache License 2.0 | 5 votes |
/** Checks that encoding {@code null} fails with a {@link CoderException} naming a null Integer. */
@Test
public void encodeNullThrowsCoderException() throws Exception {
  String expectedMessage = "cannot encode a null Integer";

  // Expectations must be registered before the call that is expected to throw.
  thrown.expect(CoderException.class);
  thrown.expectMessage(expectedMessage);

  CoderUtils.encodeToBase64(TEST_CODER, null);
}
Example #28
Source File: AmqpMessageCoderTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks that a message with a body of 32 * 1024 * 1024 spaces survives a clone through
 * {@link AmqpMessageCoder} with its body text intact.
 *
 * @throws Exception if the message cannot be encoded or decoded
 */
@Test
public void encodeDecodeLargeMessage() throws Exception {
  Message message = Message.Factory.create();
  message.setAddress("address");
  message.setSubject("subject");
  String body = Joiner.on("").join(Collections.nCopies(32 * 1024 * 1024, " "));
  message.setBody(new AmqpValue(body));

  AmqpMessageCoder coder = AmqpMessageCoder.of();
  Message clone = CoderUtils.clone(coder, message);

  // The comparison result used to be discarded, so the test could never fail on a mismatch.
  // Fail explicitly; the message stays short because the bodies are tens of megabytes long.
  if (!clone.getBody().toString().equals(message.getBody().toString())) {
    throw new AssertionError("Cloned message body does not match the original body");
  }
}
Example #29
Source File: CommonCoderTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Utility for adding new entries to the common coder spec -- encodes {@code value} with
 * {@code coder} in the given context and returns the resulting bytes as a JSON string with
 * non-ASCII characters escaped.
 *
 * @param coder the coder used to serialize {@code value}
 * @param value the value to serialize
 * @param context the coder context to encode in
 * @return the JSON-escaped string form of the encoded bytes
 * @throws CoderException if encoding fails or the JSON string cannot be produced
 */
private static <T> String jsonByteString(Coder<T> coder, T value, Context context)
    throws CoderException {
  byte[] encoded = CoderUtils.encodeToByteArray(coder, value, context);
  // ISO-8859-1 maps every byte to exactly one char, so no byte is lost in the String.
  String byteString = new String(encoded, StandardCharsets.ISO_8859_1);

  ObjectMapper escapingMapper = new ObjectMapper();
  escapingMapper.configure(JsonGenerator.Feature.ESCAPE_NON_ASCII, true);
  try {
    return escapingMapper.writeValueAsString(byteString);
  } catch (JsonProcessingException e) {
    throw new CoderException(String.format("Unable to encode %s with coder %s", value, coder), e);
  }
}
Example #30
Source File: ByteArrayCoderTest.java From beam with Apache License 2.0 | 5 votes |
/** Checks that encoding {@code null} fails with a {@link CoderException} naming a null byte[]. */
@Test
public void encodeNullThrowsCoderException() throws Exception {
  String expectedMessage = "cannot encode a null byte[]";

  // Expectations must be registered before the call that is expected to throw.
  thrown.expect(CoderException.class);
  thrown.expectMessage(expectedMessage);

  CoderUtils.encodeToBase64(TEST_CODER, null);
}