org.apache.beam.sdk.coders.ListCoder Java Examples
The following examples show how to use
org.apache.beam.sdk.coders.ListCoder.
You can go to the original project or source file by following the links above each example.
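Before the project examples, here is a minimal, self-contained sketch of the same API: it wraps an element coder with ListCoder, encodes a List<Integer> to bytes, and decodes it back. The class name ListCoderRoundTrip and the sample values are illustrative only and do not come from any of the projects below.

import java.util.Arrays;
import java.util.List;

import org.apache.beam.sdk.coders.ListCoder;
import org.apache.beam.sdk.coders.VarIntCoder;
import org.apache.beam.sdk.util.CoderUtils;

// Hypothetical standalone example; not taken from the projects listed below.
public class ListCoderRoundTrip {
  public static void main(String[] args) throws Exception {
    // ListCoder wraps an element coder so whole java.util.List values can be encoded.
    ListCoder<Integer> coder = ListCoder.of(VarIntCoder.of());

    List<Integer> original = Arrays.asList(1, 2, 3);

    // Round-trip the list through bytes; the decoded list equals the original.
    byte[] bytes = CoderUtils.encodeToByteArray(coder, original);
    List<Integer> decoded = CoderUtils.decodeFromByteArray(coder, bytes);

    System.out.println(decoded); // [1, 2, 3]
  }
}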
Example #1
Source File: DirectRunnerTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Tests that a {@link DoFn} that mutates its input with a good equals() fails in the {@link
 * DirectRunner}.
 */
@Test
public void testMutatingInputDoFnError() throws Exception {
  Pipeline pipeline = getPipeline();
  pipeline
      .apply(
          Create.of(Arrays.asList(1, 2, 3), Arrays.asList(4, 5, 6))
              .withCoder(ListCoder.of(VarIntCoder.of())))
      .apply(
          ParDo.of(
              new DoFn<List<Integer>, Integer>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  List<Integer> inputList = c.element();
                  inputList.set(0, 37);
                  c.output(12);
                }
              }));

  thrown.expect(IllegalMutationException.class);
  thrown.expectMessage("Input");
  thrown.expectMessage("must not be mutated");
  pipeline.run();
}
Example #2
Source File: GroupAlsoByWindowParDoFnFactory.java From beam with Apache License 2.0 | 6 votes |
private static <K, AccumT> AppliedCombineFn<K, AccumT, List<AccumT>, AccumT>
    makeAppliedMergingFunction(AppliedCombineFn<K, ?, AccumT, ?> appliedFn) {
  GlobalCombineFn<AccumT, List<AccumT>, AccumT> mergingCombineFn;
  if (appliedFn.getFn() instanceof CombineFnWithContext) {
    mergingCombineFn =
        new MergingKeyedCombineFnWithContext<>(
            (CombineFnWithContext<?, AccumT, ?>) appliedFn.getFn(),
            appliedFn.getAccumulatorCoder());
  } else {
    mergingCombineFn =
        new MergingCombineFn<>(
            (CombineFn<?, AccumT, ?>) appliedFn.getFn(), appliedFn.getAccumulatorCoder());
  }
  return AppliedCombineFn.<K, AccumT, List<AccumT>, AccumT>withAccumulatorCoder(
      mergingCombineFn,
      ListCoder.of(appliedFn.getAccumulatorCoder()),
      appliedFn.getSideInputViews(),
      KvCoder.of(appliedFn.getKvCoder().getKeyCoder(), appliedFn.getAccumulatorCoder()),
      appliedFn.getWindowingStrategy());
}
Example #3
Source File: WriteFiles.java From beam with Apache License 2.0 | 6 votes |
@Override
public PCollection<List<ResultT>> expand(PCollection<ResultT> input) {
  if (getWindowedWrites()) {
    // Reshuffle the results to make them stable against retries.
    // Use a single void key to maximize size of bundles for finalization.
    return input
        .apply("Add void key", WithKeys.of((Void) null))
        .apply("Reshuffle", Reshuffle.of())
        .apply("Drop key", Values.create())
        .apply("Gather bundles", ParDo.of(new GatherBundlesPerWindowFn<>()))
        .setCoder(ListCoder.of(resultCoder))
        // Reshuffle one more time to stabilize the contents of the bundle lists to finalize.
        .apply(Reshuffle.viaRandomKey());
  } else {
    // Pass results via a side input rather than reshuffle, because we need to get an empty
    // iterable to finalize if there are no results.
    return input
        .getPipeline()
        .apply(Reify.viewInGlobalWindow(input.apply(View.asList()), ListCoder.of(resultCoder)));
  }
}
Example #4
Source File: ProtoCoderTest.java From beam with Apache License 2.0 | 6 votes |
@Test
public void testCoderEncodeDecodeEqualNestedContext() throws Exception {
  MessageA value1 =
      MessageA.newBuilder()
          .setField1("hello")
          .addField2(MessageB.newBuilder().setField1(true).build())
          .addField2(MessageB.newBuilder().setField1(false).build())
          .build();
  MessageA value2 =
      MessageA.newBuilder()
          .setField1("world")
          .addField2(MessageB.newBuilder().setField1(false).build())
          .addField2(MessageB.newBuilder().setField1(true).build())
          .build();
  CoderProperties.coderDecodeEncodeEqual(
      ListCoder.of(ProtoCoder.of(MessageA.class)), ImmutableList.of(value1, value2));
}
Example #5
Source File: MutationDetectorsTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Tests that {@link MutationDetectors#forValueWithCoder} does not false positive on a list of
 * arrays, even when some array is set to a deeply equal array that is not {@code equals}.
 */
@Test
public void testEquivalentListOfArrays() throws Exception {
  List<byte[]> value = Arrays.asList(new byte[] {0x1}, new byte[] {0x2, 0x3}, new byte[] {0x4});
  MutationDetector detector =
      MutationDetectors.forValueWithCoder(value, ListCoder.of(ByteArrayCoder.of()));
  value.set(0, new byte[] {0x1});
  detector.verifyUnmodified();
}
Example #6
Source File: Watch.java From beam with Apache License 2.0 | 5 votes |
private GrowthStateCoder(
    Coder<OutputT> outputCoder, Coder<TerminationStateT> terminationStateCoder) {
  this.outputCoder = outputCoder;
  this.terminationStateCoder = terminationStateCoder;
  this.timestampedOutputCoder =
      ListCoder.of(TimestampedValue.TimestampedValueCoder.of(outputCoder));
}
Example #7
Source File: SerializableMatchers.java From beam with Apache License 2.0 | 5 votes |
public SerializableArrayViaCoder(Coder<T> elementCoder, T[] value) {
  this.coder = ListCoder.of(elementCoder);
  this.value = value;
  try {
    this.encodedValue = CoderUtils.encodeToByteArray(coder, Arrays.asList(value));
  } catch (CoderException exc) {
    throw UserCodeException.wrap(exc);
  }
}
Example #8
Source File: SchemaCoderHelpers.java From beam with Apache License 2.0 | 5 votes |
/** Returns the coder used for a given primitive type. */
public static <T> Coder<T> coderForFieldType(FieldType fieldType) {
  Coder<T> coder;
  switch (fieldType.getTypeName()) {
    case ROW:
      coder = (Coder<T>) SchemaCoder.of(fieldType.getRowSchema());
      break;
    case ARRAY:
      coder = (Coder<T>) ListCoder.of(coderForFieldType(fieldType.getCollectionElementType()));
      break;
    case ITERABLE:
      coder =
          (Coder<T>) IterableCoder.of(coderForFieldType(fieldType.getCollectionElementType()));
      break;
    case MAP:
      coder =
          (Coder<T>)
              MapCoder.of(
                  coderForFieldType(fieldType.getMapKeyType()),
                  coderForFieldType(fieldType.getMapValueType()));
      break;
    case LOGICAL_TYPE:
      coder =
          new LogicalTypeCoder(
              fieldType.getLogicalType(),
              coderForFieldType(fieldType.getLogicalType().getBaseType()));
      break;
    default:
      coder = (Coder<T>) CODER_MAP.get(fieldType.getTypeName());
  }
  Preconditions.checkNotNull(coder, "Unexpected field type " + fieldType.getTypeName());
  if (fieldType.getNullable()) {
    coder = NullableCoder.of(coder);
  }
  return coder;
}
Example #9
Source File: ParDoTest.java From beam with Apache License 2.0 | 5 votes |
@Test
@Category({ValidatesRunner.class, UsesStatefulParDo.class})
public void testBagStateCoderInferenceFailure() throws Exception {
  final String stateId = "foo";
  Coder<MyInteger> myIntegerCoder = MyIntegerCoder.of();

  DoFn<KV<String, Integer>, List<MyInteger>> fn =
      new DoFn<KV<String, Integer>, List<MyInteger>>() {

        @StateId(stateId)
        private final StateSpec<BagState<MyInteger>> bufferState = StateSpecs.bag();

        @ProcessElement
        public void processElement(
            @Element KV<String, Integer> element,
            @StateId(stateId) BagState<MyInteger> state,
            OutputReceiver<List<MyInteger>> r) {
          state.add(new MyInteger(element.getValue()));
          Iterable<MyInteger> currentValue = state.read();
          if (Iterables.size(currentValue) >= 4) {
            List<MyInteger> sorted = Lists.newArrayList(currentValue);
            Collections.sort(sorted);
            r.output(sorted);
          }
        }
      };

  thrown.expect(RuntimeException.class);
  thrown.expectMessage("Unable to infer a coder for BagState and no Coder was specified.");

  pipeline
      .apply(
          Create.of(
              KV.of("hello", 97), KV.of("hello", 42), KV.of("hello", 84), KV.of("hello", 12)))
      .apply(ParDo.of(fn))
      .setCoder(ListCoder.of(myIntegerCoder));

  pipeline.run();
}
Example #10
Source File: FlattenTest.java From beam with Apache License 2.0 | 5 votes |
@Test
@Category(ValidatesRunner.class)
public void testFlattenIterablesLists() {
  PCollection<List<String>> input =
      p.apply(Create.<List<String>>of(LINES).withCoder(ListCoder.of(StringUtf8Coder.of())));

  PCollection<String> output = input.apply(Flatten.iterables());

  PAssert.that(output).containsInAnyOrder(LINES_ARRAY);

  p.run();
}
Example #11
Source File: MutationDetectorsTest.java From beam with Apache License 2.0 | 5 votes |
/** Tests that {@link MutationDetectors#forValueWithCoder} detects a mutation to a list. */
@Test
public void testMutatingList() throws Exception {
  List<Integer> value = Arrays.asList(1, 2, 3, 4);
  MutationDetector detector =
      MutationDetectors.forValueWithCoder(value, ListCoder.of(VarIntCoder.of()));
  value.set(0, 37);

  thrown.expect(IllegalMutationException.class);
  detector.verifyUnmodified();
}
Example #12
Source File: MutationDetectorsTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Tests that {@link MutationDetectors#forValueWithCoder} does not false positive on a {@link
 * LinkedList} that will clone as an {@code ArrayList}.
 */
@Test
public void testUnmodifiedLinkedList() throws Exception {
  List<Integer> value = Lists.newLinkedList(Arrays.asList(1, 2, 3, 4));
  MutationDetector detector =
      MutationDetectors.forValueWithCoder(value, ListCoder.of(VarIntCoder.of()));
  detector.verifyUnmodified();
}
Example #13
Source File: BatchLoads.java From beam with Apache License 2.0 | 5 votes |
void writeSinglePartition(
    PCollection<KV<ShardedKey<DestinationT>, List<String>>> input,
    PCollectionView<String> loadJobIdPrefixView) {
  List<PCollectionView<?>> sideInputs = Lists.newArrayList(loadJobIdPrefixView);
  sideInputs.addAll(dynamicDestinations.getSideInputs());
  Coder<KV<ShardedKey<DestinationT>, List<String>>> partitionsCoder =
      KvCoder.of(
          ShardedKeyCoder.of(NullableCoder.of(destinationCoder)),
          ListCoder.of(StringUtf8Coder.of()));
  // Write single partition to final table
  input
      .setCoder(partitionsCoder)
      // Reshuffle will distribute this among multiple workers, and also guard against
      // reexecution of the WritePartitions step once WriteTables has begun.
      .apply("SinglePartitionsReshuffle", Reshuffle.of())
      .apply(
          "SinglePartitionWriteTables",
          new WriteTables<>(
              false,
              bigQueryServices,
              loadJobIdPrefixView,
              writeDisposition,
              createDisposition,
              sideInputs,
              dynamicDestinations,
              loadJobProjectId,
              maxRetryJobs,
              ignoreUnknownValues,
              kmsKey,
              rowWriterFactory.getSourceFormat(),
              useAvroLogicalTypes,
              schemaUpdateOptions));
}
Example #14
Source File: Write.java From gcp-ingestion with Mozilla Public License 2.0 | 5 votes |
@Override
public WithFailures.Result<PDone, PubsubMessage> expand(PCollection<PubsubMessage> input) {
  ValueProvider<DynamicPathTemplate> pathTemplate =
      NestedValueProvider.of(outputPrefix, DynamicPathTemplate::new);
  ValueProvider<String> staticPrefix =
      NestedValueProvider.of(pathTemplate, value -> value.staticPrefix);

  FileIO.Write<List<String>, PubsubMessage> write = FileIO
      .<List<String>, PubsubMessage>writeDynamic()
      // We can't pass the attribute map to by() directly since MapCoder isn't deterministic;
      // instead, we extract an ordered list of the needed placeholder values.
      // That list is later available to withNaming() to determine output location.
      .by(message -> pathTemplate.get()
          .extractValuesFrom(DerivedAttributesMap.of(message.getAttributeMap())))
      .withDestinationCoder(ListCoder.of(StringUtf8Coder.of())) //
      .withCompression(compression) //
      .via(Contextful.fn(format::encodeSingleMessage), TextIO.sink()) //
      .to(staticPrefix) //
      .withNaming(placeholderValues -> NoColonFileNaming.defaultNaming(
          pathTemplate.get().replaceDynamicPart(placeholderValues), format.suffix()));

  if (inputType == InputType.pubsub) {
    // Passing a ValueProvider to withNumShards disables runner-determined sharding, so we
    // need to be careful to pass this only for streaming input (where runner-determined
    // sharding is not an option).
    write = write.withNumShards(numShards);
  }

  input //
      .apply(Window.<PubsubMessage>into(FixedWindows.of(windowDuration))
          // We allow lateness up to the maximum Cloud Pub/Sub retention of 7 days documented in
          // https://cloud.google.com/pubsub/docs/subscriber
          .withAllowedLateness(Duration.standardDays(7)) //
          .discardingFiredPanes())
      .apply(write);

  return WithFailures.Result.of(PDone.in(input.getPipeline()),
      EmptyErrors.in(input.getPipeline()));
}
Example #15
Source File: HL7v2IO.java From beam with Apache License 2.0 | 5 votes |
@Override
public PCollection<HL7v2Message> expand(PBegin input) {
  CoderRegistry coderRegistry = input.getPipeline().getCoderRegistry();
  coderRegistry.registerCoderForClass(HL7v2Message.class, HL7v2MessageCoder.of());
  return input
      .apply(Create.ofProvider(this.hl7v2Stores, ListCoder.of(StringUtf8Coder.of())))
      .apply(FlatMapElements.into(TypeDescriptors.strings()).via((x) -> x))
      .apply(ParDo.of(new ListHL7v2MessagesFn(filter, initialSplitDuration)))
      .setCoder(HL7v2MessageCoder.of())
      // Break fusion to encourage parallelization of downstream processing.
      .apply(Reshuffle.viaRandomKey());
}
Example #16
Source File: FakeBigQueryServices.java From beam with Apache License 2.0 | 5 votes |
public static String encodeQueryResult(Table table, List<TableRow> rows) throws IOException {
  KvCoder<String, List<TableRow>> coder =
      KvCoder.of(StringUtf8Coder.of(), ListCoder.of(TableRowJsonCoder.of()));
  KV<String, List<TableRow>> kv = KV.of(BigQueryHelpers.toJsonString(table), rows);
  ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
  coder.encode(kv, outputStream);
  return Base64.encodeBase64String(outputStream.toByteArray());
}
Example #17
Source File: FakeBigQueryServices.java From beam with Apache License 2.0 | 5 votes |
public static KV<Table, List<TableRow>> decodeQueryResult(String queryResult)
    throws IOException {
  KvCoder<String, List<TableRow>> coder =
      KvCoder.of(StringUtf8Coder.of(), ListCoder.of(TableRowJsonCoder.of()));
  ByteArrayInputStream inputStream = new ByteArrayInputStream(Base64.decodeBase64(queryResult));
  KV<String, List<TableRow>> kv = coder.decode(inputStream);
  Table table = BigQueryHelpers.fromJsonString(kv.getKey(), Table.class);
  List<TableRow> rows = kv.getValue();
  rows.forEach(FakeBigQueryServices::convertNumbers);
  return KV.of(table, rows);
}
Example #18
Source File: FlinkBroadcastStateInternals.java From beam with Apache License 2.0 | 5 votes |
FlinkBroadcastBagState(
    OperatorStateBackend flinkStateBackend,
    StateTag<BagState<T>> address,
    StateNamespace namespace,
    Coder<T> coder) {
  super(flinkStateBackend, address.getId(), namespace, ListCoder.of(coder));

  this.namespace = namespace;
  this.address = address;
}
Example #19
Source File: AggregateCombineFn.java From components with Apache License 2.0 | 5 votes |
public Coder<List> getAccumulatorCoder() {
  AvroCoder valueCoder = null;
  if (avroSchemaStr != null) {
    valueCoder = AvroCoder.of(new Schema.Parser().parse(avroSchemaStr));
  }
  return (Coder<List>)
      (avroSchemaStr == null
          ? ListCoder.of(NullableCoder.of(StringUtf8Coder.of()))
          : ListCoder.of(NullableCoder.of(valueCoder)));
}
Example #20
Source File: KryoCoderTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void testCodingWithKvCoderClassToBeEncoded() throws IOException {
  final KryoRegistrar registrar =
      k -> {
        k.register(TestClass.class);
        k.register(ClassToBeEncoded.class);
      };

  final ListCoder<Void> listCoder = ListCoder.of(VoidCoder.of());
  final KvCoder<ClassToBeEncoded, List<Void>> kvCoder =
      KvCoder.of(KryoCoder.of(OPTIONS, registrar), listCoder);
  final List<Void> inputValue = new ArrayList<>();
  inputValue.add(null);
  inputValue.add(null);

  final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
  final ClassToBeEncoded inputKey = new ClassToBeEncoded("something", 1, 0.2);
  kvCoder.encode(KV.of(inputKey, inputValue), byteArrayOutputStream);

  final KV<ClassToBeEncoded, List<Void>> decoded =
      kvCoder.decode(new ByteArrayInputStream(byteArrayOutputStream.toByteArray()));

  assertNotNull(decoded);
  assertNotNull(decoded.getKey());
  assertEquals(inputKey, decoded.getKey());

  assertNotNull(decoded.getValue());
  assertEquals(inputValue, decoded.getValue());
}
Example #21
Source File: KryoCoderTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void testCodingWithKvCoderKeyIsKryoCoder() throws IOException {
  final KryoRegistrar registrar = k -> k.register(TestClass.class);

  final ListCoder<Void> listCoder = ListCoder.of(VoidCoder.of());
  final KvCoder<TestClass, List<Void>> kvCoder =
      KvCoder.of(KryoCoder.of(OPTIONS, registrar), listCoder);
  final List<Void> inputValue = new ArrayList<>();
  inputValue.add(null);
  inputValue.add(null);

  final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
  final TestClass inputKey = new TestClass("something");
  kvCoder.encode(KV.of(inputKey, inputValue), byteArrayOutputStream);

  final KV<TestClass, List<Void>> decoded =
      kvCoder.decode(new ByteArrayInputStream(byteArrayOutputStream.toByteArray()));

  assertNotNull(decoded);
  assertNotNull(decoded.getKey());
  assertEquals(inputKey, decoded.getKey());

  assertNotNull(decoded.getValue());
  assertEquals(inputValue, decoded.getValue());
}
Example #22
Source File: TextRowToMutationTest.java From DataflowTemplates with Apache License 2.0 | 5 votes |
@Test(expected = PipelineExecutionException.class)
public void parseRowToMutationTooManyColumns() throws Exception {
  PCollectionView<Ddl> ddlView =
      pipeline.apply("ddl", Create.of(getTestDdl())).apply(View.asSingleton());
  PCollectionView<Map<String, List<TableManifest.Column>>> tableColumnsMapView =
      pipeline
          .apply(
              "tableColumnsMap",
              Create.<Map<String, List<TableManifest.Column>>>of(getEmptyTableColumnsMap())
                  .withCoder(
                      MapCoder.of(
                          StringUtf8Coder.of(),
                          ListCoder.of(ProtoCoder.of(TableManifest.Column.class)))))
          .apply("Map as view", View.asSingleton());

  PCollection<KV<String, String>> input =
      pipeline.apply(
          "input",
          Create.of(KV.of(testTableName, "123,a string,yet another string,1.23,True,,,,,,,")));
  PCollection<Mutation> mutations =
      input.apply(
          ParDo.of(
                  new TextRowToMutation(
                      ddlView,
                      tableColumnsMapView,
                      columnDelimiter,
                      StaticValueProvider.of('"'),
                      trailingDelimiter,
                      escape,
                      nullString,
                      dateFormat,
                      timestampFormat))
              .withSideInputs(ddlView, tableColumnsMapView));

  pipeline.run();
}
Example #23
Source File: TextRowToMutationTest.java From DataflowTemplates with Apache License 2.0 | 5 votes |
@Test(expected = PipelineExecutionException.class)
public void parseRowToMutationInvalidFormat() throws Exception {
  PCollectionView<Ddl> ddlView =
      pipeline.apply("ddl", Create.of(getTestDdl())).apply(View.asSingleton());
  PCollectionView<Map<String, List<TableManifest.Column>>> tableColumnsMapView =
      pipeline
          .apply(
              "tableColumnsMap",
              Create.<Map<String, List<TableManifest.Column>>>of(getEmptyTableColumnsMap())
                  .withCoder(
                      MapCoder.of(
                          StringUtf8Coder.of(),
                          ListCoder.of(ProtoCoder.of(TableManifest.Column.class)))))
          .apply("Map as view", View.asSingleton());

  PCollection<KV<String, String>> input =
      pipeline.apply(
          "input",
          Create.of(
              KV.of(testTableName, "123,a string,yet another string,1.23,True,99999/99/99")));
  PCollection<Mutation> mutations =
      input.apply(
          ParDo.of(
                  new TextRowToMutation(
                      ddlView,
                      tableColumnsMapView,
                      columnDelimiter,
                      fieldQualifier,
                      trailingDelimiter,
                      escape,
                      nullString,
                      dateFormat,
                      timestampFormat))
              .withSideInputs(ddlView, tableColumnsMapView));

  pipeline.run();
}
Example #24
Source File: CreateStreamingFlinkView.java From beam with Apache License 2.0 | 4 votes |
@Override
public Coder<List<T>> getDefaultOutputCoder(CoderRegistry registry, Coder<T> inputCoder) {
  return ListCoder.of(inputCoder);
}
Example #25
Source File: FlinkBatchPortablePipelineTranslator.java From beam with Apache License 2.0 | 4 votes |
@Override
public Coder<List<T>> getAccumulatorCoder(CoderRegistry registry, Coder<T> inputCoder) {
  return ListCoder.of(inputCoder);
}
Example #26
Source File: TextRowToMutationTest.java From DataflowTemplates with Apache License 2.0 | 4 votes |
@Test
public void parseRowToMutationCustomizedDimiterAndFieldQulifier() {
  PCollectionView<Ddl> ddlView =
      pipeline.apply("ddl", Create.of(getTestDdl())).apply(View.asSingleton());
  PCollectionView<Map<String, List<TableManifest.Column>>> tableColumnsMapView =
      pipeline
          .apply(
              "tableColumnsMap",
              Create.<Map<String, List<TableManifest.Column>>>of(getEmptyTableColumnsMap())
                  .withCoder(
                      MapCoder.of(
                          StringUtf8Coder.of(),
                          ListCoder.of(ProtoCoder.of(TableManifest.Column.class)))))
          .apply("Map as view", View.asSingleton());

  PCollection<KV<String, String>> input =
      pipeline.apply(
          "input",
          Create.of(
              KV.of(
                  testTableName,
                  "123|`str1 with |`|`\"str2\"+ \"'\"|`")));
  PCollection<Mutation> mutations =
      input.apply(
          ParDo.of(
                  new TextRowToMutation(
                      ddlView,
                      tableColumnsMapView,
                      StaticValueProvider.of('|'),
                      StaticValueProvider.of('`'),
                      trailingDelimiter,
                      escape,
                      nullString,
                      dateFormat,
                      timestampFormat))
              .withSideInputs(ddlView, tableColumnsMapView));

  PAssert.that(mutations)
      .containsInAnyOrder(
          Mutation.newInsertOrUpdateBuilder(testTableName)
              .set("int_col")
              .to(123)
              .set("str_10_col")
              .to("str1 with |")
              .set("str_max_col")
              .to("\"str2\"+ \"'\"|")
              .build());

  pipeline.run();
}
Example #27
Source File: FlinkBatchTransformTranslators.java From beam with Apache License 2.0 | 4 votes |
@Override
public Coder<List<T>> getAccumulatorCoder(CoderRegistry registry, Coder<T> inputCoder) {
  return ListCoder.of(inputCoder);
}
Example #28
Source File: FlinkBatchTransformTranslators.java From beam with Apache License 2.0 | 4 votes |
@Override
public Coder<List<T>> getDefaultOutputCoder(CoderRegistry registry, Coder<T> inputCoder) {
  return ListCoder.of(inputCoder);
}
Example #29
Source File: SamzaPublishViewTransformOverride.java From beam with Apache License 2.0 | 4 votes |
@Override
public Coder<List<T>> getAccumulatorCoder(CoderRegistry registry, Coder<T> inputCoder) {
  return ListCoder.of(inputCoder);
}
Example #30
Source File: SamzaPublishViewTransformOverride.java From beam with Apache License 2.0 | 4 votes |
@Override
public Coder<List<T>> getDefaultOutputCoder(CoderRegistry registry, Coder<T> inputCoder) {
  return ListCoder.of(inputCoder);
}