org.apache.beam.sdk.values.TypeDescriptors Java Examples
The following examples show how to use org.apache.beam.sdk.values.TypeDescriptors.
Each example is headed by its original source file, project, and license.
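Java erases a lambda's generic types at run time, so Beam transforms built from lambdas (MapElements, FlatMapElements, WithKeys, and the exceptionsInto hooks shown below) must be told their output type explicitly via the static factories on TypeDescriptors: strings(), integers(), longs(), booleans(), bytes(), kvs(...), and so on. As a minimal sketch of the idiom (the class name, step names, and input values here are illustrative, not taken from any of the examples below):

  import java.util.Arrays;

  import org.apache.beam.sdk.Pipeline;
  import org.apache.beam.sdk.options.PipelineOptionsFactory;
  import org.apache.beam.sdk.transforms.Create;
  import org.apache.beam.sdk.transforms.MapElements;
  import org.apache.beam.sdk.values.KV;
  import org.apache.beam.sdk.values.PCollection;
  import org.apache.beam.sdk.values.TypeDescriptors;

  public class TypeDescriptorsSketch {
    public static void main(String[] args) {
      Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

      // MapElements cannot recover KV<String, Integer> from the lambda at run time
      // (type erasure), so the output type is declared up front with TypeDescriptors.
      PCollection<KV<String, Integer>> wordLengths =
          pipeline
              .apply(Create.of(Arrays.asList("alpha", "beta", "gamma")))
              .apply(
                  "KeyByWordLength",
                  MapElements.into(
                          TypeDescriptors.kvs(
                              TypeDescriptors.strings(), TypeDescriptors.integers()))
                      .via((String word) -> KV.of(word, word.length())));

      pipeline.run().waitUntilFinish();
    }
  }

The same pattern runs through every example that follows: wherever a lambda feeds a transform, a TypeDescriptors factory call tells Beam what the lambda produces so a coder can be inferred.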
Example #1
Source File: WordCount.java From java-docs-samples with Apache License 2.0
public static void main(String[] args) {
  WordCountOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(WordCountOptions.class);
  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply("Read lines", TextIO.read().from(options.getInputFile()))
      // [END value_provider]
      .apply(
          "Find words",
          FlatMapElements.into(TypeDescriptors.strings())
              .via((String line) -> Arrays.asList(line.split("[^\\p{L}]+"))))
      .apply("Filter empty words", Filter.by((String word) -> !word.isEmpty()))
      .apply(
          "Filter with substring",
          ParDo.of(
              new FilterWithSubstring(
                  options.getWithSubstring(), options.getIsCaseSensitive())))
      .apply("Count words", Count.perElement())
      .apply(
          "Format results",
          MapElements.into(TypeDescriptors.strings())
              .via(
                  (KV<String, Long> wordCount) ->
                      wordCount.getKey() + ": " + wordCount.getValue()))
      // [START nested_value_provider]
      .apply(
          "Write results",
          TextIO.write()
              .to(
                  NestedValueProvider.of(
                      options.getOutputBucket(),
                      (String bucket) ->
                          String.format(
                              "gs://%s/samples/dataflow/wordcount/outputs", bucket))));
  // [END nested_value_provider]
  pipeline.run();
}
Example #2
Source File: DistinctTest.java From beam with Apache License 2.0
@Test
public void testWindow_applyIf() {
  final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings());
  final PCollection<String> uniq =
      Distinct.of(dataset)
          .applyIf(
              true,
              b ->
                  b.windowBy(FixedWindows.of(Duration.standardHours(1)))
                      .triggeredBy(DefaultTrigger.of())
                      .discardingFiredPanes())
          .output();
  final Distinct distinct = (Distinct) TestUtils.getProducer(uniq);
  assertTrue(distinct.getWindow().isPresent());
  @SuppressWarnings("unchecked")
  final WindowDesc<?> windowDesc = WindowDesc.of((Window) distinct.getWindow().get());
  assertEquals(
      FixedWindows.of(org.joda.time.Duration.standardHours(1)), windowDesc.getWindowFn());
  assertEquals(DefaultTrigger.of(), windowDesc.getTrigger());
  assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, windowDesc.getAccumulationMode());
}
Example #3
Source File: TopPerKeyTest.java From beam with Apache License 2.0
@Test
public void testWindow_applyIf() {
  final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings());
  final PCollection<Triple<String, Long, Long>> result =
      TopPerKey.of(dataset)
          .keyBy(s -> s)
          .valueBy(s -> 1L)
          .scoreBy(s -> 1L)
          .applyIf(
              true,
              b ->
                  b.windowBy(FixedWindows.of(org.joda.time.Duration.standardHours(1)))
                      .triggeredBy(DefaultTrigger.of())
                      .accumulatingFiredPanes())
          .output();
  final TopPerKey tpk = (TopPerKey) TestUtils.getProducer(result);
  assertTrue(tpk.getWindow().isPresent());
  @SuppressWarnings("unchecked")
  final WindowDesc<?> windowDesc = WindowDesc.of((Window) tpk.getWindow().get());
  assertEquals(
      FixedWindows.of(org.joda.time.Duration.standardHours(1)), windowDesc.getWindowFn());
  assertEquals(DefaultTrigger.of(), windowDesc.getTrigger());
  assertEquals(AccumulationMode.ACCUMULATING_FIRED_PANES, windowDesc.getAccumulationMode());
}
Example #4
Source File: FhirIOTest.java From beam with Apache License 2.0
@Test
public void test_FhirIO_failedReads() {
  List<String> badMessageIDs = Arrays.asList("foo", "bar");
  FhirIO.Read.Result readResult =
      pipeline.apply(Create.of(badMessageIDs)).apply(FhirIO.readResources());
  PCollection<HealthcareIOError<String>> failed = readResult.getFailedReads();
  PCollection<String> resources = readResult.getResources();
  PCollection<String> failedMsgIds =
      failed.apply(
          MapElements.into(TypeDescriptors.strings()).via(HealthcareIOError::getDataResource));
  PAssert.that(failedMsgIds).containsInAnyOrder(badMessageIDs);
  PAssert.that(resources).empty();
  pipeline.run();
}
Example #5
Source File: HadoopFormatIOWriteTest.java From beam with Apache License 2.0
@Test
public void testWritingDataFailInvalidValueType() {
  conf.set(HadoopFormatIO.OUTPUT_DIR, tmpFolder.getRoot().getAbsolutePath());
  List<KV<Text, Text>> data = new ArrayList<>();
  data.add(KV.of(new Text("key"), new Text("value")));
  TypeDescriptor<Text> textTypeDescriptor = new TypeDescriptor<Text>() {};
  PCollection<KV<Text, Text>> input =
      p.apply(Create.of(data))
          .setTypeDescriptor(TypeDescriptors.kvs(textTypeDescriptor, textTypeDescriptor));
  thrown.expect(Pipeline.PipelineExecutionException.class);
  thrown.expectMessage(Text.class.getName());
  input.apply(
      "Write",
      HadoopFormatIO.<Text, Text>write()
          .withConfiguration(conf)
          .withPartitioning()
          .withExternalSynchronization(new HDFSSynchronization(getLocksDirPath())));
  p.run().waitUntilFinish();
}
Example #6
Source File: DistinctTest.java From beam with Apache License 2.0
@Test
public void testBuild_Windowing() {
  final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings());
  final PCollection<String> uniq =
      Distinct.of(dataset)
          .windowBy(FixedWindows.of(org.joda.time.Duration.standardHours(1)))
          .triggeredBy(DefaultTrigger.of())
          .accumulationMode(AccumulationMode.DISCARDING_FIRED_PANES)
          .output();
  final Distinct distinct = (Distinct) TestUtils.getProducer(uniq);
  assertTrue(distinct.getWindow().isPresent());
  @SuppressWarnings("unchecked")
  final WindowDesc<?> windowDesc = WindowDesc.of((Window) distinct.getWindow().get());
  assertEquals(
      FixedWindows.of(org.joda.time.Duration.standardHours(1)), windowDesc.getWindowFn());
  assertEquals(DefaultTrigger.of(), windowDesc.getTrigger());
}
Example #7
Source File: HL7v2IOTest.java From beam with Apache License 2.0
@Test
public void test_HL7v2IO_failedReads() {
  List<String> badMessageIDs =
      Arrays.asList(
          "projects/a/locations/b/datasets/c/hl7V2Stores/d/messages/foo",
          "projects/a/locations/b/datasets/c/hl7V2Stores/d/messages/bar");
  HL7v2IO.Read.Result readResult =
      pipeline.apply(Create.of(badMessageIDs)).apply(HL7v2IO.getAll());
  PCollection<HealthcareIOError<String>> failed = readResult.getFailedReads();
  PCollection<HL7v2Message> messages = readResult.getMessages();
  PCollection<String> failedMsgIds =
      failed.apply(
          MapElements.into(TypeDescriptors.strings()).via(HealthcareIOError::getDataResource));
  PAssert.that(failedMsgIds).containsInAnyOrder(badMessageIDs);
  PAssert.that(messages).empty();
  pipeline.run();
}
Example #8
Source File: ParsePayloadTest.java From gcp-ingestion with Mozilla Public License 2.0
@Test
public void testVersionInPayload() {
  ValueProvider<String> schemasLocation = pipeline.newProvider("schemas.tar.gz");
  // printf '{"version":4}' | base64 -> eyJ2ZXJzaW9uIjo0fQ==
  String input = "{\"attributeMap\":" //
      + "{\"document_namespace\":\"telemetry\"" //
      + ",\"app_name\":\"Firefox\"" //
      + ",\"document_id\":\"2c3a0767-d84a-4d02-8a92-fa54a3376049\"" //
      + ",\"document_type\":\"main\"" //
      + "},\"payload\":\"eyJ2ZXJzaW9uIjo0fQ==\"}";
  Result<PCollection<PubsubMessage>, PubsubMessage> result = pipeline.apply(Create.of(input))
      .apply(InputFileFormat.json.decode()).apply(ParsePayload.of(schemasLocation));
  PCollection<String> exceptions = result.failures().apply(MapElements
      .into(TypeDescriptors.strings()).via(message -> message.getAttribute("exception_class")));
  PAssert.that(result.output()).empty();
  // If we get a ValidationException here, it means we successfully extracted version from
  // the payload and found a valid schema; we expect the payload to not validate.
  PAssert.that(exceptions).containsInAnyOrder("org.everit.json.schema.ValidationException");
  pipeline.run();
}
Example #9
Source File: ParsePayloadTest.java From gcp-ingestion with Mozilla Public License 2.0
@Test
public void testMetadataInPayload() {
  ValueProvider<String> schemasLocation = pipeline.newProvider("schemas.tar.gz");
  String input = "{\"id\":null,\"document_id\":\"2c3a0767-d84a-4d02-8a92-fa54a3376049\""
      + ",\"metadata\":{\"document_namespace\":\"test\",\"document_type\":\"test\""
      + ",\"document_version\":\"1\",\"geo\":{\"country\":\"FI\"}}}";
  Result<PCollection<PubsubMessage>, PubsubMessage> result = pipeline //
      .apply(Create.of(input)) //
      .apply(InputFileFormat.text.decode()) //
      .apply(ParsePayload.of(schemasLocation));
  PAssert.that(result.failures()).empty();
  final PCollection<Integer> attributeCounts = result.output().apply(MapElements
      .into(TypeDescriptors.integers()).via(message -> message.getAttributeMap().size()));
  PAssert.thatSingleton(attributeCounts).isEqualTo(5);
  final String expectedMain = "{\"id\":null}";
  final PCollection<String> main = result.output() //
      .apply("encodeTextMain", OutputFileFormat.text.encode());
  PAssert.thatSingleton(main).isEqualTo(expectedMain);
  pipeline.run();
}
Example #10
Source File: LimitPayloadSizeTest.java From gcp-ingestion with Mozilla Public License 2.0
@Test
public void testLimit() {
  List<String> passingPayloads =
      ImmutableList.of("", "abcdefg", StringUtils.repeat("abcdefg", 50));
  List<String> failingPayloads = ImmutableList.of(StringUtils.repeat("abcdefghij", 51));
  WithFailures.Result<PCollection<PubsubMessage>, PubsubMessage> result = pipeline //
      .apply(Create.of(Iterables.concat(passingPayloads, failingPayloads))) //
      .apply(InputFileFormat.text.decode()) //
      .apply("LimitPayloadSize", LimitPayloadSize.toBytes(500));
  PAssert
      .that(result.output().apply("get success payload",
          MapElements.into(TypeDescriptors.strings()).via(m -> new String(m.getPayload())))) //
      .containsInAnyOrder(passingPayloads);
  PAssert
      .that(result.failures().apply("get failure payload",
          MapElements.into(TypeDescriptors.strings()).via(m -> new String(m.getPayload())))) //
      .containsInAnyOrder(failingPayloads);
  pipeline.run();
}
Example #11
Source File: HadoopFormatIOWriteTest.java From beam with Apache License 2.0
@Test
public void testWritingDataFailInvalidKeyType() {
  conf.set(HadoopFormatIO.OUTPUT_DIR, tmpFolder.getRoot().getAbsolutePath());
  List<KV<String, Employee>> data = new ArrayList<>();
  data.add(KV.of("key", new Employee("name", "address")));
  PCollection<KV<String, Employee>> input =
      p.apply("CreateData", Create.of(data))
          .setTypeDescriptor(
              TypeDescriptors.kvs(
                  new TypeDescriptor<String>() {}, new TypeDescriptor<Employee>() {}));
  thrown.expect(Pipeline.PipelineExecutionException.class);
  thrown.expectMessage(String.class.getName());
  input.apply(
      "Write",
      HadoopFormatIO.<String, Employee>write()
          .withConfiguration(conf)
          .withPartitioning()
          .withExternalSynchronization(new HDFSSynchronization(getLocksDirPath())));
  p.run().waitUntilFinish();
}
Example #12
Source File: SnsIOTest.java From beam with Apache License 2.0
@Test
public void testCustomCoder() throws Exception {
  final PublishRequest request1 = createSampleMessage("my_first_message");
  final TupleTag<PublishResult> results = new TupleTag<>();
  final AmazonSNS amazonSnsSuccess = getAmazonSnsMockSuccess();
  final MockCoder mockCoder = new MockCoder();
  final PCollectionTuple snsWrites =
      p.apply(Create.of(request1))
          .apply(
              SnsIO.write()
                  .withTopicName(topicName)
                  .withAWSClientsProvider(new Provider(amazonSnsSuccess))
                  .withResultOutputTag(results)
                  .withCoder(mockCoder));
  final PCollection<Long> publishedResultsSize =
      snsWrites
          .get(results)
          .apply(
              MapElements.into(TypeDescriptors.strings()).via(result -> result.getMessageId()))
          .apply(Count.globally());
  PAssert.that(publishedResultsSize).containsInAnyOrder(ImmutableList.of(1L));
  p.run().waitUntilFinish();
  assertThat(mockCoder.captured).isNotNull();
}
Example #13
Source File: BigQueryMergerTest.java From DataflowTemplates with Apache License 2.0
@Test
public void testAutoValueMergeInfoClass() throws Exception {
  MergeInfo mergeInfo =
      MergeInfo.create(
          TIMESTAMP_META_FIELD,
          DELETED_META_FIELD,
          TABLE_1,
          TABLE_2,
          FULL_COLUMN_LIST,
          PRIMARY_KEY_COLUMNS);
  PCollection<KV<String, MergeInfo>> result =
      pipeline
          .apply(Create.of(mergeInfo))
          .apply(
              WithKeys.<String, MergeInfo>of(mi -> mi.getReplicaTable())
                  .withKeyType(TypeDescriptors.strings()))
          .apply(
              new TriggerPerKeyOnFixedIntervals<>(
                  Duration.standardMinutes(WINDOW_SIZE_MINUTES)));
  PAssert.that(result).containsInAnyOrder(KV.of(mergeInfo.getReplicaTable(), mergeInfo));
  pipeline.run().waitUntilFinish();
}
Example #14
Source File: BeamJdbcAvroSchema.java From dbeam with Apache License 2.0
/** Generate Avro schema by reading one row. Expose Beam metrics via a Beam PTransform. */
public static Schema createSchema(
    final Pipeline pipeline, final JdbcExportArgs args, final Connection connection)
    throws Exception {
  final long startTime = System.nanoTime();
  final Schema generatedSchema = generateAvroSchema(args, connection);
  final long elapsedTimeSchema = (System.nanoTime() - startTime) / 1000000;
  LOGGER.info("Elapsed time to schema {} seconds", elapsedTimeSchema / 1000.0);
  final Counter cnt =
      Metrics.counter(BeamJdbcAvroSchema.class.getCanonicalName(), "schemaElapsedTimeMs");
  pipeline
      .apply(
          "ExposeSchemaCountersSeed",
          Create.of(Collections.singletonList(0)).withType(TypeDescriptors.integers()))
      .apply(
          "ExposeSchemaCounters",
          MapElements.into(TypeDescriptors.integers())
              .via(
                  v -> {
                    cnt.inc(elapsedTimeSchema);
                    return v;
                  }));
  return generatedSchema;
}
Example #15
Source File: UserScoreTest.java From beam with Apache License 2.0
/** Test that bad input data is dropped appropriately. */
@Test
@Category(ValidatesRunner.class)
public void testUserScoresBadInput() throws Exception {
  PCollection<String> input = p.apply(Create.of(GAME_EVENTS2).withCoder(StringUtf8Coder.of()));
  PCollection<KV<String, Integer>> extract =
      input
          .apply(ParDo.of(new ParseEventFn()))
          .apply(
              MapElements.into(
                      TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers()))
                  .via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore())));
  PAssert.that(extract).empty();
  p.run().waitUntilFinish();
}
Example #16
Source File: ExternalTest.java From beam with Apache License 2.0
@Override
public Map<String, ExpansionService.TransformProvider> knownTransforms() {
  return ImmutableMap.of(
      TEST_URN_SIMPLE,
      spec -> MapElements.into(TypeDescriptors.strings()).via((String x) -> x + x),
      TEST_URN_LE,
      spec -> Filter.lessThanEq(Integer.parseInt(spec.getPayload().toStringUtf8())),
      TEST_URN_MULTI,
      spec ->
          ParDo.of(
                  new DoFn<Integer, Integer>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) {
                      if (c.element() % 2 == 0) {
                        c.output(c.element());
                      } else {
                        c.output(odd, c.element());
                      }
                    }
                  })
              .withOutputTags(even, TupleTagList.of(odd)));
}
Example #17
Source File: CountByKey.java From beam with Apache License 2.0
@Override
public PCollection<KV<KeyT, Long>> expand(PCollectionList<InputT> inputs) {
  return ReduceByKey.named(getName().orElse(null))
      .of(PCollectionLists.getOnlyElement(inputs))
      .keyBy(getKeyExtractor(), getKeyType().orElse(null))
      .valueBy(v -> 1L, TypeDescriptors.longs())
      .combineBy(Sums.ofLongs())
      .applyIf(
          getWindow().isPresent(),
          builder -> {
            @SuppressWarnings("unchecked")
            final ReduceByKey.WindowByInternalBuilder<InputT, KeyT, Long> cast =
                (ReduceByKey.WindowByInternalBuilder) builder;
            return cast.windowBy(
                getWindow()
                    .orElseThrow(
                        () ->
                            new IllegalStateException(
                                "Unable to resolve windowing for CountByKey expansion.")));
          })
      .output();
}
Example #18
Source File: ReduceWindow.java From beam with Apache License 2.0
private ReduceWindow(
    @Nullable String name,
    UnaryFunction<InputT, ValueT> valueExtractor,
    @Nullable TypeDescriptor<ValueT> valueType,
    VoidFunction<AccT> accumulatorFactory,
    BinaryFunction<AccT, ValueT, AccT> accumulate,
    CombinableBinaryFunction<AccT> mergeAccumulators,
    UnaryFunction<AccT, OutputT> outputFn,
    @Nullable TypeDescriptor<AccT> accumulatorType,
    @Nullable BinaryFunction<ValueT, ValueT, Integer> valueComparator,
    @Nullable Window<InputT> window,
    TypeDescriptor<OutputT> outputType) {
  super(name, outputType, e -> B_ZERO, TypeDescriptors.bytes(), window);
  this.accumulatorFactory = requireNonNull(accumulatorFactory);
  this.accumulate = requireNonNull(accumulate);
  this.mergeAccumulators = requireNonNull(mergeAccumulators);
  this.outputFn = requireNonNull(outputFn);
  this.accumulatorType = accumulatorType;
  this.valueExtractor = requireNonNull(valueExtractor);
  this.valueType = valueType;
  this.valueComparator = valueComparator;
  this.reducer = null;
}
Example #19
Source File: ReduceByKeyTest.java From beam with Apache License 2.0
@Test
public void testBuild_Windowing() {
  final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings());
  final PCollection<KV<String, Long>> reduced =
      ReduceByKey.of(dataset)
          .keyBy(s -> s)
          .valueBy(s -> 1L)
          .combineBy(Sums.ofLongs())
          .windowBy(FixedWindows.of(Duration.standardHours(1)))
          .triggeredBy(DefaultTrigger.of())
          .accumulationMode(AccumulationMode.DISCARDING_FIRED_PANES)
          .output();
  final ReduceByKey reduce = (ReduceByKey) TestUtils.getProducer(reduced);
  assertTrue(reduce.getWindow().isPresent());
  @SuppressWarnings("unchecked")
  final Window<? extends BoundedWindow> window = (Window) reduce.getWindow().get();
  assertEquals(FixedWindows.of(org.joda.time.Duration.standardHours(1)), window.getWindowFn());
  assertEquals(DefaultTrigger.of(), WindowDesc.of(window).getTrigger());
  assertSame(
      AccumulationMode.DISCARDING_FIRED_PANES, WindowDesc.of(window).getAccumulationMode());
  assertFalse(reduce.getValueComparator().isPresent());
}
Example #20
Source File: JacksonTransformsTest.java From beam with Apache License 2.0
@Test
public void testWritingInvalidJsonsWithFailuresLambda() {
  WithFailures.Result<PCollection<String>, KV<MyPojo, String>> result =
      pipeline
          .apply(
              Create.of(Iterables.concat(POJOS, INVALID_POJOS))
                  .withCoder(SerializableCoder.of(MyPojo.class)))
          .apply(
              AsJsons.of(MyPojo.class)
                  .exceptionsInto(
                      TypeDescriptors.kvs(
                          TypeDescriptor.of(MyPojo.class), TypeDescriptors.strings()))
                  .exceptionsVia(
                      f -> KV.of(f.element(), f.exception().getClass().getCanonicalName())));
  result.output().setCoder(StringUtf8Coder.of());
  PAssert.that(result.output()).containsInAnyOrder(VALID_JSONS);
  assertWritingWithErrorFunctionHandler(result);
  pipeline.run();
}
Example #21
Source File: DynamicDestinations.java From beam with Apache License 2.0
Coder<DestinationT> getDestinationCoderWithDefault(CoderRegistry registry)
    throws CannotProvideCoderException {
  Coder<DestinationT> destinationCoder = getDestinationCoder();
  if (destinationCoder != null) {
    return destinationCoder;
  }
  // If dynamicDestinations doesn't provide a coder, try to find it in the coder registry.
  TypeDescriptor<DestinationT> descriptor =
      extractFromTypeParameters(
          this,
          DynamicDestinations.class,
          new TypeDescriptors.TypeVariableExtractor<
              DynamicDestinations<T, DestinationT>, DestinationT>() {});
  try {
    return registry.getCoder(descriptor);
  } catch (CannotProvideCoderException e) {
    throw new CannotProvideCoderException(
        "Failed to infer coder for DestinationT from type "
            + descriptor
            + ", please provide it explicitly by overriding getDestinationCoder()",
        e);
  }
}
Example #22
Source File: DataflowRunner.java From beam with Apache License 2.0
@Override
public PCollection<T> expand(PCollection<ValueWithRecordId<T>> input) {
  return input
      .apply(
          WithKeys.of(
                  (ValueWithRecordId<T> value) ->
                      Arrays.hashCode(value.getId()) % NUM_RESHARD_KEYS)
              .withKeyType(TypeDescriptors.integers()))
      // Reshuffle will dedup based on ids in ValueWithRecordId by passing the data through
      // WindmillSink.
      .apply(Reshuffle.of())
      .apply(
          "StripIds",
          ParDo.of(
              new DoFn<KV<Integer, ValueWithRecordId<T>>, T>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  c.output(c.element().getValue().getValue());
                }
              }));
}
Example #23
Source File: BeamSqlBuiltinFunctionsIntegrationTestBase.java From beam with Apache License 2.0
@Override
public PDone expand(PBegin begin) {
  PCollection<Boolean> result =
      begin
          .apply(Create.of(DUMMY_ROW).withRowSchema(DUMMY_SCHEMA))
          .apply(SqlTransform.query("SELECT " + expr))
          .apply(MapElements.into(TypeDescriptors.booleans()).via(row -> row.getBoolean(0)));
  PAssert.that(result)
      .satisfies(
          input -> {
            assertTrue("Test expression is false: " + expr, Iterables.getOnlyElement(input));
            return null;
          });
  return PDone.in(begin.getPipeline());
}
Example #24
Source File: ReduceByKeyTest.java From beam with Apache License 2.0
@Test
public void testBuild_sortedValues() {
  final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings());
  final PCollection<KV<String, List<Long>>> reduced =
      ReduceByKey.of(dataset)
          .keyBy(s -> s)
          .valueBy(s -> 1L)
          .reduceBy(s -> s.collect(Collectors.toList()))
          .withSortedValues(Long::compare)
          .windowBy(FixedWindows.of(Duration.standardHours(1)))
          .triggeredBy(DefaultTrigger.of())
          .accumulationMode(AccumulationMode.DISCARDING_FIRED_PANES)
          .output();
  final ReduceByKey reduce = (ReduceByKey) TestUtils.getProducer(reduced);
  assertTrue(reduce.getValueComparator().isPresent());
}
Example #25
Source File: JacksonTransformsTest.java From beam with Apache License 2.0
@Test
public void testParsingInvalidJsonsWithFailuresLambda() {
  WithFailures.Result<PCollection<MyPojo>, KV<String, String>> result =
      pipeline
          .apply(Create.of(Iterables.concat(VALID_JSONS, INVALID_JSONS)))
          .apply(
              ParseJsons.of(MyPojo.class)
                  .exceptionsInto(
                      TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.strings()))
                  .exceptionsVia(
                      f -> KV.of(f.element(), f.exception().getClass().getCanonicalName())));
  result.output().setCoder(SerializableCoder.of(MyPojo.class));
  PAssert.that(result.output()).containsInAnyOrder(POJOS);
  assertParsingWithErrorFunctionHandler(result);
  pipeline.run();
}
Example #26
Source File: SumByKeyTest.java From beam with Apache License 2.0
@Test
public void testBuild_Windowing() {
  final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings());
  final PCollection<KV<String, Long>> counted =
      SumByKey.of(dataset)
          .keyBy(s -> s)
          .valueBy(s -> 1L)
          .windowBy(FixedWindows.of(org.joda.time.Duration.standardHours(1)))
          .triggeredBy(DefaultTrigger.of())
          .discardingFiredPanes()
          .withAllowedLateness(Duration.millis(1000))
          .output();
  final SumByKey sum = (SumByKey) TestUtils.getProducer(counted);
  assertTrue(sum.getWindow().isPresent());
  @SuppressWarnings("unchecked")
  final WindowDesc<?> windowDesc = WindowDesc.of((Window) sum.getWindow().get());
  assertEquals(
      FixedWindows.of(org.joda.time.Duration.standardHours(1)), windowDesc.getWindowFn());
  assertEquals(DefaultTrigger.of(), windowDesc.getTrigger());
  assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, windowDesc.getAccumulationMode());
  assertEquals(Duration.millis(1000), windowDesc.getAllowedLateness());
}
Example #27
Source File: KeyByBigQueryTableDestination.java From gcp-ingestion with Mozilla Public License 2.0
@Override
public Result<PCollection<KV<TableDestination, PubsubMessage>>, PubsubMessage> expand(
    PCollection<PubsubMessage> messages) {
  return messages
      .apply(MapElements
          .into(TypeDescriptors.kvs(TypeDescriptor.of(TableDestination.class),
              TypeDescriptor.of(PubsubMessage.class)))
          .via((PubsubMessage msg) -> {
            msg = PubsubConstraints.ensureNonNull(msg);
            return KV.of(getTableDestination(msg.getAttributeMap()), msg);
          }).exceptionsInto(TypeDescriptor.of(PubsubMessage.class))
          .exceptionsVia((WithFailures.ExceptionElement<PubsubMessage> ee) -> {
            try {
              throw ee.exception();
            } catch (IllegalArgumentException e) {
              return FailureMessage.of(KeyByBigQueryTableDestination.class.getSimpleName(), //
                  ee.element(), //
                  ee.exception());
            }
          }));
}
Example #28
Source File: ReduceByKeyTest.java From beam with Apache License 2.0
@Test
public void testBuild_CombineByStream() {
  final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings());
  final PCollection<KV<String, Long>> reduced =
      ReduceByKey.of(dataset)
          .keyBy(s -> s)
          .valueBy(s -> 1L)
          .combineBy(s -> s.mapToLong(e -> e).sum())
          .output();
  final ReduceByKey reduce = (ReduceByKey) TestUtils.getProducer(reduced);
  assertNotNull(reduce.getReducer());
  assertFalse(reduce.isCombineFnStyle());
}
Example #29
Source File: HadoopFormatIO.java From beam with Apache License 2.0
@Override
public PCollection<KV<Integer, KV<KeyT, ValueT>>> expand(PCollection<KV<KeyT, ValueT>> input) {
  return input
      .apply(
          "AssignTask",
          ParDo.of(new AssignTaskFn<KeyT, ValueT>(configView)).withSideInputs(configView))
      .setTypeDescriptor(
          TypeDescriptors.kvs(TypeDescriptors.integers(), input.getTypeDescriptor()))
      .apply("GroupByTaskId", GroupByKey.create())
      .apply("FlattenGroupedTasks", ParDo.of(new FlattenGroupedTasks<>()));
}
Example #30
Source File: ReduceByKeyTest.java From beam with Apache License 2.0
/** Validates the output type upon a `.reduceBy` operation on global window. */
@Test
public void testReductionType0_outputValues() {
  execute(
      new AbstractTestCase<Integer, Set<Integer>>() {

        @Override
        protected List<Integer> getInput() {
          return Arrays.asList(1, 2, 3, 4, 5, 6, 7, 9);
        }

        @Override
        protected TypeDescriptor<Integer> getInputType() {
          return TypeDescriptors.integers();
        }

        @Override
        protected PCollection<Set<Integer>> getOutput(PCollection<Integer> input) {
          return ReduceByKey.of(input)
              .keyBy(e -> e % 2)
              .valueBy(e -> e)
              .reduceBy(s -> s.collect(Collectors.toSet()))
              .outputValues();
        }

        @Override
        public List<Set<Integer>> getUnorderedOutput() {
          return Arrays.asList(Sets.newHashSet(2, 4, 6), Sets.newHashSet(1, 3, 5, 7, 9));
        }
      });
}