org.apache.beam.sdk.transforms.SerializableFunction Java Examples
The following examples show how to use
org.apache.beam.sdk.transforms.SerializableFunction.
You can vote up the examples you like or vote down the ones you don't,
and follow the links above each example to reach the original project or source file. Related API usage is listed in the sidebar.
Example #1
Source File: InvoicingUtils.java From nomulus with Apache License 2.0 | 6 votes |
/**
 * Builds the function that maps each {@code BillingEvent} to the filename {@code Params} of the
 * file it should be written to.
 *
 * <p>The base filename is wrapped in a {@code NestedValueProvider}, so it is only computed from
 * {@code yearMonthProvider} when the provider's value is read.
 *
 * @param outputBucket the GCS bucket we're outputting reports to
 * @param yearMonthProvider a runtime provider for the yyyy-MM we're generating the invoice for
 */
static SerializableFunction<BillingEvent, Params> makeDestinationFunction(
    String outputBucket, ValueProvider<String> yearMonthProvider) {
  return event ->
      new Params()
          .withShardTemplate("")
          .withSuffix(".csv")
          .withBaseFilename(
              NestedValueProvider.of(
                  yearMonthProvider,
                  resolvedYearMonth -> {
                    // Layout: <bucket>/<yyyy-MM>/<event-specific filename>
                    String path =
                        String.format(
                            "%s/%s/%s",
                            outputBucket, resolvedYearMonth, event.toFilename(resolvedYearMonth));
                    return FileBasedSink.convertToFileResourceIfPossible(path);
                  }));
}
Example #2
Source File: SchemaRegistry.java From beam with Apache License 2.0 | 6 votes |
/**
 * Looks up a to-{@code Row} conversion function for the given type.
 *
 * <p>The lookup starts at {@code typeDescriptor} and climbs the raw type's superclass chain,
 * returning the function from the first {@code SchemaProvider} found in {@code providers}.
 *
 * @param typeDescriptor the type to find a conversion function for
 * @return the conversion function, or {@code null} when the chain reaches {@code Object} (or
 *     has no superclass) without finding a provider
 */
@Nullable
@Override
public <T> SerializableFunction<T, Row> toRowFunction(TypeDescriptor<T> typeDescriptor) {
  TypeDescriptor<?> current = typeDescriptor;
  while (true) {
    SchemaProvider provider = providers.get(current);
    if (provider != null) {
      return (SerializableFunction<T, Row>) provider.toRowFunction(current);
    }

    Class<?> parent = current.getRawType().getSuperclass();
    boolean chainExhausted = parent == null || parent.equals(Object.class);
    if (chainExhausted) {
      return null;
    }
    current = TypeDescriptor.of(parent);
  }
}
Example #3
Source File: CassandraIOTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Runs a {@code CassandraIO} delete over a single element with a custom mapper factory and
 * expects {@code counter} to read 1 afterwards.
 */
@Test
public void testCustomMapperImplDelete() {
  // Start from a known count so the final assertion reflects this run only.
  counter.set(0);

  SerializableFunction<Session, Mapper> mapperFactory = new NOOPMapperFactory();

  pipeline
      .apply(Create.of(""))
      .apply(
          CassandraIO.<String>delete()
              .withHosts(Collections.singletonList(CASSANDRA_HOST))
              .withPort(cassandraPort)
              .withKeyspace(CASSANDRA_KEYSPACE)
              .withMapperFactoryFn(mapperFactory)
              .withEntity(String.class));
  pipeline.run();

  assertEquals(1, counter.intValue());
}
Example #4
Source File: PaneExtractorsTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Applies the {@code onlyPane} extractor to three firings (early, on-time, late) and expects a
 * failure whose message mentions "trigger that fires at most once".
 */
@Test
public void onlyPaneMultiplePanesFails() {
  SerializableFunction<Iterable<ValueInSingleWindow<Integer>>, Iterable<Integer>> extractor =
      PaneExtractors.onlyPane(PAssert.PAssertionSite.capture(""));

  ValueInSingleWindow<Integer> earlyFiring =
      ValueInSingleWindow.of(
          4,
          new Instant(0L),
          GlobalWindow.INSTANCE,
          PaneInfo.createPane(true, false, Timing.EARLY));
  ValueInSingleWindow<Integer> onTimeFiring =
      ValueInSingleWindow.of(
          2,
          new Instant(0L),
          GlobalWindow.INSTANCE,
          PaneInfo.createPane(false, false, Timing.ON_TIME, 1L, 0L));
  ValueInSingleWindow<Integer> lateFiring =
      ValueInSingleWindow.of(
          1,
          new Instant(0L),
          GlobalWindow.INSTANCE,
          PaneInfo.createPane(false, false, Timing.LATE, 2L, 1L));
  Iterable<ValueInSingleWindow<Integer>> multipleFiring =
      ImmutableList.of(earlyFiring, onTimeFiring, lateFiring);

  thrown.expectMessage("trigger that fires at most once");
  extractor.apply(multipleFiring);
}
Example #5
Source File: DualInputNestedvalueProviderTest.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Combines two static providers ("foo" and 1) through a {@code DualInputNestedValueProvider} and
 * checks that the result is accessible and equal to "foo2".
 */
@Test
public void testNestedValueProviderStatic() throws Exception {
  ValueProvider<String> xvp = StaticValueProvider.of("foo");
  ValueProvider<Integer> yvp = StaticValueProvider.of(1);

  // Appends (y + 1) to x; the parentheses keep the addition numeric.
  SerializableFunction<TranslatorInput<String, Integer>, String> appendIncremented =
      input -> input.getX() + (input.getY() + 1);
  ValueProvider<String> zvp = DualInputNestedValueProvider.of(xvp, yvp, appendIncremented);

  assertTrue(zvp.isAccessible());
  assertEquals("foo2", zvp.get());
}
Example #6
Source File: SchemaRegistry.java From beam with Apache License 2.0 | 6 votes |
/**
 * Looks up a from-{@code Row} conversion function for the given type.
 *
 * <p>The lookup starts at {@code typeDescriptor} and climbs the raw type's superclass chain,
 * returning the function from the first {@code SchemaProvider} found in {@code providers}.
 *
 * @param typeDescriptor the type to find a conversion function for
 * @return the conversion function, or {@code null} when the chain reaches {@code Object} (or
 *     has no superclass) without finding a provider
 */
@Nullable
@Override
public <T> SerializableFunction<Row, T> fromRowFunction(TypeDescriptor<T> typeDescriptor) {
  TypeDescriptor<?> candidate = typeDescriptor;
  while (true) {
    SchemaProvider provider = providers.get(candidate);
    if (provider != null) {
      return (SerializableFunction<Row, T>) provider.fromRowFunction(candidate);
    }

    Class<?> parent = candidate.getRawType().getSuperclass();
    boolean chainExhausted = parent == null || parent.equals(Object.class);
    if (chainExhausted) {
      return null;
    }
    candidate = TypeDescriptor.of(parent);
  }
}
Example #7
Source File: SchemaRegistryTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks that the coder returned for a registered Java bean agrees with the registry's schema
 * and conversion functions, and that requesting a coder for {@code Double} raises
 * {@code NoSuchSchemaException}.
 */
@Test
public void testGetSchemaCoder() throws NoSuchSchemaException {
  SchemaRegistry schemaRegistry = SchemaRegistry.createDefault();
  schemaRegistry.registerJavaBean(SimpleBean.class);

  Schema registeredSchema = schemaRegistry.getSchema(SimpleBean.class);
  SerializableFunction<SimpleBean, Row> toRow =
      schemaRegistry.getToRowFunction(SimpleBean.class);
  SerializableFunction<Row, SimpleBean> fromRow =
      schemaRegistry.getFromRowFunction(SimpleBean.class);
  SchemaCoder coder = schemaRegistry.getSchemaCoder(SimpleBean.class);

  assertTrue(registeredSchema.equivalent(coder.getSchema()));
  assertTrue(toRow.equals(coder.getToRowFunction()));
  assertTrue(fromRow.equals(coder.getFromRowFunction()));

  // The expectation is set only now so that it applies to the final lookup alone.
  thrown.expect(NoSuchSchemaException.class);
  schemaRegistry.getSchemaCoder(Double.class);
}
Example #8
Source File: PaneExtractorsTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Feeds two {@code EARLY} panes to the {@code nonLatePanes} extractor and expects both values
 * (4 and 8) in the output.
 */
@Test
public void nonLatePanesSingleEarly() {
  SerializableFunction<Iterable<ValueInSingleWindow<Integer>>, Iterable<Integer>> extractor =
      PaneExtractors.nonLatePanes();

  ValueInSingleWindow<Integer> firstEarly =
      ValueInSingleWindow.of(
          8,
          new Instant(0L),
          GlobalWindow.INSTANCE,
          PaneInfo.createPane(true, false, Timing.EARLY));
  ValueInSingleWindow<Integer> secondEarly =
      ValueInSingleWindow.of(
          4,
          new Instant(0L),
          GlobalWindow.INSTANCE,
          PaneInfo.createPane(true, false, Timing.EARLY));
  Iterable<ValueInSingleWindow<Integer>> onlyOnTime = ImmutableList.of(firstEarly, secondEarly);

  assertThat(extractor.apply(onlyOnTime), containsInAnyOrder(4, 8));
}
Example #9
Source File: WatermarkPolicyTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds a watermark policy from a custom timestamp function (arrival time plus one minute) and
 * checks that, after each update, the watermark equals that record's shifted arrival time.
 */
@Test
public void shouldAdvanceWatermarkWithCustomTimePolicy() {
  SerializableFunction<KinesisRecord, Instant> timestampFn =
      kinesisRecord ->
          kinesisRecord.getApproximateArrivalTimestamp().plus(Duration.standardMinutes(1));
  WatermarkParameters parameters = WatermarkParameters.create().withTimestampFn(timestampFn);
  WatermarkPolicy policy =
      WatermarkPolicyFactory.withCustomWatermarkPolicy(parameters).createWatermarkPolicy();

  KinesisRecord a = mock(KinesisRecord.class);
  KinesisRecord b = mock(KinesisRecord.class);
  Instant time1 = NOW.minus(Duration.standardSeconds(30L));
  Instant time2 = NOW.minus(Duration.standardSeconds(20L));
  when(a.getApproximateArrivalTimestamp()).thenReturn(time1);
  when(b.getApproximateArrivalTimestamp()).thenReturn(time2);

  Instant expectedAfterA = time1.plus(Duration.standardMinutes(1));
  Instant expectedAfterB = time2.plus(Duration.standardMinutes(1));

  policy.update(a);
  assertThat(policy.getWatermark()).isEqualTo(expectedAfterA);

  policy.update(b);
  assertThat(policy.getWatermark()).isEqualTo(expectedAfterB);
}
Example #10
Source File: DynamoDBIOTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Writes generated requests through {@code DynamoDBIO.write()} against the mocked client and
 * asserts that the write's output collection is empty (global count of 0).
 */
@Test
public void testWriteDataToDynamo() {
  final List<WriteRequest> writeRequests =
      DynamoDBIOTestHelper.generateWriteRequests(numOfItems);

  // Pairs every request with the target table name.
  final SerializableFunction<WriteRequest, KV<String, WriteRequest>> keyByTable =
      writeRequest -> KV.of(tableName, writeRequest);

  final PCollection<Void> output =
      pipeline
          .apply(Create.of(writeRequests))
          .apply(
              DynamoDBIO.<WriteRequest>write()
                  .withWriteRequestMapperFn(keyByTable)
                  .withRetryConfiguration(
                      DynamoDBIO.RetryConfiguration.create(5, Duration.standardMinutes(1)))
                  .withAwsClientsProvider(
                      AwsClientsProviderMock.of(DynamoDBIOTestHelper.getDynamoDBClient())));

  final PCollection<Long> publishedResultsSize = output.apply(Count.globally());
  PAssert.that(publishedResultsSize).containsInAnyOrder(0L);

  pipeline.run().waitUntilFinish();
}
Example #11
Source File: BigQueryStorageTableSource.java From beam with Apache License 2.0 | 6 votes |
/**
 * Static factory for {@code BigQueryStorageTableSource}; every argument is forwarded unchanged
 * to the constructor.
 *
 * @param tableRefProvider provider of the table reference
 * @param readOptions read options, may be {@code null}
 * @param selectedFields provider of the selected field names, may be {@code null}
 * @param rowRestriction provider of the row restriction, may be {@code null}
 * @param parseFn function turning a {@code SchemaAndRecord} into a {@code T}
 * @param outputCoder coder for the produced elements
 * @param bqServices BigQuery services handle
 * @return the newly constructed source
 */
public static <T> BigQueryStorageTableSource<T> create(
    ValueProvider<TableReference> tableRefProvider,
    @Nullable TableReadOptions readOptions,
    @Nullable ValueProvider<List<String>> selectedFields,
    @Nullable ValueProvider<String> rowRestriction,
    SerializableFunction<SchemaAndRecord, T> parseFn,
    Coder<T> outputCoder,
    BigQueryServices bqServices) {
  BigQueryStorageTableSource<T> source =
      new BigQueryStorageTableSource<>(
          tableRefProvider,
          readOptions,
          selectedFields,
          rowRestriction,
          parseFn,
          outputCoder,
          bqServices);
  return source;
}
Example #12
Source File: StreamingWriteTables.java From beam with Apache License 2.0 | 6 votes |
/** Private constructor: copies each argument into the field of the same name. */
private StreamingWriteTables(
    BigQueryServices bigQueryServices,
    InsertRetryPolicy retryPolicy,
    boolean extendedErrorInfo,
    boolean skipInvalidRows,
    boolean ignoreUnknownValues,
    boolean ignoreInsertIds,
    Coder<ElementT> elementCoder,
    SerializableFunction<ElementT, TableRow> toTableRow) {
  // Collaborators.
  this.bigQueryServices = bigQueryServices;
  this.retryPolicy = retryPolicy;
  this.elementCoder = elementCoder;
  this.toTableRow = toTableRow;

  // Boolean switches.
  this.extendedErrorInfo = extendedErrorInfo;
  this.skipInvalidRows = skipInvalidRows;
  this.ignoreUnknownValues = ignoreUnknownValues;
  this.ignoreInsertIds = ignoreInsertIds;
}
Example #13
Source File: PaneExtractorsTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Feeds a late, an on-time and an early pane to the {@code allPanes} extractor and expects all
 * three values (4, 8 and 1) in the output.
 */
@Test
public void allPanesMultiplePanes() {
  SerializableFunction<Iterable<ValueInSingleWindow<Integer>>, Iterable<Integer>> extractor =
      PaneExtractors.allPanes();

  ValueInSingleWindow<Integer> latePane =
      ValueInSingleWindow.of(
          8,
          new Instant(0L),
          GlobalWindow.INSTANCE,
          PaneInfo.createPane(false, false, Timing.LATE, 2L, 1L));
  ValueInSingleWindow<Integer> onTimePane =
      ValueInSingleWindow.of(
          4,
          new Instant(0L),
          GlobalWindow.INSTANCE,
          PaneInfo.createPane(false, false, Timing.ON_TIME, 1L, 0L));
  ValueInSingleWindow<Integer> earlyPane =
      ValueInSingleWindow.of(
          1,
          new Instant(0L),
          GlobalWindow.INSTANCE,
          PaneInfo.createPane(true, false, Timing.EARLY));
  Iterable<ValueInSingleWindow<Integer>> onlyOnTime =
      ImmutableList.of(latePane, onTimePane, earlyPane);

  assertThat(extractor.apply(onlyOnTime), containsInAnyOrder(4, 8, 1));
}
Example #14
Source File: PaneExtractorsTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Feeds two {@code ON_TIME} panes to the {@code onTimePane} extractor and expects both values
 * (2 and 4) in the output.
 */
@Test
public void onTimePane() {
  SerializableFunction<Iterable<ValueInSingleWindow<Integer>>, Iterable<Integer>> extractor =
      PaneExtractors.onTimePane();

  ValueInSingleWindow<Integer> firstOnTime =
      ValueInSingleWindow.of(
          4,
          new Instant(0L),
          GlobalWindow.INSTANCE,
          PaneInfo.createPane(false, false, Timing.ON_TIME, 1L, 0L));
  ValueInSingleWindow<Integer> secondOnTime =
      ValueInSingleWindow.of(
          2,
          new Instant(0L),
          GlobalWindow.INSTANCE,
          PaneInfo.createPane(false, false, Timing.ON_TIME, 1L, 0L));
  Iterable<ValueInSingleWindow<Integer>> onlyOnTime =
      ImmutableList.of(firstOnTime, secondOnTime);

  assertThat(extractor.apply(onlyOnTime), containsInAnyOrder(2, 4));
}
Example #15
Source File: HashClientInfoTest.java From gcp-ingestion with Mozilla Public License 2.0 | 6 votes |
/**
 * Sends one message carrying plain client id and client ip attributes through
 * {@code HashClientInfo} and verifies that both attributes come out changed and are recognised
 * by {@code HashClientInfo.isHashed}.
 */
@Test
public void testOutputIsHashed() {
  String clientId = "client_id";
  String clientIp = "client_ip";

  Map<String, String> attributes =
      ImmutableMap.<String, String>builder()
          .put(Attribute.CLIENT_ID, clientId)
          .put(Attribute.CLIENT_IP, clientIp)
          .build();
  PubsubMessage input = new PubsubMessage("{}".getBytes(StandardCharsets.UTF_8), attributes);

  PCollection<PubsubMessage> output =
      pipeline
          .apply(Create.of(input))
          .apply(
              HashClientInfo.of(
                  pipeline.newProvider(ID_HASH_KEY_PATH),
                  pipeline.newProvider(IP_HASH_KEY_PATH)));

  SerializableFunction<Iterable<PubsubMessage>, Void> attributesAreHashed =
      hashedMessages -> {
        for (PubsubMessage message : hashedMessages) {
          Assert.assertNotEquals(message.getAttribute(Attribute.CLIENT_ID), clientId);
          Assert.assertNotEquals(message.getAttribute(Attribute.CLIENT_IP), clientIp);
          Assert.assertTrue(HashClientInfo.isHashed(message.getAttribute(Attribute.CLIENT_ID)));
          Assert.assertTrue(HashClientInfo.isHashed(message.getAttribute(Attribute.CLIENT_IP)));
        }
        return null;
      };
  PAssert.that(output).satisfies(attributesAreHashed);

  pipeline.run();
}
Example #16
Source File: StreamingInserts.java From beam with Apache License 2.0 | 6 votes |
/**
 * Convenience constructor that delegates to a longer constructor of this class, supplying a
 * fresh {@code BigQueryServicesImpl}, the {@code InsertRetryPolicy.alwaysRetry()} policy, four
 * {@code false} flags and a trailing {@code null}.
 *
 * <p>NOTE(review): the delegated-to constructor is not visible here, so the meaning of the four
 * boolean arguments and of the final {@code null} must be confirmed against its signature.
 *
 * @param createDisposition forwarded unchanged
 * @param dynamicDestinations forwarded unchanged
 * @param elementCoder forwarded unchanged
 * @param toTableRow forwarded unchanged
 */
public StreamingInserts(
    CreateDisposition createDisposition,
    DynamicDestinations<?, DestinationT> dynamicDestinations,
    Coder<ElementT> elementCoder,
    SerializableFunction<ElementT, TableRow> toTableRow) {
  this(
      createDisposition,
      dynamicDestinations,
      new BigQueryServicesImpl(),
      InsertRetryPolicy.alwaysRetry(),
      false,
      false,
      false,
      false,
      elementCoder,
      toTableRow,
      null);
}
Example #17
Source File: DynamoDBIOTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Verifies that a {@code DynamoDBIO.read()} configured through {@code withScanRequestFn()} with a
 * scan request that sets no total segment count is rejected with an
 * {@link IllegalArgumentException} mentioning the missing {@code TotalSegments}.
 *
 * <p>The expectation is declared once on the {@code thrown} rule and the exception is left to
 * propagate. The earlier form wrapped {@code run()} in a try/catch and called {@code fail(...)}
 * with the very message the rule was expecting. On JUnit versions where the rule also handles
 * {@code AssertionError}, that {@code fail} could satisfy the expectation, letting the test pass
 * even when nothing was rejected. Catching the exception would in turn have hidden it from the
 * rule.
 */
@Test
public void testMissingTotalSegments() {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("TotalSegments is required with withScanRequestFn()");

  pipeline.apply(
      DynamoDBIO.read()
          .withScanRequestFn(
              (SerializableFunction<Void, ScanRequest>) input -> new ScanRequest(tableName))
          .withAwsClientsProvider(
              AwsClientsProviderMock.of(DynamoDBIOTestHelper.getDynamoDBClient())));

  // Reached only if applying the transform did not already reject the configuration.
  pipeline.run().waitUntilFinish();
}
Example #18
Source File: AvroIO.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates a {@link DynamicAvroDestinations} whose {@link FilenamePolicy}, schema, metadata and
 * codec are the same for every element.
 *
 * @param filenamePolicy the filename policy to always return
 * @param schema the schema to always return
 * @param metadata the metadata to always return
 * @param codec the codec to always return
 * @param formatFunction converts a user element into the output element
 */
public static <UserT, OutputT> DynamicAvroDestinations<UserT, Void, OutputT> constantDestinations(
    FilenamePolicy filenamePolicy,
    Schema schema,
    Map<String, Object> metadata,
    CodecFactory codec,
    SerializableFunction<UserT, OutputT> formatFunction) {
  DynamicAvroDestinations<UserT, Void, OutputT> constant =
      new ConstantAvroDestination<>(filenamePolicy, schema, metadata, codec, formatFunction);
  return constant;
}
Example #19
Source File: JdbcIOTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Reads rows with {@code JdbcIO.readRows()} and writes them with {@code JdbcIO.write()} into a
 * freshly created table, passing only a table name to the write. The table is dropped afterwards
 * whether or not the pipeline succeeds.
 */
@Test
public void testWriteWithoutPreparedStatementWithReadRows() throws Exception {
  SerializableFunction<Void, DataSource> dataSourceProvider = ignored -> dataSource;
  String selectByName =
      String.format("select name,id from %s where name = ?", readTableName);

  PCollection<Row> rows =
      pipeline.apply(
          JdbcIO.readRows()
              .withDataSourceProviderFn(dataSourceProvider)
              .withQuery(selectByName)
              .withStatementPreparator(
                  statement -> statement.setString(1, TestRow.getNameForSeed(1))));

  String writeTableName = DatabaseTestHelper.getTestTableName("UT_WRITE_PS_WITH_READ_ROWS");
  DatabaseTestHelper.createTableForRowWithSchema(dataSource, writeTableName);
  try {
    JdbcIO.DataSourceConfiguration derbyConfiguration =
        JdbcIO.DataSourceConfiguration.create(
            "org.apache.derby.jdbc.ClientDriver",
            "jdbc:derby://localhost:" + port + "/target/beam");
    rows.apply(
        JdbcIO.<Row>write()
            .withDataSourceConfiguration(derbyConfiguration)
            .withBatchSize(10L)
            .withTable(writeTableName));
    pipeline.run();
  } finally {
    DatabaseTestHelper.deleteTable(dataSource, writeTableName);
  }
}
Example #20
Source File: PubsubIO.java From beam with Apache License 2.0 | 5 votes |
/**
 * Returns a function that encodes its input with {@code coder} and wraps the resulting bytes in
 * a {@code PubsubMessage} with an empty attribute map.
 *
 * <p>A {@code CoderException} raised while encoding is rethrown as a {@code RuntimeException}
 * with the original exception as its cause.
 */
private static <T> SerializableFunction<T, PubsubMessage> formatPayloadUsingCoder(
    Coder<T> coder) {
  return element -> {
    final byte[] payload;
    try {
      payload = CoderUtils.encodeToByteArray(coder, element);
    } catch (CoderException e) {
      throw new RuntimeException("Could not encode Pubsub message", e);
    }
    return new PubsubMessage(payload, ImmutableMap.of());
  };
}
Example #21
Source File: SnowflakeServiceConfig.java From beam with Apache License 2.0 | 5 votes |
/**
 * Stores the given settings on this configuration object.
 *
 * @param dataSourceProviderFn function supplying the {@code DataSource}
 * @param table stored in {@code table}
 * @param query stored in {@code query}
 * @param storageIntegration stored in {@code storageIntegrationName}
 * @param stagingBucketDir stored in {@code stagingBucketDir}
 */
public SnowflakeServiceConfig(
    SerializableFunction<Void, DataSource> dataSourceProviderFn,
    String table,
    String query,
    String storageIntegration,
    String stagingBucketDir) {
  // Note the name change: the parameter is storageIntegration, the field storageIntegrationName.
  this.storageIntegrationName = storageIntegration;
  this.stagingBucketDir = stagingBucketDir;

  this.table = table;
  this.query = query;

  this.dataSourceProviderFn = dataSourceProviderFn;
}
Example #22
Source File: PAssert.java From beam with Apache License 2.0 | 5 votes |
/**
 * Applies a {@code GroupThenAssertForSingleton} built from {@code checkerFn} to the actual
 * value, under a transform name of the form {@code PAssert$<n>}.
 *
 * @param checkerFn the check to apply
 * @return this assertion object, so calls can be chained
 */
@Override
public PCollectionSingletonAssert<T> satisfies(SerializableFunction<T, Void> checkerFn) {
  // Post-increment: this transform uses the current count, the next one gets a new suffix.
  String transformName = "PAssert$" + (assertCount++);
  actual.apply(
      transformName,
      new GroupThenAssertForSingleton<>(checkerFn, rewindowingStrategy, paneExtractor, site));
  return this;
}
Example #23
Source File: PAssert.java From beam with Apache License 2.0 | 5 votes |
/** Private constructor: copies each argument into the field of the same name. */
private GroupThenAssert(
    SerializableFunction<Iterable<T>, Void> checkerFn,
    AssertionWindows rewindowingStrategy,
    SimpleFunction<Iterable<ValueInSingleWindow<T>>, Iterable<T>> paneExtractor,
    PAssertionSite site) {
  this.site = site;
  this.paneExtractor = paneExtractor;
  this.rewindowingStrategy = rewindowingStrategy;
  this.checkerFn = checkerFn;
}
Example #24
Source File: PaneExtractorsTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Feeds one late, two on-time and one early pane to the {@code onTimePane} extractor and expects
 * only the two on-time values (2 and 4) in the output.
 */
@Test
public void onTimePaneOnlyEarlyAndLate() {
  SerializableFunction<Iterable<ValueInSingleWindow<Integer>>, Iterable<Integer>> extractor =
      PaneExtractors.onTimePane();

  ValueInSingleWindow<Integer> latePane =
      ValueInSingleWindow.of(
          8,
          new Instant(0L),
          GlobalWindow.INSTANCE,
          PaneInfo.createPane(false, false, Timing.LATE, 2L, 1L));
  ValueInSingleWindow<Integer> firstOnTime =
      ValueInSingleWindow.of(
          4,
          new Instant(0L),
          GlobalWindow.INSTANCE,
          PaneInfo.createPane(false, false, Timing.ON_TIME, 1L, 0L));
  ValueInSingleWindow<Integer> secondOnTime =
      ValueInSingleWindow.of(
          2,
          new Instant(0L),
          GlobalWindow.INSTANCE,
          PaneInfo.createPane(false, false, Timing.ON_TIME, 1L, 0L));
  ValueInSingleWindow<Integer> earlyPane =
      ValueInSingleWindow.of(
          1,
          new Instant(0L),
          GlobalWindow.INSTANCE,
          PaneInfo.createPane(true, false, Timing.EARLY));
  Iterable<ValueInSingleWindow<Integer>> onlyOnTime =
      ImmutableList.of(latePane, firstOnTime, secondOnTime, earlyPane);

  assertThat(extractor.apply(onlyOnTime), containsInAnyOrder(2, 4));
}
Example #25
Source File: PAssert.java From beam with Apache License 2.0 | 5 votes |
/** Private constructor: copies each argument into the field of the same name. */
private OneSideInputAssert(
    PTransform<PBegin, PCollectionView<ActualT>> createActual,
    PTransform<PCollection<Integer>, PCollection<Integer>> windowToken,
    SerializableFunction<ActualT, Void> checkerFn,
    PAssertionSite site) {
  this.site = site;
  this.checkerFn = checkerFn;
  this.windowToken = windowToken;
  this.createActual = createActual;
}
Example #26
Source File: ProtoDynamicMessageSchemaTest.java From beam with Apache License 2.0 | 5 votes |
@Test public void testOneOfProtoToRow() throws InvalidProtocolBufferException { ProtoDynamicMessageSchema schemaProvider = schemaFromDescriptor(OneOf.getDescriptor()); SerializableFunction<DynamicMessage, Row> toRow = schemaProvider.getToRowFunction(); // equality doesn't work between dynamic messages and other, // so we compare string representation assertEquals(ONEOF_ROW_INT32.toString(), toRow.apply(toDynamic(ONEOF_PROTO_INT32)).toString()); assertEquals(ONEOF_ROW_BOOL.toString(), toRow.apply(toDynamic(ONEOF_PROTO_BOOL)).toString()); assertEquals( ONEOF_ROW_STRING.toString(), toRow.apply(toDynamic(ONEOF_PROTO_STRING)).toString()); assertEquals( ONEOF_ROW_PRIMITIVE.toString(), toRow.apply(toDynamic(ONEOF_PROTO_PRIMITIVE)).toString()); }
Example #27
Source File: DynamicFileDestinations.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates a {@link DynamicDestinations} that hands out {@link DefaultFilenamePolicy} instances
 * configured from {@link Params}.
 *
 * @param destinationFunction maps a user element to the {@link Params} of its destination
 * @param emptyDestination the {@link Params} passed through as the empty destination
 * @param formatFunction converts a user element into the output element
 */
public static <UserT, OutputT> DynamicDestinations<UserT, Params, OutputT> toDefaultPolicies(
    SerializableFunction<UserT, Params> destinationFunction,
    Params emptyDestination,
    SerializableFunction<UserT, OutputT> formatFunction) {
  DynamicDestinations<UserT, Params, OutputT> destinations =
      new DefaultPolicyDestinations<>(destinationFunction, emptyDestination, formatFunction);
  return destinations;
}
Example #28
Source File: JavaFieldSchemaTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks that the schema inferred for {@code PojoWithEnum} is equivalent to
 * {@code POJO_WITH_ENUM_SCHEMA}, and that the registry's to-row function converts a POJO for
 * each colour into the expected row.
 */
@Test
public void testEnumFieldToRow() throws NoSuchSchemaException {
  SchemaRegistry registry = SchemaRegistry.createDefault();
  Schema inferredSchema = registry.getSchema(PojoWithEnum.class);
  SchemaTestUtils.assertSchemaEquivalent(POJO_WITH_ENUM_SCHEMA, inferredSchema);

  EnumerationType enumerationType = ENUMERATION;
  List<EnumerationType.Value> allColors =
      Lists.newArrayList(
          enumerationType.valueOf("RED"),
          enumerationType.valueOf("GREEN"),
          enumerationType.valueOf("BLUE"));

  // Expected rows: the single enum value followed by the list of all values.
  Row expectedRed =
      Row.withSchema(POJO_WITH_ENUM_SCHEMA)
          .addValues(enumerationType.valueOf("RED"), allColors)
          .build();
  Row expectedGreen =
      Row.withSchema(POJO_WITH_ENUM_SCHEMA)
          .addValues(enumerationType.valueOf("GREEN"), allColors)
          .build();
  Row expectedBlue =
      Row.withSchema(POJO_WITH_ENUM_SCHEMA)
          .addValues(enumerationType.valueOf("BLUE"), allColors)
          .build();

  List<Color> allColorsJava = Lists.newArrayList(Color.RED, Color.GREEN, Color.BLUE);
  SerializableFunction<PojoWithEnum, Row> toRow = registry.getToRowFunction(PojoWithEnum.class);

  assertEquals(expectedRed, toRow.apply(new PojoWithEnum(Color.RED, allColorsJava)));
  assertEquals(expectedGreen, toRow.apply(new PojoWithEnum(Color.GREEN, allColorsJava)));
  assertEquals(expectedBlue, toRow.apply(new PojoWithEnum(Color.BLUE, allColorsJava)));
}
Example #29
Source File: JdbcIOTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Reads rows with {@code JdbcIO.readRows()} using a query that embeds the name literally (no
 * statement preparator), then checks both the resulting schema and the single expected row.
 */
@Test
public void testReadRowsWithoutStatementPreparator() {
  SerializableFunction<Void, DataSource> dataSourceProvider = ignored -> dataSource;
  String name = TestRow.getNameForSeed(1);
  String selectByName =
      String.format("select name,id from %s where name = '%s'", readTableName, name);

  PCollection<Row> rows =
      pipeline.apply(
          JdbcIO.readRows()
              .withDataSourceProviderFn(dataSourceProvider)
              .withQuery(selectByName));

  Schema.Field nameField =
      Schema.Field.of("NAME", LogicalTypes.variableLengthString(JDBCType.VARCHAR, 500))
          .withNullable(true);
  Schema.Field idField = Schema.Field.of("ID", Schema.FieldType.INT32).withNullable(true);
  Schema expectedSchema = Schema.of(nameField, idField);
  assertEquals(expectedSchema, rows.getSchema());

  PCollection<Row> output = rows.apply(Select.fieldNames("NAME", "ID"));
  Row expectedRow = Row.withSchema(expectedSchema).addValues(name, 1).build();
  PAssert.that(output).containsInAnyOrder(ImmutableList.of(expectedRow));

  pipeline.run();
}
Example #30
Source File: KuduIOIT.java From beam with Apache License 2.0 | 5 votes |
/** * Tests that the projected columns are passed down to the Kudu scanner by attempting to read the * {@value KuduTestUtils#COL_NAME} in the parse function when it is omitted. */ private void runReadProjectedColumns() { thrown.expect(IllegalArgumentException.class); readPipeline .apply( "Read with projected columns", KuduIO.<String>read() .withMasterAddresses(options.getKuduMasterAddresses()) .withTable(options.getKuduTable()) .withParseFn( (SerializableFunction<RowResult, String>) input -> input.getString(COL_NAME)) .withProjectedColumns(Collections.singletonList(COL_ID))) // COL_NAME excluded .setCoder(StringUtf8Coder.of()); readPipeline.run().waitUntilFinish(); }