org.apache.beam.sdk.options.ValueProvider Java Examples
The following examples show how to use org.apache.beam.sdk.options.ValueProvider.
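ValueProvider is Beam's mechanism for pipeline options whose values may only become available at run time (for example, in Dataflow templates). As a quick orientation before the examples, here is a minimal, self-contained sketch of the three common ways to obtain a provider; the MyOptions interface and its --input option are hypothetical, not taken from any project below:

import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.ValueProvider;
import org.apache.beam.sdk.options.ValueProvider.NestedValueProvider;
import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;

public class ValueProviderSketch {

  /** Hypothetical options interface; the value of --input may arrive only at run time. */
  public interface MyOptions extends PipelineOptions {
    @Description("Input path")
    ValueProvider<String> getInput();

    void setInput(ValueProvider<String> value);
  }

  public static void main(String[] args) {
    // A value fixed at graph-construction time.
    ValueProvider<String> fixed = StaticValueProvider.of("gs://bucket/file.txt");

    // A value derived from another provider; the function runs lazily on get().
    ValueProvider<Integer> length = NestedValueProvider.of(fixed, String::length);

    // A value bound to a pipeline option; readable now only if --input was passed.
    MyOptions options = PipelineOptionsFactory.fromArgs(args).as(MyOptions.class);
    ValueProvider<String> input = options.getInput();

    System.out.println(fixed.get());          // "gs://bucket/file.txt"
    System.out.println(length.get());         // 20
    System.out.println(input.isAccessible()); // false unless --input was given
  }
}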
Example #1
Source File: TextStreamingPipeline.java From dlp-dataflow-deidentification with Apache License 2.0 | 6 votes

public TextFileReader(
    String kmsKeyProjectName,
    ValueProvider<String> fileDecryptKeyRing,
    ValueProvider<String> fileDecryptKey,
    ValueProvider<Integer> batchSize,
    ValueProvider<String> cSek,
    ValueProvider<String> cSekhash)
    throws IOException, GeneralSecurityException {
  this.batchSize = batchSize;
  this.kmsKeyProjectName = kmsKeyProjectName;
  this.fileDecryptKey = fileDecryptKey;
  this.fileDecryptKeyName = fileDecryptKeyRing;
  this.cSek = cSek;
  this.cSekhash = cSekhash;
  this.customerSuppliedKey = false;
  this.key = null;
}
Example #2
Source File: TextRowToMutation.java From DataflowTemplates with Apache License 2.0 | 6 votes

public TextRowToMutation(
    PCollectionView<Ddl> ddlView,
    PCollectionView<Map<String, List<TableManifest.Column>>> tableColumnsView,
    ValueProvider<Character> columnDelimiter,
    ValueProvider<Character> fieldQualifier,
    ValueProvider<Boolean> trailingDelimiter,
    ValueProvider<Character> escape,
    ValueProvider<String> nullString,
    ValueProvider<String> dateFormat,
    ValueProvider<String> timestampFormat) {
  this.ddlView = ddlView;
  this.tableColumnsView = tableColumnsView;
  this.columnDelimiter = columnDelimiter;
  this.fieldQualifier = fieldQualifier;
  this.trailingDelimiter = trailingDelimiter;
  this.escape = escape;
  this.nullString = nullString;
  this.dateFormat = dateFormat;
  this.timestampFormat = timestampFormat;
}
Example #3
Source File: CassandraIO.java From beam with Apache License 2.0 | 5 votes

/** Specify the local DC used by the load balancing policy. */
public Write<T> withLocalDc(String localDc) {
  checkArgument(
      localDc != null,
      "CassandraIO."
          + getMutationTypeName()
          + "().withLocalDc(localDc) called with null localDc");
  return withLocalDc(ValueProvider.StaticValueProvider.of(localDc));
}
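This overload pair — a plain-value method that validates eagerly, wraps its argument with StaticValueProvider.of, and delegates to the ValueProvider overload — is a recurring idiom across Beam IOs (compare Examples #14, #16 and #24 below). A generic sketch of the idiom, using a made-up ConfigSketch class rather than CassandraIO itself:

import org.apache.beam.sdk.options.ValueProvider;
import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;

/** Hypothetical config class illustrating the overload-pair idiom from the example above. */
public class ConfigSketch {

  private ValueProvider<String> localDc;

  /** Plain-value overload: validate eagerly, then wrap and delegate. */
  public ConfigSketch withLocalDc(String localDc) {
    if (localDc == null) {
      throw new IllegalArgumentException("withLocalDc(localDc) called with null localDc");
    }
    return withLocalDc(StaticValueProvider.of(localDc));
  }

  /** Provider overload: store as-is; the value may only materialize at run time. */
  public ConfigSketch withLocalDc(ValueProvider<String> localDc) {
    this.localDc = localDc;
    return this;
  }
}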
Example #4
Source File: DLPTextToBigQueryStreaming.java From dlp-dataflow-deidentification with Apache License 2.0 | 5 votes

@Description(
    "DLP API has a limit for payload size of 524KB /api call. "
        + "That's why dataflow process will need to chunk it. User will have to decide "
        + "on how they would like to batch the request depending on number of rows "
        + "and how big each row is.")
@Required
ValueProvider<Integer> getBatchSize();
Example #5
Source File: BigQueryToTFRecord.java From DataflowTemplates with Apache License 2.0 | 5 votes

/**
 * The {@link BigQueryToTFRecord#applyTrainTestValSplit} method transforms the PCollection by
 * randomly partitioning it into PCollections for each dataset.
 */
static PCollectionList<byte[]> applyTrainTestValSplit(
    PCollection<byte[]> input,
    ValueProvider<Float> trainingPercentage,
    ValueProvider<Float> testingPercentage,
    ValueProvider<Float> validationPercentage,
    Random rand) {
  return input.apply(
      Partition.of(
          3,
          (Partition.PartitionFn<byte[]>)
              (number, numPartitions) -> {
                Float train = trainingPercentage.get();
                Float test = testingPercentage.get();
                Float validation = validationPercentage.get();
                Double d = rand.nextDouble();
                if (train + test + validation != 1) {
                  throw new RuntimeException(
                      String.format(
                          "Train %.2f, Test %.2f, Validation %.2f percentages must add up to"
                              + " 100 percent",
                          train, test, validation));
                }
                if (d < train) {
                  return 0;
                } else if (d >= train && d < train + test) {
                  return 1;
                } else {
                  return 2;
                }
              }));
}
Example #6
Source File: DisplayDataTest.java From beam with Apache License 2.0 | 5 votes

@Test
public void testInaccessibleValueProvider() {
  DisplayData data =
      DisplayData.from(
          new HasDisplayData() {
            @Override
            public void populateDisplayData(DisplayData.Builder builder) {
              builder.add(
                  DisplayData.item(
                      "foo",
                      new ValueProvider<String>() {
                        @Override
                        public boolean isAccessible() {
                          return false;
                        }

                        @Override
                        public String get() {
                          return "bar";
                        }

                        @Override
                        public String toString() {
                          return "toString";
                        }
                      }));
            }
          });

  assertThat(data.items(), hasSize(1));
  assertThat(data, hasDisplayItem("foo", "toString"));
}
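The test passes because DisplayData falls back to toString() when a provider reports isAccessible() == false. The same guard is useful in user code; a minimal sketch of the pattern (the describe helper is illustrative, not Beam API):

import org.apache.beam.sdk.options.ValueProvider;
import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;

public class AccessGuardSketch {

  /** Hypothetical helper: read the value only if it is available now. */
  static void describe(ValueProvider<String> provider) {
    if (provider.isAccessible()) {
      System.out.println("value: " + provider.get());
    } else {
      // For runtime providers, get() would typically fail here; toString() is safe.
      System.out.println("deferred: " + provider);
    }
  }

  public static void main(String[] args) {
    describe(StaticValueProvider.of("bar")); // prints "value: bar"
  }
}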
Example #7
Source File: SpannerConverters.java From DataflowTemplates with Apache License 2.0 | 5 votes

public static ExportTransform create(
    ValueProvider<String> table,
    SpannerConfig spannerConfig,
    ValueProvider<String> textWritePrefix) {
  return ExportTransform.builder()
      .table(table)
      .spannerConfig(spannerConfig)
      .textWritePrefix(textWritePrefix)
      .build();
}
Example #8
Source File: ExportTransform.java From DataflowTemplates with Apache License 2.0 | 5 votes

private SchemaBasedDynamicDestinations(
    PCollectionView<Map<String, SerializableSchemaSupplier>> avroSchemas,
    PCollectionView<String> uniqueIdView,
    ValueProvider<ResourceId> baseDir) {
  this.avroSchemas = avroSchemas;
  this.uniqueIdView = uniqueIdView;
  this.baseDir = baseDir;
}
Example #9
Source File: BatchLoads.java From beam with Apache License 2.0 | 5 votes

BatchLoads(
    WriteDisposition writeDisposition,
    CreateDisposition createDisposition,
    boolean singletonTable,
    DynamicDestinations<?, DestinationT> dynamicDestinations,
    Coder<DestinationT> destinationCoder,
    ValueProvider<String> customGcsTempLocation,
    @Nullable ValueProvider<String> loadJobProjectId,
    boolean ignoreUnknownValues,
    Coder<ElementT> elementCoder,
    RowWriterFactory<ElementT, DestinationT> rowWriterFactory,
    @Nullable String kmsKey,
    boolean clusteringEnabled,
    boolean useAvroLogicalTypes) {
  bigQueryServices = new BigQueryServicesImpl();
  this.writeDisposition = writeDisposition;
  this.createDisposition = createDisposition;
  this.singletonTable = singletonTable;
  this.dynamicDestinations = dynamicDestinations;
  this.destinationCoder = destinationCoder;
  this.maxNumWritersPerBundle = DEFAULT_MAX_NUM_WRITERS_PER_BUNDLE;
  this.maxFileSize = DEFAULT_MAX_FILE_SIZE;
  this.numFileShards = DEFAULT_NUM_FILE_SHARDS;
  this.maxFilesPerPartition = DEFAULT_MAX_FILES_PER_PARTITION;
  this.maxBytesPerPartition = DEFAULT_MAX_BYTES_PER_PARTITION;
  this.triggeringFrequency = null;
  this.customGcsTempLocation = customGcsTempLocation;
  this.loadJobProjectId = loadJobProjectId;
  this.ignoreUnknownValues = ignoreUnknownValues;
  this.useAvroLogicalTypes = useAvroLogicalTypes;
  this.elementCoder = elementCoder;
  this.kmsKey = kmsKey;
  this.rowWriterFactory = rowWriterFactory;
  this.clusteringEnabled = clusteringEnabled;
  schemaUpdateOptions = Collections.emptySet();
}
Example #10
Source File: DualInputNestedValueProvider.java From DataflowTemplates with Apache License 2.0 | 5 votes

public DualInputNestedValueProvider(
    ValueProvider<FirstT> valueX,
    ValueProvider<SecondT> valueY,
    SerializableFunction<TranslatorInput<FirstT, SecondT>, T> translator) {
  this.valueX = valueX;
  this.valueY = valueY;
  this.translator = translator;
}
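Beam's built-in NestedValueProvider accepts a single input, so the DataflowTemplates project adds this two-input variant. A hypothetical usage sketch based on the constructor above; the getX()/getY() accessors on TranslatorInput are assumed from the same source file, not shown here:

// Hypothetical usage (sketch): combine two runtime values into one derived value.
static ValueProvider<String> outputPath(
    ValueProvider<String> bucket, ValueProvider<String> prefix) {
  return new DualInputNestedValueProvider<String, String, String>(
      bucket, prefix, input -> "gs://" + input.getX() + "/" + input.getY());
}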
Example #11
Source File: CSVStreamingPipelineTest.java From dlp-dataflow-deidentification with Apache License 2.0 | 5 votes

@Test
public void testNewTracker() {
  CSVContentProcessorDoFn csv =
      new CSVContentProcessorDoFn(ValueProvider.StaticValueProvider.of(2));
  OffsetRange off = new OffsetRange(2, 5);
  org.apache.beam.sdk.transforms.splittabledofn.OffsetRangeTracker offTrack = csv.newTracker(off);
  assertEquals(offTrack.currentRestriction(), off);
}
Example #12
Source File: BigQueryIOStorageReadTest.java From beam with Apache License 2.0 | 5 votes

@Test
public void testTableSourceInitialSplit_EmptyTable() throws Exception {
  fakeDatasetService.createDataset("foo.com:project", "dataset", "", "", null);
  TableReference tableRef = BigQueryHelpers.parseTableSpec("foo.com:project:dataset.table");
  Table table =
      new Table()
          .setTableReference(tableRef)
          .setNumBytes(1024L * 1024L)
          .setSchema(new TableSchema());
  fakeDatasetService.createTable(table);

  CreateReadSessionRequest expectedRequest =
      CreateReadSessionRequest.newBuilder()
          .setParent("projects/project-id")
          .setTableReference(BigQueryHelpers.toTableRefProto(tableRef))
          .setRequestedStreams(1024)
          .setShardingStrategy(ShardingStrategy.BALANCED)
          .build();

  ReadSession emptyReadSession = ReadSession.newBuilder().build();
  StorageClient fakeStorageClient = mock(StorageClient.class);
  when(fakeStorageClient.createReadSession(expectedRequest)).thenReturn(emptyReadSession);

  BigQueryStorageTableSource<TableRow> tableSource =
      BigQueryStorageTableSource.create(
          ValueProvider.StaticValueProvider.of(tableRef),
          null,
          null,
          null,
          new TableRowParser(),
          TableRowJsonCoder.of(),
          new FakeBigQueryServices()
              .withDatasetService(fakeDatasetService)
              .withStorageClient(fakeStorageClient));

  List<? extends BoundedSource<TableRow>> sources = tableSource.split(1024L, options);
  assertTrue(sources.isEmpty());
}
Example #13
Source File: FileBasedSink.java From beam with Apache License 2.0 | 5 votes

/**
 * Construct a {@link FileBasedSink} with the given temp directory, producing uncompressed files.
 */
@Experimental(Kind.FILESYSTEM)
public FileBasedSink(
    ValueProvider<ResourceId> tempDirectoryProvider,
    DynamicDestinations<?, DestinationT, OutputT> dynamicDestinations) {
  this(tempDirectoryProvider, dynamicDestinations, Compression.UNCOMPRESSED);
}
Example #14
Source File: CassandraIO.java From beam with Apache License 2.0 | 5 votes

/** Specify the Cassandra instance port number where to write data. */
public Write<T> withPort(int port) {
  checkArgument(
      port > 0,
      "CassandraIO."
          + getMutationTypeName()
          + "().withPort(port) called with invalid port number (%s)",
      port);
  return withPort(ValueProvider.StaticValueProvider.of(port));
}
Example #15
Source File: SplunkIO.java From beam with Apache License 2.0 | 4 votes

CreateKeysFn(ValueProvider<Integer> specifiedParallelism) {
  this.specifiedParallelism = specifiedParallelism;
}
Example #16
Source File: CassandraIO.java From beam with Apache License 2.0 | 4 votes

/** Specify the local DC used for the load balancing. */
public Read<T> withLocalDc(ValueProvider<String> localDc) {
  return builder().setLocalDc(localDc).build();
}
Example #17
Source File: SinkOptions.java From gcp-ingestion with Mozilla Public License 2.0 | 4 votes

@Description("Output to write to (path to file or directory, Pubsub topic, etc.)")
@Validation.Required
ValueProvider<String> getOutput();
Example #18
Source File: DecoderOptions.java From gcp-ingestion with Mozilla Public License 2.0 | 4 votes

@Description(
    "If set to true, assume that all private keys are encrypted with the associated"
        + " KMS resourceId. Otherwise ignore KMS and assume all private keys are stored in"
        + " plaintext. This may be used for debugging.")
@Default.Boolean(true)
ValueProvider<Boolean> getAetKmsEnabled();
Example #19
Source File: DatastoreConverters.java From DataflowTemplates with Apache License 2.0 | 4 votes

@Description("Namespace of the Datastore entity")
ValueProvider<String> getDatastoreWriteNamespace();
Example #20
Source File: BigQueryIOStorageQueryTest.java From beam with Apache License 2.0 | 4 votes

private void doQuerySourceInitialSplit(
    long bundleSize, int requestedStreamCount, int expectedStreamCount) throws Exception {
  TableReference sourceTableRef = BigQueryHelpers.parseTableSpec("project:dataset.table");

  fakeDatasetService.createDataset(
      sourceTableRef.getProjectId(),
      sourceTableRef.getDatasetId(),
      "asia-northeast1",
      "Fake plastic tree^H^H^H^Htables",
      null);

  fakeDatasetService.createTable(
      new Table().setTableReference(sourceTableRef).setLocation("asia-northeast1"));

  Table queryResultTable =
      new Table()
          .setSchema(
              new TableSchema()
                  .setFields(
                      ImmutableList.of(
                          new TableFieldSchema().setName("name").setType("STRING"),
                          new TableFieldSchema().setName("number").setType("INTEGER"))))
          .setNumBytes(1024L * 1024L);

  String encodedQuery = FakeBigQueryServices.encodeQueryResult(queryResultTable);

  fakeJobService.expectDryRunQuery(
      options.getProject(),
      encodedQuery,
      new JobStatistics()
          .setQuery(
              new JobStatistics2()
                  .setTotalBytesProcessed(1024L * 1024L)
                  .setReferencedTables(ImmutableList.of(sourceTableRef))));

  String stepUuid = "testStepUuid";
  TableReference tempTableReference =
      createTempTableReference(
          options.getProject(),
          createJobIdToken(options.getJobName(), stepUuid),
          Optional.empty());

  CreateReadSessionRequest expectedRequest =
      CreateReadSessionRequest.newBuilder()
          .setParent("projects/" + options.getProject())
          .setTableReference(BigQueryHelpers.toTableRefProto(tempTableReference))
          .setRequestedStreams(requestedStreamCount)
          .setShardingStrategy(ShardingStrategy.BALANCED)
          .build();

  Schema sessionSchema =
      SchemaBuilder.record("__root__")
          .fields()
          .name("name")
          .type()
          .nullable()
          .stringType()
          .noDefault()
          .name("number")
          .type()
          .nullable()
          .longType()
          .noDefault()
          .endRecord();

  ReadSession.Builder builder =
      ReadSession.newBuilder()
          .setAvroSchema(AvroSchema.newBuilder().setSchema(sessionSchema.toString()));
  for (int i = 0; i < expectedStreamCount; i++) {
    builder.addStreams(Stream.newBuilder().setName("stream-" + i));
  }

  StorageClient fakeStorageClient = mock(StorageClient.class);
  when(fakeStorageClient.createReadSession(expectedRequest)).thenReturn(builder.build());

  BigQueryStorageQuerySource<TableRow> querySource =
      BigQueryStorageQuerySource.create(
          stepUuid,
          ValueProvider.StaticValueProvider.of(encodedQuery),
          /* flattenResults = */ true,
          /* useLegacySql = */ true,
          /* priority = */ QueryPriority.BATCH,
          /* location = */ null,
          /* queryTempDataset = */ null,
          /* kmsKey = */ null,
          new TableRowParser(),
          TableRowJsonCoder.of(),
          new FakeBigQueryServices()
              .withDatasetService(fakeDatasetService)
              .withJobService(fakeJobService)
              .withStorageClient(fakeStorageClient));

  List<? extends BoundedSource<TableRow>> sources = querySource.split(bundleSize, options);
  assertEquals(expectedStreamCount, sources.size());
}
Example #21
Source File: BigtableToParquet.java From DataflowTemplates with Apache License 2.0 | 4 votes

@Description("The Bigtable instance id that contains the table to export.")
ValueProvider<String> getBigtableInstanceId();
Example #22
Source File: BigtableToAvro.java From DataflowTemplates with Apache License 2.0 | 4 votes

@Description("The Bigtable instance id that contains the table to export.")
ValueProvider<String> getBigtableInstanceId();
Example #23
Source File: RedisConnectionConfiguration.java From beam with Apache License 2.0 | 4 votes

public static RedisConnectionConfiguration create(
    ValueProvider<String> host, ValueProvider<Integer> port) {
  return create().withHost(host).withPort(port);
}
Example #24
Source File: RedisConnectionConfiguration.java From beam with Apache License 2.0 | 4 votes

/** See {@link RedisConnectionConfiguration#withHost(String)}. */
public RedisConnectionConfiguration withHost(ValueProvider<String> host) {
  return builder().setHost(host).build();
}
Example #25
Source File: RepublisherOptions.java From gcp-ingestion with Mozilla Public License 2.0 | 4 votes

@Description(
    "Duration for which document IDs should be stored for deduplication."
        + " Allowed formats are: Ns (for seconds, example: 5s),"
        + " Nm (for minutes, example: 12m), Nh (for hours, example: 2h)."
        + " Can be omitted if --redisUri is unset.")
@Default.String("24h")
ValueProvider<String> getDeduplicateExpireDuration();
Example #26
Source File: FileIO.java From beam with Apache License 2.0 | 4 votes

/** Like {@link #withPrefix(String)} but with a {@link ValueProvider}. */
public Write<DestinationT, UserT> withPrefix(ValueProvider<String> prefix) {
  checkArgument(prefix != null, "prefix can not be null");
  return toBuilder().setFilenamePrefix(prefix).build();
}
Example #27
Source File: TokenizePipelineOptions.java From dlp-dataflow-deidentification with Apache License 2.0 | 4 votes

@Description("DataSet Spec")
ValueProvider<String> getDataset();
Example #28
Source File: PubsubToBigQueryTest.java From DataflowTemplates with Apache License 2.0 | 4 votes

/** Tests the {@link PubSubToBigQuery} pipeline end-to-end. */
@Test
public void testPubsubToBigQueryE2E() throws Exception {
  // Test input
  final String payload = "{\"ticker\": \"GOOGL\", \"price\": 1006.94}";
  final PubsubMessage message =
      new PubsubMessage(payload.getBytes(), ImmutableMap.of("id", "123", "type", "custom_event"));

  final Instant timestamp =
      new DateTime(2022, 2, 22, 22, 22, 22, 222, DateTimeZone.UTC).toInstant();

  final FailsafeElementCoder<PubsubMessage, String> coder =
      FailsafeElementCoder.of(PubsubMessageWithAttributesCoder.of(), StringUtf8Coder.of());

  CoderRegistry coderRegistry = pipeline.getCoderRegistry();
  coderRegistry.registerCoderForType(coder.getEncodedTypeDescriptor(), coder);

  // Parameters
  ValueProvider<String> transformPath = pipeline.newProvider(TRANSFORM_FILE_PATH);
  ValueProvider<String> transformFunction = pipeline.newProvider("transform");

  PubSubToBigQuery.Options options =
      PipelineOptionsFactory.create().as(PubSubToBigQuery.Options.class);
  options.setJavascriptTextTransformGcsPath(transformPath);
  options.setJavascriptTextTransformFunctionName(transformFunction);

  // Build pipeline
  PCollectionTuple transformOut =
      pipeline
          .apply(
              "CreateInput",
              Create.timestamped(TimestampedValue.of(message, timestamp))
                  .withCoder(PubsubMessageWithAttributesCoder.of()))
          .apply("ConvertMessageToTableRow", new PubsubMessageToTableRow(options));

  // Assert
  PAssert.that(transformOut.get(PubSubToBigQuery.UDF_DEADLETTER_OUT)).empty();
  PAssert.that(transformOut.get(PubSubToBigQuery.TRANSFORM_DEADLETTER_OUT)).empty();
  PAssert.that(transformOut.get(PubSubToBigQuery.TRANSFORM_OUT))
      .satisfies(
          collection -> {
            TableRow result = collection.iterator().next();
            assertThat(result.get("ticker"), is(equalTo("GOOGL")));
            assertThat(result.get("price"), is(equalTo(1006.94)));
            return null;
          });

  // Execute pipeline
  pipeline.run();
}
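The transformPath and transformFunction providers above come from TestPipeline#newProvider, which creates a ValueProvider that is deliberately unreadable while the graph is built and only yields its value once the test pipeline runs. A minimal sketch of that behavior in isolation (class and test names are illustrative):

import static org.junit.Assert.assertFalse;

import org.apache.beam.sdk.options.ValueProvider;
import org.apache.beam.sdk.testing.TestPipeline;
import org.junit.Rule;
import org.junit.Test;

public class NewProviderSketch {

  @Rule public final transient TestPipeline pipeline = TestPipeline.create();

  @Test
  public void providerIsDeferredUntilRun() {
    ValueProvider<String> provider = pipeline.newProvider("some-value");
    // Not readable at graph-construction time...
    assertFalse(provider.isAccessible());
    // ...but a DoFn calling provider.get() during execution would see "some-value".
  }
}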
Example #29
Source File: BigtableToAvro.java From DataflowTemplates with Apache License 2.0 | 4 votes

@SuppressWarnings("unused")
void setBigtableProjectId(ValueProvider<String> projectId);
Example #30
Source File: CassandraToBigtable.java From DataflowTemplates with Apache License 2.0 | 4 votes

@Description(
    "If true, a large row is split into multiple MutateRows requests. When a row is"
        + " split across requests, updates are not atomic. ")
ValueProvider<Boolean> getSplitLargeRows();