org.apache.beam.sdk.options.PipelineOptions Java Examples
The following examples show how to use
org.apache.beam.sdk.options.PipelineOptions.
Each example below lists its original project and source file.
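Before the examples, a minimal sketch of the core pattern they all build on: define a sub-interface of PipelineOptions and let PipelineOptionsFactory parse command-line flags into it. The MyOptions interface and its inputFile option here are hypothetical, for illustration only.

import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class OptionsExample {
  /** Hypothetical custom options; Beam generates the implementation via a dynamic proxy. */
  public interface MyOptions extends PipelineOptions {
    @Description("Path of the file to read from")
    @Default.String("/tmp/input.txt")
    String getInputFile();

    void setInputFile(String value);
  }

  public static void main(String[] args) {
    // Parses flags such as --inputFile=gs://bucket/file into the typed interface.
    MyOptions options =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(MyOptions.class);
    System.out.println(options.getInputFile());
  }
}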
Example #1
Source File: SourceTestUtils.java From beam with Apache License 2.0
/**
 * Given a reference {@code Source} and a list of {@code Source}s, assert that the union of the
 * records read from the list of sources is equal to the records read from the reference source.
 */
public static <T> void assertSourcesEqualReferenceSource(
    BoundedSource<T> referenceSource,
    List<? extends BoundedSource<T>> sources,
    PipelineOptions options)
    throws Exception {
  Coder<T> coder = referenceSource.getOutputCoder();
  List<T> referenceRecords = readFromSource(referenceSource, options);
  List<T> bundleRecords = new ArrayList<>();
  for (BoundedSource<T> source : sources) {
    assertThat(
        "Coder type for source "
            + source
            + " is not compatible with Coder type for referenceSource "
            + referenceSource,
        source.getOutputCoder(),
        equalTo(coder));
    List<T> elems = readFromSource(source, options);
    bundleRecords.addAll(elems);
  }
  List<ReadableStructuralValue<T>> bundleValues = createStructuralValues(coder, bundleRecords);
  List<ReadableStructuralValue<T>> referenceValues =
      createStructuralValues(coder, referenceRecords);
  assertThat(bundleValues, containsInAnyOrder(referenceValues.toArray()));
}
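A sketch of how a test might call this helper, assuming the Beam test utilities (SourceTestUtils lives in beam-sdks-java-core's test artifacts) are on the classpath; CountingSource.upTo and the 100-byte bundle size are illustrative choices, not part of the helper's contract.

import java.util.List;
import org.apache.beam.sdk.io.BoundedSource;
import org.apache.beam.sdk.io.CountingSource;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.testing.SourceTestUtils;

public class SplitConsistencyCheck {
  public static void main(String[] args) throws Exception {
    PipelineOptions options = PipelineOptionsFactory.create();
    BoundedSource<Long> reference = CountingSource.upTo(1000);
    // Ask the source to split itself into ~100-byte bundles, then verify that
    // the union of records read from the splits matches the reference source.
    List<? extends BoundedSource<Long>> splits = reference.split(100, options);
    SourceTestUtils.assertSourcesEqualReferenceSource(reference, splits, options);
  }
}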
Example #2
Source File: HBaseIO.java From beam with Apache License 2.0
@Override
public long getEstimatedSizeBytes(PipelineOptions pipelineOptions) throws Exception {
  if (estimatedSizeBytes == null) {
    try (Connection connection = ConnectionFactory.createConnection(read.configuration)) {
      estimatedSizeBytes =
          HBaseUtils.estimateSizeBytes(
              connection, read.tableId, HBaseUtils.getByteKeyRange(read.scan));
    }
    LOG.debug(
        "Estimated size {} bytes for table {} and scan {}",
        estimatedSizeBytes,
        read.tableId,
        read.scan);
  }
  return estimatedSizeBytes;
}
Example #3
Source File: GcpOptions.java From beam with Apache License 2.0
@Override
@Nullable
public String create(PipelineOptions options) {
  String tempLocation = options.getTempLocation();
  if (isNullOrEmpty(tempLocation)) {
    tempLocation =
        tryCreateDefaultBucket(
            options,
            newCloudResourceManagerClient(options.as(CloudResourceManagerOptions.class))
                .build());
    options.setTempLocation(tempLocation);
  } else {
    try {
      PathValidator validator = options.as(GcsOptions.class).getPathValidator();
      validator.validateOutputFilePrefixSupported(tempLocation);
    } catch (Exception e) {
      throw new IllegalArgumentException(
          String.format(
              "Error constructing default value for gcpTempLocation: tempLocation is not"
                  + " a valid GCS path, %s. ",
              tempLocation),
          e);
    }
  }
  return tempLocation;
}
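This create(PipelineOptions) method implements Beam's DefaultValueFactory contract, which computes an option's default lazily from other options the first time it is read. A minimal sketch of the same pattern; MyOptions, getScratchDir, and TempDirFactory are hypothetical names for illustration.

import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.DefaultValueFactory;
import org.apache.beam.sdk.options.PipelineOptions;

public interface MyOptions extends PipelineOptions {
  /** Hypothetical option whose default is derived from other options at first read. */
  @Default.InstanceFactory(TempDirFactory.class)
  String getScratchDir();

  void setScratchDir(String value);

  /** Invoked lazily, the first time getScratchDir() is called without an explicit value. */
  class TempDirFactory implements DefaultValueFactory<String> {
    @Override
    public String create(PipelineOptions options) {
      return options.getTempLocation() + "/scratch";
    }
  }
}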
Example #4
Source File: GroupingShuffleReaderWithFaultyBytesReadCounter.java From beam with Apache License 2.0
public GroupingShuffleReaderWithFaultyBytesReadCounter(
    PipelineOptions options,
    byte[] shuffleReaderConfig,
    @Nullable String startShufflePosition,
    @Nullable String stopShufflePosition,
    Coder<WindowedValue<KV<K, Iterable<V>>>> coder,
    BatchModeExecutionContext executionContext,
    DataflowOperationContext operationContext,
    boolean sortValues)
    throws Exception {
  super(
      options,
      shuffleReaderConfig,
      startShufflePosition,
      stopShufflePosition,
      coder,
      executionContext,
      operationContext,
      ShuffleReadCounterFactory.INSTANCE,
      sortValues);
}
Example #5
Source File: DefaultJobBundleFactory.java From beam with Apache License 2.0
private static boolean shouldLoadBalanceBundles(JobInfo jobInfo) {
  PipelineOptions pipelineOptions =
      PipelineOptionsTranslation.fromProto(jobInfo.pipelineOptions());
  boolean loadBalanceBundles =
      pipelineOptions.as(PortablePipelineOptions.class).getLoadBalanceBundles();
  if (loadBalanceBundles) {
    int stateCacheSize =
        Integer.parseInt(
            MoreObjects.firstNonNull(
                ExperimentalOptions.getExperimentValue(
                    pipelineOptions, ExperimentalOptions.STATE_CACHE_SIZE),
                "0"));
    Preconditions.checkArgument(
        stateCacheSize == 0,
        "%s must be 0 when using bundle load balancing",
        ExperimentalOptions.STATE_CACHE_SIZE);
  }
  return loadBalanceBundles;
}
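For context, a sketch of how a user might configure the options this method inspects. This is an assumption-laden illustration: it presumes PortablePipelineOptions exposes a setLoadBalanceBundles setter matching the getter used above, and uses the state_cache_size experiment string seen in the check.

import org.apache.beam.sdk.options.ExperimentalOptions;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.PortablePipelineOptions;

public class LoadBalanceConfigExample {
  public static void main(String[] args) {
    PipelineOptions options = PipelineOptionsFactory.create();
    // Opt in to bundle load balancing; the precondition above requires the
    // state cache to stay disabled (size 0) in this mode.
    options.as(PortablePipelineOptions.class).setLoadBalanceBundles(true);
    ExperimentalOptions.addExperiment(
        options.as(ExperimentalOptions.class), "state_cache_size=0");
  }
}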
Example #6
Source File: FileBasedSourceTest.java From beam with Apache License 2.0
@Test
public void testSplitAtFraction() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  File file = createFileWithData("file", createStringDataset(3, 100));
  Metadata metadata = FileSystems.matchSingleFileSpec(file.getPath());
  TestFileBasedSource source = new TestFileBasedSource(metadata, 1, 0, file.length(), null);
  // Shouldn't be able to split while unstarted.
  assertSplitAtFractionFails(source, 0, 0.7, options);
  assertSplitAtFractionSucceedsAndConsistent(source, 1, 0.7, options);
  assertSplitAtFractionSucceedsAndConsistent(source, 30, 0.7, options);
  assertSplitAtFractionFails(source, 0, 0.0, options);
  assertSplitAtFractionFails(source, 70, 0.3, options);
  assertSplitAtFractionFails(source, 100, 1.0, options);
  assertSplitAtFractionFails(source, 100, 0.99, options);
  assertSplitAtFractionSucceedsAndConsistent(source, 100, 0.995, options);
}
Example #7
Source File: PipelineOptionsTranslationTest.java From beam with Apache License 2.0
@Parameters(name = "{index}: {0}")
public static Iterable<? extends PipelineOptions> options() {
  PipelineOptionsFactory.register(TestUnserializableOptions.class);
  PipelineOptionsFactory.register(TestDefaultOptions.class);
  PipelineOptionsFactory.register(TestOptions.class);
  PipelineOptions emptyOptions = PipelineOptionsFactory.create();

  TestUnserializableOptions withNonSerializable =
      PipelineOptionsFactory.as(TestUnserializableOptions.class);
  withNonSerializable.setUnserializable(new Object());

  TestOptions withCustomField = PipelineOptionsFactory.as(TestOptions.class);
  withCustomField.setExample(99);

  PipelineOptions withSettings = PipelineOptionsFactory.create();
  withSettings.as(ApplicationNameOptions.class).setAppName("my_app");
  withSettings.setJobName("my_job");

  // Each flag must be its own argument; a single combined string would be
  // parsed as one malformed flag.
  PipelineOptions withParsedSettings =
      PipelineOptionsFactory.fromArgs("--jobName=my_job", "--appName=my_app").create();

  return ImmutableList.of(
      emptyOptions, withNonSerializable, withCustomField, withSettings, withParsedSettings);
}
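A sketch of the round trip these parameterized tests exercise, assuming the beam-runners-core-construction-java artifact (where PipelineOptionsTranslation lives). Note the Struct import: newer Beam releases use a vendored protobuf copy instead of com.google.protobuf, so the exact import depends on your Beam version.

import com.google.protobuf.Struct;
import org.apache.beam.runners.core.construction.PipelineOptionsTranslation;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class OptionsRoundTrip {
  public static void main(String[] args) {
    PipelineOptions options = PipelineOptionsFactory.fromArgs("--jobName=my_job").create();
    // Options serialize to a protobuf Struct whose keys look like "beam:option:...:v1",
    // as in the struct-deserialization test further below (Example #21).
    Struct proto = PipelineOptionsTranslation.toProto(options);
    PipelineOptions reconstructed = PipelineOptionsTranslation.fromProto(proto);
    System.out.println(reconstructed.getJobName());
  }
}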
Example #8
Source File: DatastoreV1.java From beam with Apache License 2.0
/** Returns the number of entities available for reading. */
public long getNumEntities(
    PipelineOptions options, String ourKind, @Nullable String namespace) {
  try {
    V1Options v1Options = V1Options.from(getProjectId(), getNamespace(), getLocalhost());
    V1DatastoreFactory datastoreFactory = new V1DatastoreFactory();
    Datastore datastore =
        datastoreFactory.getDatastore(
            options, v1Options.getProjectId(), v1Options.getLocalhost());

    Entity entity = getLatestTableStats(ourKind, namespace, datastore);
    return entity.getProperties().get("count").getIntegerValue();
  } catch (Exception e) {
    return -1;
  }
}
Example #9
Source File: FileBasedSourceTest.java From beam with Apache License 2.0
@Test
public void testCloseUnstartedFilePatternReader() throws IOException {
  PipelineOptions options = PipelineOptionsFactory.create();
  List<String> data1 = createStringDataset(3, 50);
  File file1 = createFileWithData("file1", data1);

  List<String> data2 = createStringDataset(3, 50);
  createFileWithData("file2", data2);

  List<String> data3 = createStringDataset(3, 50);
  createFileWithData("file3", data3);

  List<String> data4 = createStringDataset(3, 50);
  createFileWithData("otherfile", data4);

  TestFileBasedSource source =
      new TestFileBasedSource(new File(file1.getParent(), "file*").getPath(), 64, null);
  Reader<String> reader = source.createReader(options);
  // Closing an unstarted FilePatternReader should not throw an exception.
  try {
    reader.close();
  } catch (Exception e) {
    throw new AssertionError(
        "Closing an unstarted FilePatternReader should not throw an exception", e);
  }
}
Example #10
Source File: BoundedReadFromUnboundedSource.java From beam with Apache License 2.0
@ProcessElement
public void process(
    @Element Shard<T> shard, OutputReceiver<Shard<T>> out, PipelineOptions options)
    throws Exception {
  int numInitialSplits = numInitialSplits(shard.getMaxNumRecords());
  List<? extends UnboundedSource<T, ?>> splits =
      shard.getSource().split(numInitialSplits, options);
  int numSplits = splits.size();
  long[] numRecords = splitNumRecords(shard.getMaxNumRecords(), numSplits);
  for (int i = 0; i < numSplits; i++) {
    out.output(
        shard
            .toBuilder()
            .setSource(splits.get(i))
            .setMaxNumRecords(numRecords[i])
            .setMaxReadTime(shard.getMaxReadTime())
            .build());
  }
}
Example #11
Source File: AbstractDoFnTransform.java From incubator-nemo with Apache License 2.0
/**
 * AbstractDoFnTransform constructor.
 *
 * @param doFn doFn
 * @param inputCoder input coder
 * @param outputCoders output coders
 * @param mainOutputTag main output tag
 * @param additionalOutputTags additional output tags
 * @param windowingStrategy windowing strategy
 * @param sideInputs side inputs
 * @param options pipeline options
 * @param displayData display data
 * @param doFnSchemaInformation DoFn schema information
 * @param sideInputMapping side input mapping
 */
public AbstractDoFnTransform(final DoFn<InterT, OutputT> doFn,
                             final Coder<InputT> inputCoder,
                             final Map<TupleTag<?>, Coder<?>> outputCoders,
                             final TupleTag<OutputT> mainOutputTag,
                             final List<TupleTag<?>> additionalOutputTags,
                             final WindowingStrategy<?, ?> windowingStrategy,
                             final Map<Integer, PCollectionView<?>> sideInputs,
                             final PipelineOptions options,
                             final DisplayData displayData,
                             final DoFnSchemaInformation doFnSchemaInformation,
                             final Map<String, PCollectionView<?>> sideInputMapping) {
  this.doFn = doFn;
  this.inputCoder = inputCoder;
  this.outputCoders = outputCoders;
  this.mainOutputTag = mainOutputTag;
  this.additionalOutputTags = additionalOutputTags;
  this.sideInputs = sideInputs;
  this.serializedOptions = new SerializablePipelineOptions(options);
  this.windowingStrategy = windowingStrategy;
  this.displayData = displayData;
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.sideInputMapping = sideInputMapping;
}
Example #12
Source File: JdbcExportArgsFactory.java From dbeam with Apache License 2.0
public static JdbcExportArgs fromPipelineOptions(final PipelineOptions options)
    throws ClassNotFoundException, IOException {
  final JdbcExportPipelineOptions exportOptions = options.as(JdbcExportPipelineOptions.class);
  final JdbcAvroArgs jdbcAvroArgs =
      JdbcAvroArgs.create(
          JdbcConnectionArgs.create(exportOptions.getConnectionUrl())
              .withUsername(exportOptions.getUsername())
              .withPassword(PasswordReader.INSTANCE.readPassword(exportOptions).orElse(null)),
          exportOptions.getFetchSize(),
          exportOptions.getAvroCodec(),
          Optional.ofNullable(exportOptions.getPreCommand()).orElse(Collections.emptyList()));

  return JdbcExportArgs.create(
      jdbcAvroArgs,
      createQueryArgs(exportOptions),
      exportOptions.getAvroSchemaNamespace(),
      Optional.ofNullable(exportOptions.getAvroDoc()),
      exportOptions.isUseAvroLogicalTypes(),
      Duration.parse(exportOptions.getExportTimeout()),
      BeamJdbcAvroSchema.parseOptionalInputAvroSchemaFile(exportOptions.getAvroSchemaFilePath()));
}
Example #13
Source File: GroupingShuffleReaderTest.java From beam with Apache License 2.0
private void runTestBytesReadCounter(
    List<KV<Integer, List<KV<Integer, Integer>>>> input,
    boolean useSecondaryKey,
    ValuesToRead valuesToRead,
    long expectedReadBytes)
    throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  runTestBytesReadCounterForOptions(
      options, input, useSecondaryKey, valuesToRead, expectedReadBytes);

  // TODO: Remove experimental worker code once inter-transform IO has shipped.
  options
      .as(DataflowPipelineDebugOptions.class)
      .setExperiments(Lists.newArrayList(Experiment.IntertransformIO.getName()));
  runTestBytesReadCounterForOptions(
      options, input, useSecondaryKey, valuesToRead, expectedReadBytes);
}
Example #14
Source File: CountingSource.java From beam with Apache License 2.0
/**
 * Splits an unbounded source {@code desiredNumSplits} ways by giving each split every {@code
 * desiredNumSplits}th element that this {@link UnboundedCountingSource} produces.
 *
 * <p>E.g., if a source produces all even numbers {@code [0, 2, 4, 6, 8, ...)} and we want to
 * split into 3 new sources, then the new sources will produce numbers that are 6 apart and are
 * offset at the start by the original stride: {@code [0, 6, 12, ...)}, {@code [2, 8, 14, ...)},
 * and {@code [4, 10, 16, ...)}.
 */
@Override
public List<? extends UnboundedSource<Long, CountingSource.CounterMark>> split(
    int desiredNumSplits, PipelineOptions options) throws Exception {
  // Using Javadoc example, stride 2 with 3 splits becomes stride 6.
  long newStride = stride * desiredNumSplits;

  ImmutableList.Builder<UnboundedCountingSource> splits = ImmutableList.builder();
  for (int i = 0; i < desiredNumSplits; ++i) {
    // Starts offset by the original stride. Using Javadoc example, this generates starts of
    // 0, 2, and 4.
    splits.add(
        new UnboundedCountingSource(
            start + i * stride, newStride, elementsPerPeriod, period, timestampFn));
  }
  return splits.build();
}
Example #15
Source File: Task.java From beam with Apache License 2.0
public static void main(String[] args) {
  PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
  Pipeline pipeline = Pipeline.create(options);

  PCollection<String> fruits =
      pipeline.apply("Fruits",
          Create.of("apple", "banana", "cherry")
      );

  PCollection<String> countries =
      pipeline.apply("Countries",
          Create.of("australia", "brazil", "canada")
      );

  PCollection<String> output = applyTransform(fruits, countries);

  output.apply(Log.ofElements());

  pipeline.run();
}
Example #16
Source File: ShuffleSinkFactoryTest.java From beam with Apache License 2.0
private ShuffleSink runTestCreateShuffleSinkHelper(
    byte[] shuffleWriterConfig,
    String shuffleKind,
    Coder<?> deserializedCoder,
    FullWindowedValueCoder<?> coder)
    throws Exception {
  CloudObject spec = CloudObject.forClassName("ShuffleSink");
  addString(spec, "shuffle_writer_config", encodeBase64String(shuffleWriterConfig));
  addString(spec, "shuffle_kind", shuffleKind);

  PipelineOptions options = PipelineOptionsFactory.create();

  ShuffleSinkFactory factory = new ShuffleSinkFactory();
  Sink<?> sink =
      factory.create(
          spec,
          deserializedCoder,
          options,
          BatchModeExecutionContext.forTesting(options, "testStage"),
          TestOperationContext.create());
  Assert.assertThat(sink, new IsInstanceOf(ShuffleSink.class));
  ShuffleSink shuffleSink = (ShuffleSink) sink;
  Assert.assertArrayEquals(shuffleWriterConfig, shuffleSink.shuffleWriterConfig);
  Assert.assertEquals(coder, shuffleSink.windowedElemCoder);
  return shuffleSink;
}
Example #17
Source File: DataflowPipelineTranslator.java From beam with Apache License 2.0
private static byte[] serializeWindowingStrategy(
    WindowingStrategy<?, ?> windowingStrategy, PipelineOptions options) {
  try {
    SdkComponents sdkComponents = SdkComponents.create();

    String workerHarnessContainerImageURL =
        DataflowRunner.getContainerImageForJob(options.as(DataflowPipelineOptions.class));
    RunnerApi.Environment defaultEnvironmentForDataflow =
        Environments.createDockerEnvironment(workerHarnessContainerImageURL);
    sdkComponents.registerEnvironment(defaultEnvironmentForDataflow);

    return WindowingStrategyTranslation.toMessageProto(windowingStrategy, sdkComponents)
        .toByteArray();
  } catch (Exception e) {
    throw new RuntimeException(
        String.format("Unable to format windowing strategy %s as bytes", windowingStrategy),
        e);
  }
}
Example #18
Source File: KinesisSource.java From beam with Apache License 2.0
/**
 * Creates a reader based on the given {@link KinesisReaderCheckpoint}. If no checkpoint is
 * given, {@code initialCheckpointGenerator} is used to generate a new one.
 */
@Override
public UnboundedReader<KinesisRecord> createReader(
    PipelineOptions options, KinesisReaderCheckpoint checkpointMark) {
  CheckpointGenerator checkpointGenerator = initialCheckpointGenerator;
  if (checkpointMark != null) {
    checkpointGenerator = new StaticCheckpointGenerator(checkpointMark);
  }

  LOG.info("Creating new reader using {}", checkpointGenerator);

  return new KinesisReader(
      SimplifiedKinesisClient.from(awsClientsProvider, limit),
      checkpointGenerator,
      this,
      watermarkPolicyFactory,
      rateLimitPolicyFactory,
      upToDateThreshold,
      maxCapacityPerShard);
}
Example #19
Source File: BatchGroupAlsoByWindowAndCombineFn.java From beam with Apache License 2.0
private CombineWithContext.Context createFromComponents(
    final PipelineOptions pipelineOptions,
    final SideInputReader sideInputReader,
    final BoundedWindow mainInputWindow) {
  return new CombineWithContext.Context() {
    @Override
    public PipelineOptions getPipelineOptions() {
      return pipelineOptions;
    }

    @Override
    public <T> T sideInput(PCollectionView<T> view) {
      return sideInputReader.get(
          view, view.getWindowMappingFn().getSideInputWindow(mainInputWindow));
    }
  };
}
Example #20
Source File: BeamCalciteTable.java From beam with Apache License 2.0
BeamCalciteTable(
    BeamSqlTable beamTable,
    Map<String, String> pipelineOptionsMap,
    PipelineOptions pipelineOptions) {
  super(Object[].class);
  this.beamTable = beamTable;
  this.pipelineOptionsMap = pipelineOptionsMap;
  this.pipelineOptions = pipelineOptions;
}
Example #21
Source File: PipelineOptionsTranslationTest.java From beam with Apache License 2.0
@Test
public void structWithNullOptionsDeserializes() throws Exception {
  Struct serialized =
      Struct.newBuilder()
          .putFields(
              "beam:option:option_key:v1",
              Value.newBuilder().setNullValue(NullValue.NULL_VALUE).build())
          .build();
  PipelineOptions deserialized = PipelineOptionsTranslation.fromProto(serialized);

  assertThat(deserialized, notNullValue());
}
Example #22
Source File: BigtableIO.java From beam with Apache License 2.0
@Override
public long getEstimatedSizeBytes(PipelineOptions options) throws IOException {
  // Delegate to testable helper.
  if (estimatedSizeBytes == null) {
    estimatedSizeBytes = getEstimatedSizeBytesBasedOnSamples(getSampleRowKeys(options));
  }
  return estimatedSizeBytes;
}
Example #23
Source File: KuduIO.java From beam with Apache License 2.0
@Override
public List<BoundedSource<T>> split(long desiredBundleSizeBytes, PipelineOptions options)
    throws KuduException {
  if (serializedToken != null) {
    return Collections.singletonList(this); // we are already a split
  } else {
    Stream<BoundedSource<T>> sources =
        spec.getKuduService().createTabletScanners(spec).stream()
            .map(s -> new KuduIO.KuduSource<T>(spec, spec.getCoder(), s));
    return sources.collect(Collectors.toList());
  }
}
Example #24
Source File: ParDoEvaluator.java From beam with Apache License 2.0
PushbackSideInputDoFnRunner<InputT, OutputT> createRunner(
    PipelineOptions options,
    DoFn<InputT, OutputT> fn,
    List<PCollectionView<?>> sideInputs,
    ReadyCheckingSideInputReader sideInputReader,
    OutputManager outputManager,
    TupleTag<OutputT> mainOutputTag,
    List<TupleTag<?>> additionalOutputTags,
    DirectStepContext stepContext,
    Coder<InputT> inputCoder,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    WindowingStrategy<?, ? extends BoundedWindow> windowingStrategy,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping);
Example #25
Source File: WindowRuntimeTest.java From components with Apache License 2.0
@Test
public void testFixedWindow() {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(DirectRunner.class);
  final Pipeline p = Pipeline.create(options);

  // Creation of a PCollection<IndexedRecord> with elements at different timestamps.
  List<TimestampedValue<IndexedRecord>> data =
      Arrays.asList(
          TimestampedValue.of(irA, new Instant(1L)),
          TimestampedValue.of(irB, new Instant(2L)),
          TimestampedValue.of(irC, new Instant(3L)));
  PCollection<IndexedRecord> input =
      (PCollection<IndexedRecord>)
          p.apply(Create.timestamped(data).withCoder(LazyAvroCoder.of()));

  WindowProperties windowProperties = new WindowProperties("window");
  windowProperties.windowLength.setValue(2);
  windowProperties.windowSlideLength.setValue(-1);
  windowProperties.windowSession.setValue(false);

  windowProperties.setValue("windowLength", 2);
  windowProperties.setValue("windowSlideLength", -1);
  windowProperties.setValue("windowSession", false);

  WindowRuntime windowRun = new WindowRuntime();
  windowRun.initialize(null, windowProperties);

  PCollection<IndexedRecord> test = windowRun.expand(input);

  PCollection<KV<IndexedRecord, Long>> windowed_counts =
      test.apply(Count.<IndexedRecord>perElement());

  /////////
  // Fixed duration: 2
  PAssert.that(windowed_counts)
      .containsInAnyOrder(KV.of(irA, 1L), KV.of(irB, 1L), KV.of(irC, 1L));

  p.run();
}
Example #26
Source File: BeamEnumerableConverter.java From beam with Apache License 2.0
public static PipelineOptions createPipelineOptions(Map<String, String> map) {
  final String[] args = new String[map.size()];
  int i = 0;
  for (Map.Entry<String, String> entry : map.entrySet()) {
    args[i++] = "--" + entry.getKey() + "=" + entry.getValue();
  }
  PipelineOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().create();
  FileSystems.setDefaultPipelineOptions(options);
  options.as(ApplicationNameOptions.class).setAppName("BeamSql");
  return options;
}
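A possible call site, turning a flat map of flag names into typed options; the map contents and the BeamEnumerableConverter import path (from the Beam SQL module) are assumptions for illustration.

import java.util.HashMap;
import java.util.Map;
import org.apache.beam.sdk.extensions.sql.impl.rel.BeamEnumerableConverter;
import org.apache.beam.sdk.options.PipelineOptions;

public class CreateOptionsFromMap {
  public static void main(String[] args) {
    Map<String, String> map = new HashMap<>();
    map.put("tempLocation", "/tmp/beam"); // rendered as --tempLocation=/tmp/beam
    PipelineOptions options = BeamEnumerableConverter.createPipelineOptions(map);
    System.out.println(options.getTempLocation());
  }
}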
Example #27
Source File: WindowingWindmillReader.java From beam with Apache License 2.0
@Override
public NativeReader<?> create(
    CloudObject spec,
    Coder<?> coder,
    @Nullable PipelineOptions options,
    @Nullable DataflowExecutionContext context,
    DataflowOperationContext operationContext)
    throws Exception {
  checkArgument(coder != null, "coder must not be null");

  @SuppressWarnings({"rawtypes", "unchecked"})
  Coder<WindowedValue<KeyedWorkItem<Object, Object>>> typedCoder =
      (Coder<WindowedValue<KeyedWorkItem<Object, Object>>>) coder;
  return WindowingWindmillReader.create(typedCoder, (StreamingModeExecutionContext) context);
}
Example #28
Source File: FnApiStateAccessor.java From beam with Apache License 2.0
@Override
public <ElementT, AccumT, ResultT>
    CombiningState<ElementT, AccumT, ResultT> bindCombiningWithContext(
        String id,
        StateSpec<CombiningState<ElementT, AccumT, ResultT>> spec,
        Coder<AccumT> accumCoder,
        CombineFnWithContext<ElementT, AccumT, ResultT> combineFn) {
  return (CombiningState<ElementT, AccumT, ResultT>)
      stateKeyObjectCache.computeIfAbsent(
          createBagUserStateKey(id),
          key ->
              bindCombining(
                  id,
                  spec,
                  accumCoder,
                  CombineFnUtil.bindContext(
                      combineFn,
                      new StateContext<BoundedWindow>() {
                        @Override
                        public PipelineOptions getPipelineOptions() {
                          return pipelineOptions;
                        }

                        @Override
                        public <T> T sideInput(PCollectionView<T> view) {
                          return get(view, currentWindowSupplier.get());
                        }

                        @Override
                        public BoundedWindow window() {
                          return currentWindowSupplier.get();
                        }
                      })));
}
Example #29
Source File: SourceTestUtils.java From beam with Apache License 2.0
/** Reads all elements from the given {@link BoundedSource}. */
public static <T> List<T> readFromSource(BoundedSource<T> source, PipelineOptions options)
    throws IOException {
  try (BoundedSource.BoundedReader<T> reader = source.createReader(options)) {
    return readFromUnstartedReader(reader);
  }
}
Example #30
Source File: DataStoreV1Table.java From beam with Apache License 2.0
@Override
public BeamTableStatistics getTableStatistics(PipelineOptions options) {
  long count =
      DatastoreIO.v1().read().withProjectId(projectId).getNumEntities(options, kind, null);

  if (count < 0) {
    return BeamTableStatistics.BOUNDED_UNKNOWN;
  }

  return BeamTableStatistics.createBoundedTableStatistics((double) count);
}