com.google.cloud.dataflow.sdk.options.PipelineOptions Java Examples
The following examples show how to use
com.google.cloud.dataflow.sdk.options.PipelineOptions.
Each example is taken from an open-source project; the source file and license are noted above the code.
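Most of the examples below obtain their PipelineOptions in one of three ways: SDK defaults via PipelineOptionsFactory.create(), a typed subinterface via PipelineOptionsFactory.as(...), or a re-typed view of an existing instance via options.as(...). Here is a minimal sketch of all three patterns using only SDK classes; the surrounding class, main method, and "my-project" value are illustrative scaffolding, not taken from the examples:

import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
import com.google.cloud.dataflow.sdk.options.GcsOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;

public class OptionsPatterns {
  public static void main(String[] args) {
    // 1. SDK defaults, as in the unit tests below.
    PipelineOptions options = PipelineOptionsFactory.create();

    // 2. A typed subinterface, as in the getCloudExecutionOptions helpers below.
    DataflowPipelineOptions dataflowOptions =
        PipelineOptionsFactory.as(DataflowPipelineOptions.class);
    dataflowOptions.setProject("my-project"); // placeholder value

    // 3. Re-typing an existing instance; works because options are proxy-backed.
    GcsOptions gcsOptions = options.as(GcsOptions.class);
  }
}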
Example #1
Source File: GCSFilesSourceTest.java From policyscanner with Apache License 2.0
@Test
public void testReaderGetCurrent() {
  String projectName = "sampleProject";
  String objectName = REPOSITORY + this.source.getDirDelimiter() + projectName;
  String fileContent = "sample file content";
  ByteArrayOutputStream[] out = new ByteArrayOutputStream[1];
  PipelineOptions options = PipelineOptionsFactory.create();
  setUpGetFilesPage(objectName);
  setUpGetFileContent(fileContent, out);
  try {
    BoundedReader<KV<List<String>, String>> reader = this.source.createReader(options);
    reader.start();
    KV<List<String>, String> value = reader.getCurrent();
    assertEquals(value.getKey().size(), 2);
    assertEquals(value.getKey().get(0), REPOSITORY);
    assertEquals(value.getKey().get(1), projectName);
    assertEquals(value.getValue(), fileContent);
  } catch (IOException e) {
    fail();
  }
}
Example #2
Source File: GCSFilesSourceTest.java From policyscanner with Apache License 2.0
@Test
public void testReaderAdvance() {
  String objectName = REPOSITORY + this.source.getDirDelimiter() + "sampleProject";
  PipelineOptions options = PipelineOptionsFactory.create();
  BoundedReader<KV<List<String>, String>> reader;
  try {
    setUpGetFilesPage(objectName, 0);
    reader = this.source.createReader(options);
    assertFalse(reader.start());

    setUpGetFilesPage(objectName, 1);
    reader = this.source.createReader(options);
    assertTrue(reader.start());
    assertFalse(reader.advance());

    setUpGetFilesPage(objectName, 2);
    reader = this.source.createReader(options);
    assertTrue(reader.start());
    assertTrue(reader.advance());
    assertFalse(reader.advance());
  } catch (IOException e) {
    fail();
  }
}
Example #3
Source File: UserManagedKeysApp.java From policyscanner with Apache License 2.0
@Override
public void doGet(HttpServletRequest req, HttpServletResponse resp) throws IOException {
  PrintWriter out = resp.getWriter();
  Preconditions.checkNotNull(Constants.ORG_ID);
  Preconditions.checkNotNull(Constants.OUTPUT_PREFIX);
  Preconditions.checkNotNull(Constants.DATAFLOW_STAGING);
  PipelineOptions options;
  if (CloudUtil.willExecuteOnCloud()) {
    options = getCloudExecutionOptions(Constants.DATAFLOW_STAGING);
  } else {
    options = getLocalExecutionOptions();
  }
  new ExportedServiceAccountKeyRemover(options, Constants.ORG_ID)
      .attachSink(TextIO.Write.named("Write output messages").to(Constants.OUTPUT_PREFIX))
      .run();
  out.println("Test passed! The output was written to GCS");
}
Example #4
Source File: LiveProjectSourceTest.java From policyscanner with Apache License 2.0
@Test
public void testBundleSplitIsJustSource() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();

  List<LiveProjectSource> bundles = source.splitIntoBundles(0, null);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(0, options);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(1, options);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(100000, options);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(10, null);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);
}
Example #5
Source File: LiveProjectSourceTest.java From policyscanner with Apache License 2.0
@Test
public void testAdvanceWithoutStart() {
  PipelineOptions options = PipelineOptionsFactory.create();
  LiveProjectReader reader;
  this.listProjectsResponse.setProjects(new ArrayList<Project>(0));
  this.listProjectsResponse.setNextPageToken(null);
  try {
    reader = (LiveProjectReader) this.source.createReader(options);
    assertFalse(reader.advance());
    assertNull(reader.getNextPageToken());
    assertTrue(reader.getProjects().isEmpty());
    reader.getCurrent();
    // Without this fail() the test would pass even if getCurrent() did not throw
    // (compare Example #18, which guards the same way).
    fail("No exception when reading from empty source");
  } catch (IOException e) {
    fail("IOException in reader.start");
  } catch (NoSuchElementException ignored) {
    // test passed.
  }
}
Example #6
Source File: LiveStateCheckerRunner.java From policyscanner with Apache License 2.0
/**
 * Main function for the runner.
 * @param args The args this program was called with.
 * @throws IOException Thrown if there's an error reading from one of the APIs.
 */
public static void main(String[] args) throws IOException {
  Preconditions.checkNotNull(Constants.ORG_NAME);
  Preconditions.checkNotNull(Constants.POLICY_BUCKET);
  Preconditions.checkNotNull(Constants.OUTPUT_PREFIX);
  Preconditions.checkNotNull(Constants.DATAFLOW_STAGING);
  GCSFilesSource source = null;
  try {
    source = new GCSFilesSource(Constants.POLICY_BUCKET, Constants.ORG_NAME);
  } catch (GeneralSecurityException e) {
    throw new IOException("SecurityException: Cannot create GCSFileSource");
  }
  PipelineOptions options;
  if (CloudUtil.willExecuteOnCloud()) {
    options = getCloudExecutionOptions(Constants.DATAFLOW_STAGING);
  } else {
    options = getLocalExecutionOptions();
  }
  new OnDemandLiveStateChecker(options, source)
      .attachSink(TextIO.Write.named("Write messages to GCS").to(Constants.OUTPUT_PREFIX))
      .run();
}
Example #7
Source File: GCSFilesSourceTest.java From policyscanner with Apache License 2.0
@Test
public void testBundleSplitIsJustSource() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();

  List<GCSFilesSource> bundles = source.splitIntoBundles(0, null);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(0, options);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(1, options);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(100000, options);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);

  bundles = source.splitIntoBundles(10, null);
  assertEquals(bundles.size(), 1);
  assertEquals(bundles.get(0), source);
}
Example #8
Source File: FlinkStateInternals.java From flink-dataflow with Apache License 2.0
private FlinkInMemoryKeyedCombiningValue(
    ByteString stateKey,
    CombineWithContext.KeyedCombineFnWithContext<? super K, InputT, AccumT, OutputT> combineFn,
    Coder<AccumT> accumCoder,
    final StateContext<?> stateContext) {
  Preconditions.checkNotNull(combineFn);
  Preconditions.checkNotNull(accumCoder);

  this.stateKey = stateKey;
  this.combineFn = combineFn;
  this.accumCoder = accumCoder;
  this.context = new CombineWithContext.Context() {
    @Override
    public PipelineOptions getPipelineOptions() {
      return stateContext.getPipelineOptions();
    }

    @Override
    public <T> T sideInput(PCollectionView<T> view) {
      return stateContext.sideInput(view);
    }
  };
  accum = combineFn.createAccumulator(key, context);
}
Example #9
Source File: GCSFilesSource.java From policyscanner with Apache License 2.0
/**
 * This function just returns the same source as a list, and does not
 * actually split the load into several bundles.
 * @param desiredBundleSizeBytes The desired bundle size. Not used.
 * @param options Pipeline options. Not used.
 * @return A list containing this source as its only element.
 */
@Override
public List<GCSFilesSource> splitIntoBundles(long desiredBundleSizeBytes,
    PipelineOptions options) {
  ArrayList<GCSFilesSource> bundle = new ArrayList<>(1);
  bundle.add(this);
  return bundle;
}
Example #10
Source File: GCSFilesSourceTest.java From policyscanner with Apache License 2.0
@Test
public void testReaderStart() {
  String objectName = REPOSITORY + this.source.getDirDelimiter() + "sampleProject";
  PipelineOptions options = PipelineOptionsFactory.create();
  setUpGetFilesPage(objectName);
  try {
    assertTrue(this.source.createReader(options).start());
  } catch (IOException e) {
    fail();
  }
}
Example #11
Source File: LiveStateChecker.java From policyscanner with Apache License 2.0
/**
 * Construct a LiveStateChecker to compare the live states of GCP resources
 * with their checked-in known-good counterparts.
 * @param options The options used to construct the pipeline.
 * @param knownGoodSource The source used to read the known-good.
 * @param org The organization the projects are to be read from.
 */
public LiveStateChecker(PipelineOptions options,
    BoundedSource<KV<List<String>, String>> knownGoodSource,
    String org) {
  this.pipeline = Pipeline.create(options);
  this.knownGoodSource = knownGoodSource;
  this.org = org;
}
Example #12
Source File: ReadSourceITCase.java From flink-dataflow with Apache License 2.0
@Override
public List<ReadSource> splitIntoBundles(long desiredShardSizeBytes, PipelineOptions options)
    throws Exception {
  List<ReadSource> res = new ArrayList<>();
  FlinkPipelineOptions flinkOptions = options.as(FlinkPipelineOptions.class);
  int numWorkers = flinkOptions.getParallelism();
  Preconditions.checkArgument(numWorkers > 0, "Number of workers should be larger than 0.");

  float step = 1.0f * (to - from) / numWorkers;
  for (int i = 0; i < numWorkers; ++i) {
    res.add(new ReadSource(Math.round(from + i * step), Math.round(from + (i + 1) * step)));
  }
  return res;
}
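Example #12 works because options.as(FlinkPipelineOptions.class) re-types the generic options through a runner-specific subinterface. Such subinterfaces are plain annotated interfaces whose implementations the SDK generates at runtime; the sketch below is a hypothetical illustration (MyOptions is not part of either project):

import com.google.cloud.dataflow.sdk.options.Default;
import com.google.cloud.dataflow.sdk.options.Description;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;

// Hypothetical options subinterface; each getter/setter pair becomes a named option.
public interface MyOptions extends PipelineOptions {
  @Description("Degree of parallelism to use when splitting bundles.")
  @Default.Integer(4)
  int getParallelism();
  void setParallelism(int value);
}

Registering the interface with PipelineOptionsFactory.register(MyOptions.class) additionally makes its options parsable from command-line flags.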
Example #13
Source File: LiveStateCheckerRunner.java From policyscanner with Apache License 2.0
private static PipelineOptions getCloudExecutionOptions(String stagingLocation) {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setProject(SystemProperty.applicationId.get());
  options.setStagingLocation(stagingLocation);
  options.setRunner(BlockingDataflowPipelineRunner.class);
  return options;
}
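The policyscanner apps pair this helper with a trivial getLocalExecutionOptions() (Example #28) and choose between the two at runtime (Examples #3 and #6): PipelineOptionsFactory.create() keeps the SDK defaults, which run the pipeline in-process with the DirectPipelineRunner, while the options above submit the job to the Dataflow service and, via BlockingDataflowPipelineRunner, block until it finishes.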
Example #14
Source File: LiveProjectSource.java From policyscanner with Apache License 2.0
/**
 * This function just returns the same source as a list, and does not
 * actually split the load into several bundles.
 * @param desiredBundleSizeBytes The desired bundle size. Not used.
 * @param options Pipeline options. Not used.
 * @return A list containing this source as its only element.
 */
@Override
public List<LiveProjectSource> splitIntoBundles(
    long desiredBundleSizeBytes, PipelineOptions options) throws Exception {
  List<LiveProjectSource> projectSources = new ArrayList<>(1);
  projectSources.add(this);
  return projectSources;
}
Example #15
Source File: DesiredStateEnforcerApp.java From policyscanner with Apache License 2.0
private PipelineOptions getCloudExecutionOptions(String stagingLocation) {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setProject(SystemProperty.applicationId.get());
  options.setStagingLocation(stagingLocation);
  options.setRunner(BlockingDataflowPipelineRunner.class);
  return options;
}
Example #16
Source File: DesiredStateEnforcer.java From policyscanner with Apache License 2.0
/**
 * Construct a DesiredStateEnforcer to compare the live states of GCP resources
 * with their checked-in known-good counterparts.
 * @param options The options used to construct the pipeline.
 * @param knownGoodSource The source used to read the known-good.
 * @param org The organization the projects are to be read from.
 */
public DesiredStateEnforcer(PipelineOptions options,
    BoundedSource<KV<List<String>, String>> knownGoodSource,
    String org) {
  this.pipeline = Pipeline.create(options);
  this.outputMessages = constructPipeline(this.pipeline, org, knownGoodSource);
  this.enforcedStates = 0L;
}
Example #17
Source File: UserManagedKeysApp.java From policyscanner with Apache License 2.0
private PipelineOptions getCloudExecutionOptions(String stagingLocation) {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setProject(SystemProperty.applicationId.get());
  options.setStagingLocation(stagingLocation);
  options.setRunner(BlockingDataflowPipelineRunner.class);
  return options;
}
Example #18
Source File: LiveProjectSourceTest.java From policyscanner with Apache License 2.0
@Test
public void testAdvanceWhenPageTokenNull() {
  String projectName = "sampleProjectName";
  String projectId = "sampleProjectId";
  String orgId = ORG;
  ResourceId resourceId = new ResourceId().setId(orgId);
  GCPProject gcpProject = new GCPProject(projectId, orgId, projectName);
  Project project = new Project()
      .setProjectId(projectId)
      .setParent(resourceId)
      .setName(projectName)
      .setLifecycleState("ACTIVE");
  List<Project> projects = Arrays.asList(project);
  PipelineOptions options = PipelineOptionsFactory.create();
  LiveProjectReader reader;
  this.listProjectsResponse.setProjects(projects);
  this.listProjectsResponse.setNextPageToken(null);
  try {
    reader = (LiveProjectReader) this.source.createReader(options);
    assertTrue(reader.start());
    assertEquals(reader.getNextPageToken(), null);
    assertEquals(reader.getCurrent(), gcpProject);
    assertFalse(reader.advance());
    reader.getCurrent();
    fail("No exception when reading from empty source");
  } catch (IOException e) {
    fail("IOException in reader.start");
  } catch (NoSuchElementException ignored) {
    // test passed.
  }
}
Example #19
Source File: FlinkPipelineRunner.java From flink-dataflow with Apache License 2.0
/**
 * Construct a runner from the provided options.
 *
 * @param options Properties which configure the runner.
 * @return The newly created runner.
 */
public static FlinkPipelineRunner fromOptions(PipelineOptions options) {
  FlinkPipelineOptions flinkOptions =
      PipelineOptionsValidator.validate(FlinkPipelineOptions.class, options);
  ArrayList<String> missing = new ArrayList<>();

  if (flinkOptions.getAppName() == null) {
    missing.add("appName");
  }
  if (missing.size() > 0) {
    throw new IllegalArgumentException(
        "Missing required values: " + Joiner.on(',').join(missing));
  }

  if (flinkOptions.getFilesToStage() == null) {
    flinkOptions.setFilesToStage(detectClassPathResourcesToStage(
        DataflowPipelineRunner.class.getClassLoader()));
    LOG.info("PipelineOptions.filesToStage was not specified. "
        + "Defaulting to files from the classpath: will stage {} files. "
        + "Enable logging at DEBUG level to see which files will be staged.",
        flinkOptions.getFilesToStage().size());
    LOG.debug("Classpath elements: {}", flinkOptions.getFilesToStage());
  }

  // Set Flink Master to [auto] if no option was specified.
  if (flinkOptions.getFlinkMaster() == null) {
    flinkOptions.setFlinkMaster("[auto]");
  }

  return new FlinkPipelineRunner(flinkOptions);
}
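For context, a minimal way to drive this factory is sketched below, under the assumption that FlinkPipelineOptions exposes the usual appName/runner setters; this is not taken from the project's own documentation:

// Hedged usage sketch; "wordcount" is a placeholder app name.
FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
options.setAppName("wordcount"); // required: fromOptions rejects a null appName
options.setRunner(FlinkPipelineRunner.class);
Pipeline pipeline = Pipeline.create(options); // instantiates the runner from the options
// ... build the pipeline, then pipeline.run() ...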
Example #20
Source File: FlinkGroupAlsoByWindowWrapper.java From flink-dataflow with Apache License 2.0
/**
 * Creates a DataStream where elements are grouped in windows based on the specified
 * windowing strategy. This method assumes that <b>elements are already grouped by key</b>.
 * <p/>
 * The difference with {@link #createForIterable(PipelineOptions, PCollection, KeyedStream)}
 * is that this method assumes that a combiner function is provided
 * (see {@link com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn}).
 * A combiner helps increase speed and, in most cases, reduces the per-window state.
 *
 * @param options the general job configuration options.
 * @param input the input Dataflow {@link com.google.cloud.dataflow.sdk.values.PCollection}.
 * @param groupedStreamByKey the input stream, assumed to already be grouped by key.
 * @param combiner the combiner to be used.
 * @param outputKvCoder the coder for the output values.
 */
public static <K, VIN, VACC, VOUT> DataStream<WindowedValue<KV<K, VOUT>>> create(
    PipelineOptions options,
    PCollection input,
    KeyedStream<WindowedValue<KV<K, VIN>>, K> groupedStreamByKey,
    Combine.KeyedCombineFn<K, VIN, VACC, VOUT> combiner,
    KvCoder<K, VOUT> outputKvCoder) {
  Preconditions.checkNotNull(options);

  KvCoder<K, VIN> inputKvCoder = (KvCoder<K, VIN>) input.getCoder();
  FlinkGroupAlsoByWindowWrapper windower = new FlinkGroupAlsoByWindowWrapper<>(options,
      input.getPipeline().getCoderRegistry(), input.getWindowingStrategy(), inputKvCoder,
      combiner);

  Coder<WindowedValue<KV<K, VOUT>>> windowedOutputElemCoder =
      WindowedValue.FullWindowedValueCoder.of(
          outputKvCoder,
          input.getWindowingStrategy().getWindowFn().windowCoder());

  CoderTypeInformation<WindowedValue<KV<K, VOUT>>> outputTypeInfo =
      new CoderTypeInformation<>(windowedOutputElemCoder);

  DataStream<WindowedValue<KV<K, VOUT>>> groupedByKeyAndWindow = groupedStreamByKey
      .transform("GroupByWindowWithCombiner",
          new CoderTypeInformation<>(outputKvCoder),
          windower)
      .returns(outputTypeInfo);

  return groupedByKeyAndWindow;
}
Example #21
Source File: FlinkGroupAlsoByWindowWrapper.java From flink-dataflow with Apache License 2.0
/**
 * Creates a DataStream where elements are grouped in windows based on the specified
 * windowing strategy. This method assumes that <b>elements are already grouped by key</b>.
 * <p/>
 * The difference with {@link #create(PipelineOptions, PCollection, KeyedStream, Combine.KeyedCombineFn, KvCoder)}
 * is that this method assumes no combiner function
 * (see {@link com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn}).
 *
 * @param options the general job configuration options.
 * @param input the input Dataflow {@link com.google.cloud.dataflow.sdk.values.PCollection}.
 * @param groupedStreamByKey the input stream, assumed to already be grouped by key.
 */
public static <K, VIN> DataStream<WindowedValue<KV<K, Iterable<VIN>>>> createForIterable(
    PipelineOptions options,
    PCollection input,
    KeyedStream<WindowedValue<KV<K, VIN>>, K> groupedStreamByKey) {
  Preconditions.checkNotNull(options);

  KvCoder<K, VIN> inputKvCoder = (KvCoder<K, VIN>) input.getCoder();
  Coder<K> keyCoder = inputKvCoder.getKeyCoder();
  Coder<VIN> inputValueCoder = inputKvCoder.getValueCoder();

  FlinkGroupAlsoByWindowWrapper windower = new FlinkGroupAlsoByWindowWrapper(options,
      input.getPipeline().getCoderRegistry(), input.getWindowingStrategy(), inputKvCoder,
      null);

  Coder<Iterable<VIN>> valueIterCoder = IterableCoder.of(inputValueCoder);
  KvCoder<K, Iterable<VIN>> outputElemCoder = KvCoder.of(keyCoder, valueIterCoder);

  Coder<WindowedValue<KV<K, Iterable<VIN>>>> windowedOutputElemCoder =
      WindowedValue.FullWindowedValueCoder.of(
          outputElemCoder,
          input.getWindowingStrategy().getWindowFn().windowCoder());

  CoderTypeInformation<WindowedValue<KV<K, Iterable<VIN>>>> outputTypeInfo =
      new CoderTypeInformation<>(windowedOutputElemCoder);

  DataStream<WindowedValue<KV<K, Iterable<VIN>>>> groupedByKeyAndWindow = groupedStreamByKey
      .transform("GroupByWindow",
          new CoderTypeInformation<>(windowedOutputElemCoder),
          windower)
      .returns(outputTypeInfo);

  return groupedByKeyAndWindow;
}
Example #22
Source File: FlinkGroupAlsoByWindowWrapper.java From flink-dataflow with Apache License 2.0
public static <K, VIN, VACC, VOUT> FlinkGroupAlsoByWindowWrapper createForTesting(
    PipelineOptions options,
    CoderRegistry registry,
    WindowingStrategy<KV<K, VIN>, BoundedWindow> windowingStrategy,
    KvCoder<K, VIN> inputCoder,
    Combine.KeyedCombineFn<K, VIN, VACC, VOUT> combiner) {
  Preconditions.checkNotNull(options);

  return new FlinkGroupAlsoByWindowWrapper(options, registry, windowingStrategy, inputCoder,
      combiner);
}
Example #23
Source File: FlinkGroupAlsoByWindowWrapper.java From flink-dataflow with Apache License 2.0
private FlinkGroupAlsoByWindowWrapper(
    PipelineOptions options,
    CoderRegistry registry,
    WindowingStrategy<KV<K, VIN>, BoundedWindow> windowingStrategy,
    KvCoder<K, VIN> inputCoder,
    Combine.KeyedCombineFn<K, VIN, VACC, VOUT> combiner) {
  Preconditions.checkNotNull(options);

  this.options = Preconditions.checkNotNull(options);
  this.coderRegistry = Preconditions.checkNotNull(registry);
  this.inputKvCoder = Preconditions.checkNotNull(inputCoder); // (KvCoder<K, VIN>) input.getCoder();
  this.windowingStrategy = Preconditions.checkNotNull(windowingStrategy); // input.getWindowingStrategy();
  this.combineFn = combiner;
  this.operator = createGroupAlsoByWindowOperator();
  this.chainingStrategy = ChainingStrategy.ALWAYS;
}
Example #24
Source File: FlinkAbstractParDoWrapper.java From flink-dataflow with Apache License 2.0
public FlinkAbstractParDoWrapper(PipelineOptions options,
    WindowingStrategy<?, ?> windowingStrategy, DoFn<IN, OUTDF> doFn) {
  Preconditions.checkNotNull(options);
  Preconditions.checkNotNull(windowingStrategy);
  Preconditions.checkNotNull(doFn);

  this.doFn = doFn;
  this.options = options;
  this.windowingStrategy = windowingStrategy;
}
Example #25
Source File: FlinkMultiOutputDoFnFunction.java From flink-dataflow with Apache License 2.0
private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
  in.defaultReadObject();
  ObjectMapper mapper = new ObjectMapper();
  options = mapper.readValue(in, PipelineOptions.class);
}
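The custom hook above exists because PipelineOptions instances are Jackson-serializable proxies rather than java.io.Serializable objects. The matching write side is not shown in this excerpt; a plausible counterpart, an assumption mirroring the read path, would be:

// Assumed counterpart to the readObject above; not shown in the original excerpt.
private void writeObject(ObjectOutputStream out) throws IOException {
  out.defaultWriteObject();
  ObjectMapper mapper = new ObjectMapper();
  mapper.writeValue(out, options); // the SDK registers Jackson (de)serializers for PipelineOptions
}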
Example #26
Source File: LiveStateCheckerApp.java From policyscanner with Apache License 2.0
private PipelineOptions getCloudExecutionOptions(String stagingLocation) {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setProject(Constants.PROJECT_ID);
  options.setStagingLocation(stagingLocation);
  options.setRunner(BlockingDataflowPipelineRunner.class);
  return options;
}
Example #27
Source File: SinkOutputFormat.java From flink-dataflow with Apache License 2.0
private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
  in.defaultReadObject();
  ObjectMapper mapper = new ObjectMapper();
  pipelineOptions = mapper.readValue(in, PipelineOptions.class);
}
Example #28
Source File: UserManagedKeysApp.java From policyscanner with Apache License 2.0
private PipelineOptions getLocalExecutionOptions() {
  return PipelineOptionsFactory.create();
}
Example #29
Source File: FlinkAbstractParDoWrapper.java From flink-dataflow with Apache License 2.0
@Override
public PipelineOptions getPipelineOptions() {
  return options;
}
Example #30
Source File: FlinkParDoBoundWrapper.java From flink-dataflow with Apache License 2.0
public FlinkParDoBoundWrapper(PipelineOptions options,
    WindowingStrategy<?, ?> windowingStrategy, DoFn<IN, OUT> doFn) {
  super(options, windowingStrategy, doFn);
}