com.google.cloud.dataflow.sdk.values.PCollectionView Java Examples
The following examples show how to use
com.google.cloud.dataflow.sdk.values.PCollectionView.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SideInputITCase.java From flink-dataflow with Apache License 2.0 | 6 votes |
/**
 * Builds and runs a batch test pipeline in which a {@code ParDo} ignores its main-input
 * element and emits the value of a singleton side input, writing the output to
 * {@code resultPath}.
 *
 * @throws Exception propagated from pipeline construction or execution
 */
@Override
protected void testProgram() throws Exception {
  Pipeline pipeline = FlinkTestPipeline.createForBatch();

  // Singleton view built from `expected`; it is consumed below as a side input.
  final PCollectionView<String> singletonView =
      pipeline
          .apply(Create.of(expected))
          .apply(View.<String>asSingleton());

  // Emits whatever the side input holds, regardless of the main-input element.
  DoFn<String, String> emitSideInput = new DoFn<String, String>() {
    @Override
    public void processElement(ProcessContext c) throws Exception {
      c.output(c.sideInput(singletonView));
    }
  };

  pipeline
      .apply(Create.of("bli"))
      .apply(ParDo.of(emitSideInput).withSideInputs(singletonView))
      .apply(TextIO.Write.to(resultPath));

  pipeline.run();
}
Example #2
Source File: FlinkStateInternals.java From flink-dataflow with Apache License 2.0 | 6 votes |
/**
 * Creates a combining-value state cell and initialises its accumulator.
 *
 * @param stateKey     key stored in the {@code stateKey} field
 * @param combineFn    context-aware keyed combine function; must not be null
 * @param accumCoder   coder for the accumulator type; must not be null
 * @param stateContext source of pipeline options and side inputs for the combine context
 */
private FlinkInMemoryKeyedCombiningValue(
    ByteString stateKey,
    CombineWithContext.KeyedCombineFnWithContext<? super K, InputT, AccumT, OutputT> combineFn,
    Coder<AccumT> accumCoder,
    final StateContext<?> stateContext) {
  // Reject missing collaborators before any field is populated.
  Preconditions.checkNotNull(combineFn);
  Preconditions.checkNotNull(accumCoder);

  this.stateKey = stateKey;
  this.accumCoder = accumCoder;
  this.combineFn = combineFn;

  // Adapter that forwards CombineWithContext.Context calls to the supplied StateContext.
  this.context = new CombineWithContext.Context() {
    @Override
    public <T> T sideInput(PCollectionView<T> view) {
      return stateContext.sideInput(view);
    }

    @Override
    public PipelineOptions getPipelineOptions() {
      return stateContext.getPipelineOptions();
    }
  };

  // NOTE(review): `key` is not a parameter here; presumably it comes from an enclosing
  // scope outside this snippet - confirm.
  accum = combineFn.createAccumulator(key, context);
}
Example #3
Source File: PartitionedComputeCorrelationsDoFn.java From data-timeseries-java with Apache License 2.0 | 5 votes |
/**
 * Creates the DoFn from its configuration and its two side-input views.
 *
 * @param config    configuration for the correlations
 * @param sideInput side input to be used for the transform
 * @param matrix    key matrix view; stored in the {@code keyMatrix} field
 */
public PartitionedComputeCorrelationsDoFn(
    CorrelationParDoConfig config,
    PCollectionView<Map<String, WorkPacket>> sideInput,
    PCollectionView<Map<Integer, List<WorkPacketKey>>> matrix) {
  this.config = config;
  this.sideInput = sideInput;
  this.keyMatrix = matrix;
}
Example #4
Source File: FlinkDoFnFunction.java From flink-dataflow with Apache License 2.0 | 5 votes |
/**
 * Returns a stub {@code WindowingInternals}: it reports the global window and a
 * non-firing pane, exposes no state or timers, drops windowed output and view data,
 * and rejects side-input lookups.
 */
@Override
public WindowingInternals<IN, OUT> windowingInternals() {
  return new WindowingInternals<IN, OUT>() {

    // ---- window and pane: fixed values ----

    @Override
    public Collection<? extends BoundedWindow> windows() {
      return ImmutableList.of(GlobalWindow.INSTANCE);
    }

    @Override
    public PaneInfo pane() {
      return PaneInfo.NO_FIRING;
    }

    // ---- state and timers: none provided (callers receive null) ----

    @Override
    public StateInternals stateInternals() {
      return null;
    }

    @Override
    public TimerInternals timerInternals() {
      return null;
    }

    // ---- outputs: intentionally ignored ----

    @Override
    public void outputWindowedValue(OUT output, Instant timestamp,
        Collection<? extends BoundedWindow> windows, PaneInfo pane) {
      // no-op
    }

    @Override
    public <T> void writePCollectionViewData(TupleTag<?> tag,
        Iterable<WindowedValue<T>> data, Coder<T> elemCoder) throws IOException {
      // no-op
    }

    // ---- side inputs: not available through this object ----

    @Override
    public <T> T sideInput(PCollectionView<T> view, BoundedWindow mainInputWindow) {
      throw new RuntimeException("sideInput() not implemented.");
    }
  };
}
Example #5
Source File: FlinkBatchTransformTranslators.java From flink-dataflow with Apache License 2.0 | 5 votes |
private static void transformSideInputs(List<PCollectionView<?>> sideInputs, MapPartitionOperator<?, ?> outputDataSet, FlinkBatchTranslationContext context) { // get corresponding Flink broadcast DataSets for(PCollectionView<?> input : sideInputs) { DataSet<?> broadcastSet = context.getSideInputDataSet(input); outputDataSet.withBroadcastSet(broadcastSet, input.getTagInternal().getId()); } }
Example #6
Source File: FlinkParDoBoundWrapper.java From flink-dataflow with Apache License 2.0 | 5 votes |
/**
 * Builds the {@code WindowingInternals} for one input element: window and pane come from
 * {@code inElement}, windowed output goes to {@code collector}, and every other operation
 * throws.
 *
 * @param inElement the element currently being processed
 * @param collector receives each value passed to {@code outputWindowedValue}
 */
@Override
public WindowingInternals<IN, OUT> windowingInternalsHelper(
    final WindowedValue<IN> inElement,
    final Collector<WindowedValue<OUT>> collector) {
  return new WindowingInternals<IN, OUT>() {

    // ---- delegated to the current element ----

    @Override
    public Collection<? extends BoundedWindow> windows() {
      return inElement.getWindows();
    }

    @Override
    public PaneInfo pane() {
      return inElement.getPane();
    }

    // ---- forwarded to the collector ----

    @Override
    public void outputWindowedValue(OUT output, Instant timestamp,
        Collection<? extends BoundedWindow> windows, PaneInfo pane) {
      collector.collect(makeWindowedValue(output, timestamp, windows, pane));
    }

    // ---- unsupported operations ----
    // NOTE(review): NullPointerException is an unusual signal for "unsupported";
    // kept as-is because callers may depend on the exception type.

    @Override
    public StateInternals stateInternals() {
      throw new NullPointerException("StateInternals are not available for ParDo.Bound().");
    }

    @Override
    public TimerInternals timerInternals() {
      throw new NullPointerException("TimeInternals are not available for ParDo.Bound().");
    }

    @Override
    public <T> void writePCollectionViewData(TupleTag<?> tag,
        Iterable<WindowedValue<T>> data, Coder<T> elemCoder) throws IOException {
      throw new RuntimeException("writePCollectionViewData() not supported in Streaming mode.");
    }

    @Override
    public <T> T sideInput(PCollectionView<T> view, BoundedWindow mainInputWindow) {
      throw new RuntimeException("sideInput() not implemented.");
    }
  };
}
Example #7
Source File: JoinKnownGoodAndLiveStatesTest.java From policyscanner with Apache License 2.0 | 5 votes |
/**
 * One resource with different desired and live states: the join must emit a single
 * entry that maps the resource to both states, keyed by their source.
 */
@Test
public void testFilterStateOneMismatch() {
  GCPProject sampleProject = getSampleProject("");
  GCPResourceState desiredState = getSampleGCPResourcePolicy(sampleProject, 1);
  GCPResourceState liveState = getSampleGCPResourcePolicy(sampleProject, 2);
  GCPResource resourceKey = (GCPResource) sampleProject;

  // Side input carries the DESIRED state, main input the LIVE state.
  KV<GCPResource, KV<StateSource, GCPResourceState>> desiredEntry =
      KV.of(resourceKey, KV.of(StateSource.DESIRED, desiredState));
  List<KV<GCPResource, KV<StateSource, GCPResourceState>>> sideEntries =
      Arrays.asList(desiredEntry);
  KV<GCPResource, KV<StateSource, GCPResourceState>> liveEntry =
      KV.of(resourceKey, KV.of(StateSource.LIVE, liveState));
  List<KV<GCPResource, KV<StateSource, GCPResourceState>>> mainEntries =
      Arrays.asList(liveEntry);

  PCollection<KV<GCPResource, KV<StateSource, GCPResourceState>>> sideCollection =
      pipeline
          .apply(Create.of(sideEntries))
          .setCoder(
              KvCoder.of(
                  SerializableCoder.of(GCPResource.class),
                  KvCoder.of(
                      SerializableCoder.of(StateSource.class),
                      SerializableCoder.of(GCPResourceState.class))));
  PCollectionView<Map<GCPResource, KV<StateSource, GCPResourceState>>> sideView =
      View.<GCPResource, KV<StateSource, GCPResourceState>>asMap().apply(sideCollection);

  JoinKnownGoodAndLiveStates joinFn = new JoinKnownGoodAndLiveStates(sideView);
  DoFnTester<KV<GCPResource, KV<StateSource, GCPResourceState>>,
      KV<GCPResource, Map<StateSource, GCPResourceState>>> fnTester = DoFnTester.of(joinFn);
  fnTester.setSideInputInGlobalWindow(sideView, sideEntries);

  // Expected: one entry holding both states for the resource.
  Map<StateSource, GCPResourceState> statesBySource = new HashMap<>(2);
  statesBySource.put(StateSource.DESIRED, desiredState);
  statesBySource.put(StateSource.LIVE, liveState);
  List<KV<GCPResource, Map<StateSource, GCPResourceState>>> expected =
      Arrays.asList(KV.of(resourceKey, statesBySource));

  List<KV<GCPResource, Map<StateSource, GCPResourceState>>> actual =
      fnTester.processBatch(mainEntries);
  assertEquals(expected, actual);
}
Example #8
Source File: JoinKnownGoodAndLiveStatesTest.java From policyscanner with Apache License 2.0 | 5 votes |
/**
 * One resource whose live state is the same object as its desired state: the join is
 * still expected to produce exactly one output.
 */
@Test
public void testFilterStateNoMismatches() {
  GCPProject sampleProject = getSampleProject("");
  GCPResourceState desiredState = getSampleGCPResourcePolicy(sampleProject, 1);
  GCPResourceState liveState = desiredState;
  GCPResource resourceKey = (GCPResource) sampleProject;

  // Side input carries the DESIRED state, main input the LIVE state.
  KV<GCPResource, KV<StateSource, GCPResourceState>> desiredEntry =
      KV.of(resourceKey, KV.of(StateSource.DESIRED, desiredState));
  List<KV<GCPResource, KV<StateSource, GCPResourceState>>> sideEntries =
      Arrays.asList(desiredEntry);
  KV<GCPResource, KV<StateSource, GCPResourceState>> liveEntry =
      KV.of(resourceKey, KV.of(StateSource.LIVE, liveState));
  List<KV<GCPResource, KV<StateSource, GCPResourceState>>> mainEntries =
      Arrays.asList(liveEntry);

  PCollection<KV<GCPResource, KV<StateSource, GCPResourceState>>> sideCollection =
      pipeline
          .apply(Create.of(sideEntries))
          .setCoder(
              KvCoder.of(
                  SerializableCoder.of(GCPResource.class),
                  KvCoder.of(
                      SerializableCoder.of(StateSource.class),
                      SerializableCoder.of(GCPResourceState.class))));
  PCollectionView<Map<GCPResource, KV<StateSource, GCPResourceState>>> sideView =
      View.<GCPResource, KV<StateSource, GCPResourceState>>asMap().apply(sideCollection);

  JoinKnownGoodAndLiveStates joinFn = new JoinKnownGoodAndLiveStates(sideView);
  DoFnTester<KV<GCPResource, KV<StateSource, GCPResourceState>>,
      KV<GCPResource, Map<StateSource, GCPResourceState>>> fnTester = DoFnTester.of(joinFn);
  fnTester.setSideInputInGlobalWindow(sideView, sideEntries);

  List<KV<GCPResource, Map<StateSource, GCPResourceState>>> actual =
      fnTester.processBatch(mainEntries);
  assertEquals(1, actual.size());
}
Example #9
Source File: JoinKnownGoodAndLiveStatesTest.java From policyscanner with Apache License 2.0 | 5 votes |
/**
 * Side input and main input refer to different projects: with no shared resource key,
 * the join is expected to emit nothing.
 */
@Test
public void testFilterStateNoMatchingResources() {
  GCPProject desiredProject = getSampleProject("_checked");
  GCPProject observedProject = getSampleProject("_live");
  GCPResourceState desiredState = getSampleGCPResourcePolicy(desiredProject, 1);
  GCPResourceState liveState = getSampleGCPResourcePolicy(observedProject, 2);

  // Side input is keyed by the checked project, main input by the live project.
  KV<GCPResource, KV<StateSource, GCPResourceState>> desiredEntry =
      KV.of((GCPResource) desiredProject, KV.of(StateSource.DESIRED, desiredState));
  List<KV<GCPResource, KV<StateSource, GCPResourceState>>> sideEntries =
      Arrays.asList(desiredEntry);
  KV<GCPResource, KV<StateSource, GCPResourceState>> liveEntry =
      KV.of((GCPResource) observedProject, KV.of(StateSource.LIVE, liveState));
  List<KV<GCPResource, KV<StateSource, GCPResourceState>>> mainEntries =
      Arrays.asList(liveEntry);

  PCollection<KV<GCPResource, KV<StateSource, GCPResourceState>>> sideCollection =
      pipeline
          .apply(Create.of(sideEntries))
          .setCoder(
              KvCoder.of(
                  SerializableCoder.of(GCPResource.class),
                  KvCoder.of(
                      SerializableCoder.of(StateSource.class),
                      SerializableCoder.of(GCPResourceState.class))));
  PCollectionView<Map<GCPResource, KV<StateSource, GCPResourceState>>> sideView =
      View.<GCPResource, KV<StateSource, GCPResourceState>>asMap().apply(sideCollection);

  JoinKnownGoodAndLiveStates joinFn = new JoinKnownGoodAndLiveStates(sideView);
  DoFnTester<KV<GCPResource, KV<StateSource, GCPResourceState>>,
      KV<GCPResource, Map<StateSource, GCPResourceState>>> fnTester = DoFnTester.of(joinFn);
  fnTester.setSideInputInGlobalWindow(sideView, sideEntries);

  List<KV<GCPResource, Map<StateSource, GCPResourceState>>> actual =
      fnTester.processBatch(mainEntries);
  assertEquals(0, actual.size());
}
Example #10
Source File: FilterOutMatchingStateTest.java From policyscanner with Apache License 2.0 | 5 votes |
/**
 * One resource with different desired and live states: the filter must let through a
 * single entry that maps the resource to both states, keyed by their source.
 */
@Test
public void testFilterStateOneMismatch() {
  GCPProject sampleProject = getSampleProject("");
  GCPResourceState desiredState = getSampleGCPResourcePolicy(sampleProject, 1);
  GCPResourceState liveState = getSampleGCPResourcePolicy(sampleProject, 2);
  GCPResource resourceKey = (GCPResource) sampleProject;

  // Side input carries the DESIRED state, main input the LIVE state.
  KV<GCPResource, KV<StateSource, GCPResourceState>> desiredEntry =
      KV.of(resourceKey, KV.of(StateSource.DESIRED, desiredState));
  List<KV<GCPResource, KV<StateSource, GCPResourceState>>> sideEntries =
      Arrays.asList(desiredEntry);
  KV<GCPResource, KV<StateSource, GCPResourceState>> liveEntry =
      KV.of(resourceKey, KV.of(StateSource.LIVE, liveState));
  List<KV<GCPResource, KV<StateSource, GCPResourceState>>> mainEntries =
      Arrays.asList(liveEntry);

  PCollection<KV<GCPResource, KV<StateSource, GCPResourceState>>> sideCollection =
      pipeline
          .apply(Create.of(sideEntries))
          .setCoder(
              KvCoder.of(
                  SerializableCoder.of(GCPResource.class),
                  KvCoder.of(
                      SerializableCoder.of(StateSource.class),
                      SerializableCoder.of(GCPResourceState.class))));
  PCollectionView<Map<GCPResource, KV<StateSource, GCPResourceState>>> sideView =
      View.<GCPResource, KV<StateSource, GCPResourceState>>asMap().apply(sideCollection);

  FilterOutMatchingState filterFn = new FilterOutMatchingState(sideView);
  DoFnTester<KV<GCPResource, KV<StateSource, GCPResourceState>>,
      KV<GCPResource, Map<StateSource, GCPResourceState>>> fnTester = DoFnTester.of(filterFn);
  fnTester.setSideInputInGlobalWindow(sideView, sideEntries);

  // Expected: one entry holding both states for the resource.
  Map<StateSource, GCPResourceState> statesBySource = new HashMap<>(2);
  statesBySource.put(StateSource.DESIRED, desiredState);
  statesBySource.put(StateSource.LIVE, liveState);
  List<KV<GCPResource, Map<StateSource, GCPResourceState>>> expected =
      Arrays.asList(KV.of(resourceKey, statesBySource));

  List<KV<GCPResource, Map<StateSource, GCPResourceState>>> actual =
      fnTester.processBatch(mainEntries);
  assertEquals(expected, actual);
}
Example #11
Source File: FilterOutMatchingStateTest.java From policyscanner with Apache License 2.0 | 5 votes |
/**
 * One resource whose live state is the same object as its desired state: the filter is
 * expected to drop it and emit nothing.
 */
@Test
public void testFilterStateNoMismatches() {
  GCPProject sampleProject = getSampleProject("");
  GCPResourceState desiredState = getSampleGCPResourcePolicy(sampleProject, 1);
  GCPResourceState liveState = desiredState;
  GCPResource resourceKey = (GCPResource) sampleProject;

  // Side input carries the DESIRED state, main input the LIVE state.
  KV<GCPResource, KV<StateSource, GCPResourceState>> desiredEntry =
      KV.of(resourceKey, KV.of(StateSource.DESIRED, desiredState));
  List<KV<GCPResource, KV<StateSource, GCPResourceState>>> sideEntries =
      Arrays.asList(desiredEntry);
  KV<GCPResource, KV<StateSource, GCPResourceState>> liveEntry =
      KV.of(resourceKey, KV.of(StateSource.LIVE, liveState));
  List<KV<GCPResource, KV<StateSource, GCPResourceState>>> mainEntries =
      Arrays.asList(liveEntry);

  PCollection<KV<GCPResource, KV<StateSource, GCPResourceState>>> sideCollection =
      pipeline
          .apply(Create.of(sideEntries))
          .setCoder(
              KvCoder.of(
                  SerializableCoder.of(GCPResource.class),
                  KvCoder.of(
                      SerializableCoder.of(StateSource.class),
                      SerializableCoder.of(GCPResourceState.class))));
  PCollectionView<Map<GCPResource, KV<StateSource, GCPResourceState>>> sideView =
      View.<GCPResource, KV<StateSource, GCPResourceState>>asMap().apply(sideCollection);

  FilterOutMatchingState filterFn = new FilterOutMatchingState(sideView);
  DoFnTester<KV<GCPResource, KV<StateSource, GCPResourceState>>,
      KV<GCPResource, Map<StateSource, GCPResourceState>>> fnTester = DoFnTester.of(filterFn);
  fnTester.setSideInputInGlobalWindow(sideView, sideEntries);

  List<KV<GCPResource, Map<StateSource, GCPResourceState>>> actual =
      fnTester.processBatch(mainEntries);
  assertEquals(0, actual.size());
}
Example #12
Source File: FilterOutMatchingStateTest.java From policyscanner with Apache License 2.0 | 5 votes |
/**
 * Side input and main input refer to different projects: with no shared resource key,
 * the filter is expected to emit nothing.
 */
@Test
public void testFilterStateNoMatchingResources() {
  GCPProject desiredProject = getSampleProject("_checked");
  GCPProject observedProject = getSampleProject("_live");
  GCPResourceState desiredState = getSampleGCPResourcePolicy(desiredProject, 1);
  GCPResourceState liveState = getSampleGCPResourcePolicy(observedProject, 2);

  // Side input is keyed by the checked project, main input by the live project.
  KV<GCPResource, KV<StateSource, GCPResourceState>> desiredEntry =
      KV.of((GCPResource) desiredProject, KV.of(StateSource.DESIRED, desiredState));
  List<KV<GCPResource, KV<StateSource, GCPResourceState>>> sideEntries =
      Arrays.asList(desiredEntry);
  KV<GCPResource, KV<StateSource, GCPResourceState>> liveEntry =
      KV.of((GCPResource) observedProject, KV.of(StateSource.LIVE, liveState));
  List<KV<GCPResource, KV<StateSource, GCPResourceState>>> mainEntries =
      Arrays.asList(liveEntry);

  PCollection<KV<GCPResource, KV<StateSource, GCPResourceState>>> sideCollection =
      pipeline
          .apply(Create.of(sideEntries))
          .setCoder(
              KvCoder.of(
                  SerializableCoder.of(GCPResource.class),
                  KvCoder.of(
                      SerializableCoder.of(StateSource.class),
                      SerializableCoder.of(GCPResourceState.class))));
  PCollectionView<Map<GCPResource, KV<StateSource, GCPResourceState>>> sideView =
      View.<GCPResource, KV<StateSource, GCPResourceState>>asMap().apply(sideCollection);

  FilterOutMatchingState filterFn = new FilterOutMatchingState(sideView);
  DoFnTester<KV<GCPResource, KV<StateSource, GCPResourceState>>,
      KV<GCPResource, Map<StateSource, GCPResourceState>>> fnTester = DoFnTester.of(filterFn);
  fnTester.setSideInputInGlobalWindow(sideView, sideEntries);

  List<KV<GCPResource, Map<StateSource, GCPResourceState>>> actual =
      fnTester.processBatch(mainEntries);
  assertEquals(0, actual.size());
}
Example #13
Source File: OnDemandLiveStateChecker.java From policyscanner with Apache License 2.0 | 5 votes |
/**
 * Wires up the transforms that compare known-good states against live states and turn
 * each discrepancy into a notification message.
 *
 * <p>The live states are looked up from the file paths of the known-good data, exposed
 * as a map side input, and the known-good states are filtered against that view.
 *
 * @param pipeline        pipeline the transforms are applied to
 * @param knownGoodSource source of (file path, file content) pairs for the known-good data
 * @return the notification messages, one collection element per message
 */
private PCollection<String> constructPipeline(
    Pipeline pipeline,
    BoundedSource<KV<List<String>, String>> knownGoodSource) {
  // 1. Load the known-good files.
  PCollection<KV<List<String>, String>> checkedInFiles =
      pipeline.apply("Read known-good data", Read.from(knownGoodSource));

  // 2. Parse each file into a (resource, state) pair.
  PCollection<KV<GCPResource, GCPResourceState>> checkedInStates =
      checkedInFiles.apply(
          ParDo.named("Convert file data to Java objects").of(new FileToState()));

  // 3. Label the parsed states as DESIRED, i.e. checked-in rather than live.
  PCollection<KV<GCPResource, KV<StateSource, GCPResourceState>>> desiredStates =
      checkedInStates.apply(
          ParDo.named("Mark states as being known-good")
              .of(new TagStateWithSource(StateSource.DESIRED)));

  // 4. Keep only the file paths; they identify which resources to query live.
  PCollection<List<String>> filePaths =
      checkedInFiles.apply(
          "Extract just the file paths", ParDo.of(new FilePathFromPair()));

  // 5. Fetch the live resource and state for every path.
  PCollection<KV<GCPResource, GCPResourceState>> observedStates =
      filePaths.apply(
          ParDo.named("Get live resource and states from file path")
              .of(new FilePathToLiveState()));

  // 6. Label those states as LIVE.
  PCollection<KV<GCPResource, KV<StateSource, GCPResourceState>>> liveTagged =
      observedStates.apply(
          ParDo.named("Mark states as being live")
              .of(new TagStateWithSource(StateSource.LIVE)));

  // 7. Join both halves: live states become a map side input for the desired states.
  PCollectionView<Map<GCPResource, KV<StateSource, GCPResourceState>>> liveView =
      liveTagged.apply(View.<GCPResource, KV<StateSource, GCPResourceState>>asMap());
  PCollection<KV<GCPResource, Map<StateSource, GCPResourceState>>> discrepancies =
      desiredStates.apply(
          ParDo.named("Find states that don't match")
              .withSideInputs(liveView)
              .of(new FilterOutMatchingState(liveView)));

  // 8. Produce one alert message per discrepancy.
  return discrepancies.apply(
      ParDo.named("Generate notification messages").of(new StateDiscrepancyMessenger()));
}
Example #14
Source File: DesiredStateEnforcer.java From policyscanner with Apache License 2.0 | 5 votes |
/**
 * Wires up the transforms that compare live project states against known-good states
 * and hand every discrepancy to the auto-fix messenger.
 *
 * <p>The known-good states are exposed as a map side input, and the live states read
 * for {@code org} are filtered against that view.
 *
 * @param pipeline        pipeline the transforms are applied to
 * @param org             organization passed to {@code LiveProjectSource}
 * @param knownGoodSource source of (file path, file content) pairs for the known-good data
 * @return the output of the "Fix discrepancies" step
 */
private PCollection<String> constructPipeline(
    Pipeline pipeline,
    String org,
    BoundedSource<KV<List<String>, String>> knownGoodSource) {
  // 1. Load the known-good files.
  PCollection<KV<List<String>, String>> checkedInFiles =
      pipeline.apply("Read known-good data", Read.from(knownGoodSource));

  // 2. Parse each file into a (resource, state) pair.
  PCollection<KV<GCPResource, GCPResourceState>> checkedInStates =
      checkedInFiles.apply(
          ParDo.named("Convert file data to Java Objects").of(new FileToState()));

  // 3. Label the parsed states as DESIRED, i.e. checked-in rather than live.
  PCollection<KV<GCPResource, KV<StateSource, GCPResourceState>>> desiredStates =
      checkedInStates.apply(
          ParDo.named("Mark states as being known-good")
              .of(new TagStateWithSource(StateSource.DESIRED)));

  // 4. Read the live projects for the organization.
  PCollection<GCPProject> liveProjects =
      pipeline.apply("Read live projects", Read.from(new LiveProjectSource(org)));

  // 5. Extract the state of every live project.
  PCollection<KV<GCPResource, GCPResourceState>> observedStates =
      liveProjects.apply(
          ParDo.named("Extract project policies").of(new ExtractState()));

  // 6. Label those states as LIVE.
  PCollection<KV<GCPResource, KV<StateSource, GCPResourceState>>> liveTagged =
      observedStates.apply(
          ParDo.named("Mark states as being live")
              .of(new TagStateWithSource(StateSource.LIVE)));

  // 7. Join both halves: desired states become a map side input for the live states.
  PCollectionView<Map<GCPResource, KV<StateSource, GCPResourceState>>> desiredView =
      desiredStates.apply(View.<GCPResource, KV<StateSource, GCPResourceState>>asMap());
  PCollection<KV<GCPResource, Map<StateSource, GCPResourceState>>> discrepancies =
      liveTagged.apply(
          ParDo.named("Find states that don't match")
              .withSideInputs(desiredView)
              .of(new FilterOutMatchingState(desiredView)));

  // 8. Report and fix each discrepancy via the messenger defined outside this method.
  return discrepancies.apply(
      ParDo.named("Fix discrepancies").of(discrepancyAutoFixMessenger));
}
Example #15
Source File: JoinKnownGoodAndLiveStates.java From policyscanner with Apache License 2.0 | 4 votes |
/**
 * Constructor for the JoinKnownGoodAndLiveStates DoFn. Stores the supplied view in the
 * {@code view} field without copying or validating it.
 *
 * @param view The PCollectionView which contains the side-input elements, as a map
 *     from resource to its (source, state) pair.
 */
public JoinKnownGoodAndLiveStates (PCollectionView<Map<GCPResource, KV<StateSource, GCPResourceState>>> view) { this.view = view; }
Example #16
Source File: CreatePartitionWorkPacketsDoFn.java From data-timeseries-java with Apache License 2.0 | 4 votes |
/**
 * Creates the DoFn from a work-packet configuration and a side-input view.
 *
 * @param workPacketView work-packet configuration; stored in the {@code workPacketView} field
 * @param sideInput      view over a map from String keys to work packets; stored in the
 *     {@code sideInput} field
 */
public CreatePartitionWorkPacketsDoFn(
    WorkPacketConfig workPacketView,
    PCollectionView<Map<String, WorkPacket>> sideInput) {
  this.sideInput = sideInput;
  this.workPacketView = workPacketView;
}
Example #17
Source File: FlinkBatchTranslationContext.java From flink-dataflow with Apache License 2.0 | 4 votes |
/**
 * Looks up the data set registered for a side-input view.
 *
 * <p>The result is cast to the caller's requested element type without any runtime
 * check, hence the suppressed warning.
 *
 * @param value the view whose data set is wanted
 * @return whatever {@code broadcastDataSets} holds for {@code value}
 */
@SuppressWarnings("unchecked")
public <T> DataSet<T> getSideInputDataSet(PCollectionView<?> value) {
  DataSet<T> registered = (DataSet<T>) broadcastDataSets.get(value);
  return registered;
}
Example #18
Source File: FlinkBatchTranslationContext.java From flink-dataflow with Apache License 2.0 | 4 votes |
/**
 * Registers a data set for a side-input view. The first registration wins: if the view
 * already has an entry, the call leaves it untouched.
 *
 * @param value the view to register
 * @param set   the data set to associate with it
 */
public void setSideInputDataSet(PCollectionView<?> value, DataSet<?> set) {
  if (broadcastDataSets.containsKey(value)) {
    return;
  }
  broadcastDataSets.put(value, set);
}
Example #19
Source File: FlinkAbstractParDoWrapper.java From flink-dataflow with Apache License 2.0 | 4 votes |
/**
 * Side-input lookup; always fails because this wrapper does not support side inputs
 * in Streaming mode.
 *
 * @param view the requested side-input view (never read)
 * @throws RuntimeException unconditionally
 */
@Override public <T> T sideInput(PCollectionView<T> view) { throw new RuntimeException("sideInput() is not supported in Streaming mode."); }
Example #20
Source File: FlinkBatchTransformTranslators.java From flink-dataflow with Apache License 2.0 | 4 votes |
/**
 * Translates a {@code CreatePCollectionView} transform by registering the transform's
 * input data set as the side-input data set of the view it creates.
 *
 * @param transform the view-creating transform
 * @param context   translation context used to resolve inputs and record side inputs
 */
@Override
public void translateNode(
    View.CreatePCollectionView<R, T> transform,
    FlinkBatchTranslationContext context) {
  DataSet<T> sourceDataSet = context.getInputDataSet(context.getInput(transform));

  // NOTE(review): apply(null) is used only to obtain the view object; presumably the
  // input argument is not needed for that - confirm against the SDK.
  PCollectionView<T> createdView = transform.apply(null);

  context.setSideInputDataSet(createdView, sourceDataSet);
}
Example #21
Source File: FindUnmatchedStates.java From policyscanner with Apache License 2.0 | 4 votes |
/**
 * Creates a FindUnmatchedStates instance that keeps the supplied view in its
 * {@code view} field.
 *
 * @param view view over a map from resource to its (source, state) pair; presumably
 *     read as a side input during processing - confirm against the rest of the class
 */
public FindUnmatchedStates( PCollectionView<Map<GCPResource, KV<StateSource, GCPResourceState>>> view) { this.view = view; }
Example #22
Source File: FilterOutMatchingState.java From policyscanner with Apache License 2.0 | 4 votes |
/**
 * Constructor for the FilterOutMatchingState DoFn. Stores the supplied view in the
 * {@code view} field without copying or validating it.
 *
 * @param view The PCollectionView which contains the side-input elements, as a map
 *     from resource to its (source, state) pair.
 */
public FilterOutMatchingState (PCollectionView<Map<GCPResource, KV<StateSource, GCPResourceState>>> view) { this.view = view; }