org.apache.beam.sdk.transforms.Contextful Java Examples
The following examples show how to use
org.apache.beam.sdk.transforms.Contextful.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FileIO.java From beam with Apache License 2.0 | 6 votes |
/**
 * Matches each incoming filepattern and emits the metadata of the matched files.
 *
 * <p>When the configuration has a watch interval, the filepatterns are polled repeatedly through
 * {@code Watch.growthOf}; otherwise each filepattern is matched once with {@code MatchFn}. In
 * both cases the result is passed through {@code Reshuffle.viaRandomKey()} before being returned.
 *
 * @param input the filepatterns to match
 * @return the reshuffled metadata of the matched files
 */
@Override
public PCollection<MatchResult.Metadata> expand(PCollection<String> input) {
  final boolean watchForNewFiles = getConfiguration().getWatchInterval() != null;
  final PCollection<MatchResult.Metadata> matched;
  if (watchForNewFiles) {
    // Continuous mode: poll each filepattern and keep only the values of the keyed output.
    matched =
        input
            .apply(
                "Continuously match filepatterns",
                Watch.growthOf(
                        Contextful.of(new MatchPollFn(), Requirements.empty()),
                        new ExtractFilenameFn())
                    .withPollInterval(getConfiguration().getWatchInterval())
                    .withTerminationPerInput(getConfiguration().getWatchTerminationCondition()))
            .apply(Values.create());
  } else {
    // One-shot mode: a single match per filepattern.
    matched =
        input.apply(
            "Match filepatterns",
            ParDo.of(new MatchFn(getConfiguration().getEmptyMatchTreatment())));
  }
  return matched.apply(Reshuffle.viaRandomKey());
}
Example #2
Source File: Write.java From gcp-ingestion with Mozilla Public License 2.0 | 5 votes |
@Override public WithFailures.Result<PDone, PubsubMessage> expand(PCollection<PubsubMessage> input) { ValueProvider<DynamicPathTemplate> pathTemplate = NestedValueProvider.of(outputPrefix, DynamicPathTemplate::new); ValueProvider<String> staticPrefix = NestedValueProvider.of(pathTemplate, value -> value.staticPrefix); FileIO.Write<List<String>, PubsubMessage> write = FileIO .<List<String>, PubsubMessage>writeDynamic() // We can't pass the attribute map to by() directly since MapCoder isn't // deterministic; // instead, we extract an ordered list of the needed placeholder values. // That list is later available to withNaming() to determine output location. .by(message -> pathTemplate.get() .extractValuesFrom(DerivedAttributesMap.of(message.getAttributeMap()))) .withDestinationCoder(ListCoder.of(StringUtf8Coder.of())) // .withCompression(compression) // .via(Contextful.fn(format::encodeSingleMessage), TextIO.sink()) // .to(staticPrefix) // .withNaming(placeholderValues -> NoColonFileNaming.defaultNaming( pathTemplate.get().replaceDynamicPart(placeholderValues), format.suffix())); if (inputType == InputType.pubsub) { // Passing a ValueProvider to withNumShards disables runner-determined sharding, so we // need to be careful to pass this only for streaming input (where runner-determined // sharding is not an option). write = write.withNumShards(numShards); } input // .apply(Window.<PubsubMessage>into(FixedWindows.of(windowDuration)) // We allow lateness up to the maximum Cloud Pub/Sub retention of 7 days documented in // https://cloud.google.com/pubsub/docs/subscriber .withAllowedLateness(Duration.standardDays(7)) // .discardingFiredPanes()) .apply(write); return WithFailures.Result.of(PDone.in(input.getPipeline()), EmptyErrors.in(input.getPipeline())); }
Example #3
Source File: ParseJsons.java From beam with Apache License 2.0 | 5 votes |
/**
 * Maps every input string through {@code readValue}, routing exceptions into {@code failureType}
 * by way of {@code exceptionHandler}.
 *
 * @param input the strings to parse
 * @return the mapped output together with the collected failures
 */
@Override
public WithFailures.Result<PCollection<OutputT>, FailureT> expand(PCollection<String> input) {
  // The context argument is unused; the function declares no requirements.
  Contextful.Fn<String, OutputT> parseFn = (json, context) -> readValue(json);
  return input.apply(
      MapElements.into(new TypeDescriptor<OutputT>() {})
          .via(Contextful.fn(parseFn, Requirements.empty()))
          .exceptionsInto(failureType)
          .exceptionsVia(exceptionHandler));
}
Example #4
Source File: AsJsons.java From beam with Apache License 2.0 | 5 votes |
/**
 * Maps every input element through {@code writeValue} to a string, routing exceptions into
 * {@code failureType} by way of {@code exceptionHandler}.
 *
 * @param input the elements to serialize
 * @return the resulting strings together with the collected failures
 */
@Override
public WithFailures.Result<PCollection<String>, FailureT> expand(PCollection<InputT> input) {
  // The context argument is unused; the function declares no requirements.
  Contextful.Fn<InputT, String> serializeFn = (element, context) -> writeValue(element);
  return input.apply(
      MapElements.into(TypeDescriptors.strings())
          .via(Contextful.fn(serializeFn, Requirements.empty()))
          .exceptionsInto(failureType)
          .exceptionsVia(exceptionHandler));
}
Example #5
Source File: TypeDescriptors.java From beam with Apache License 2.0 | 5 votes |
/**
 * Counterpart of {@link #inputOf(ProcessFunction)} for {@link Contextful.Fn}: extracts the
 * descriptor of the function's input type parameter.
 */
public static <InputT, OutputT> TypeDescriptor<InputT> inputOf(
    Contextful.Fn<InputT, OutputT> fn) {
  // Anonymous subclass so the type arguments are captured in the class's generic signature.
  TypeDescriptors.TypeVariableExtractor<Contextful.Fn<InputT, OutputT>, InputT> inputExtractor =
      new TypeDescriptors.TypeVariableExtractor<Contextful.Fn<InputT, OutputT>, InputT>() {};
  return TypeDescriptors.extractFromTypeParameters(fn, Contextful.Fn.class, inputExtractor);
}
Example #6
Source File: TypeDescriptors.java From beam with Apache License 2.0 | 5 votes |
/**
 * Counterpart of {@link #outputOf(ProcessFunction)} for {@link Contextful.Fn}: extracts the
 * descriptor of the function's output type parameter.
 */
public static <InputT, OutputT> TypeDescriptor<OutputT> outputOf(
    Contextful.Fn<InputT, OutputT> fn) {
  // Anonymous subclass so the type arguments are captured in the class's generic signature.
  TypeDescriptors.TypeVariableExtractor<Contextful.Fn<InputT, OutputT>, OutputT> outputExtractor =
      new TypeDescriptors.TypeVariableExtractor<Contextful.Fn<InputT, OutputT>, OutputT>() {};
  return TypeDescriptors.extractFromTypeParameters(fn, Contextful.Fn.class, outputExtractor);
}
Example #7
Source File: FileIOTest.java From beam with Apache License 2.0 | 5 votes |
@Test @Category(NeedsRunner.class) public void testFileIoDynamicNaming() throws IOException { // Test for BEAM-6407. String outputFileName = tmpFolder.newFile().getAbsolutePath(); PCollectionView<String> outputFileNameView = p.apply("outputFileName", Create.of(outputFileName)).apply(View.asSingleton()); Contextful.Fn<String, FileIO.Write.FileNaming> fileNaming = (element, c) -> (window, pane, numShards, shardIndex, compression) -> c.sideInput(outputFileNameView) + "-" + shardIndex; p.apply(Create.of("")) .apply( "WriteDynamicFilename", FileIO.<String, String>writeDynamic() .by(SerializableFunctions.constant("")) .withDestinationCoder(StringUtf8Coder.of()) .via(TextIO.sink()) .withTempDirectory(tmpFolder.newFolder().getAbsolutePath()) .withNaming( Contextful.of( fileNaming, Requirements.requiresSideInputs(outputFileNameView)))); // We need to run the TestPipeline with the default options. p.run(PipelineOptionsFactory.create()).waitUntilFinish(); assertTrue( "Output file shard 0 exists after pipeline completes", new File(outputFileName + "-0").exists()); }
Example #8
Source File: FileIO.java From beam with Apache License 2.0 | 5 votes |
/**
 * Like {@link #via(Contextful, Contextful)}, but the output type of the sink is the same as the
 * type of the input collection, so the output function is the identity. The sink function must
 * create a new {@link Sink} instance every time it is called.
 *
 * <p>NOTE(review): the raw {@code (Contextful)} cast looks load-bearing. The argument's type
 * parameter is {@code Sink<UserT>}, not {@code Sink<?>}, and because the call then becomes an
 * unchecked invocation the remainder of the builder chain is presumably typed against the raw
 * builder. Confirm against the builder's setter signatures before tightening the generics or
 * splitting the chain into typed locals.
 *
 * @param sinkFn function producing the sink for a destination; must not be null
 * @return a copy of this write with the sink function set and an identity output function
 */
public Write<DestinationT, UserT> via(Contextful<Fn<DestinationT, Sink<UserT>>> sinkFn) { checkArgument(sinkFn != null, "sinkFn can not be null"); return toBuilder() .setSinkFn((Contextful) sinkFn) .setOutputFn(fn(SerializableFunctions.<UserT>identity())) .build(); }
Example #9
Source File: FileIO.java From beam with Apache License 2.0 | 5 votes |
/**
 * Variant of {@link #via(Contextful, Contextful)} that uses one sink for every destination: the
 * given sink is wrapped with {@code SerializableFunctions.clonesOf} and passed on as the sink
 * function.
 *
 * @param outputFn maps an element to the sink's output type; must not be null
 * @param sink the sink used for all destinations; must not be null
 */
public <OutputT> Write<DestinationT, UserT> via(
    Contextful<Fn<UserT, OutputT>> outputFn, final Sink<OutputT> sink) {
  // The sink is validated before the output function, so that message wins when both are null.
  checkArgument(sink != null, "sink can not be null");
  checkArgument(outputFn != null, "outputFn can not be null");
  Contextful<Fn<DestinationT, Sink<OutputT>>> sinkPerDestination =
      fn(SerializableFunctions.clonesOf(sink));
  return via(outputFn, sinkPerDestination);
}
Example #10
Source File: FileIO.java From beam with Apache License 2.0 | 5 votes |
/**
 * Specifies how to create a {@link Sink} for a particular destination and how to map the
 * element type to the sink's output type. The sink function must create a new {@link Sink}
 * instance every time it is called.
 *
 * <p>The sink function is validated before the output function, so its message is the one
 * reported when both arguments are null.
 *
 * <p>NOTE(review): the raw {@code (Contextful)} cast looks load-bearing. It turns the call into
 * an unchecked invocation, which presumably is also what lets {@code outputFn} (typed with
 * {@code OutputT}) be passed to the following setter. Confirm against the builder's setter
 * signatures before tightening the generics or splitting the chain into typed locals.
 *
 * @param outputFn maps an element to the sink's output type; must not be null
 * @param sinkFn creates the sink for a destination; must not be null
 * @return a copy of this write with both functions set
 */
public <OutputT> Write<DestinationT, UserT> via( Contextful<Fn<UserT, OutputT>> outputFn, Contextful<Fn<DestinationT, Sink<OutputT>>> sinkFn) { checkArgument(sinkFn != null, "sinkFn can not be null"); checkArgument(outputFn != null, "outputFn can not be null"); return toBuilder().setSinkFn((Contextful) sinkFn).setOutputFn(outputFn).build(); }
Example #11
Source File: Write.java From gcp-ingestion with Mozilla Public License 2.0 | 4 votes |
/**
 * Encodes message payloads with an {@code AvroEncoder}, windows the successfully encoded
 * messages, and writes them with a dynamic-destination {@code FileIO} write.
 *
 * @param input the messages to encode and write
 * @return a result pairing {@code PDone} with the encoder's error output
 * @throws RuntimeException if the path template lacks any of the {@code document_namespace},
 *     {@code document_type} or {@code document_version} placeholders
 */
@Override
public WithFailures.Result<PDone, PubsubMessage> expand(PCollection<PubsubMessage> input) {
  ValueProvider<String> staticPrefix =
      NestedValueProvider.of(pathTemplate, template -> template.staticPrefix);

  List<String> requiredPlaceholders =
      Arrays.asList("document_namespace", "document_type", "document_version");
  List<String> presentPlaceholders = pathTemplate.get().getPlaceholderNames();
  boolean hasAllRequired = presentPlaceholders.containsAll(requiredPlaceholders);
  if (!hasAllRequired) {
    throw new RuntimeException(
        "Path template must contain document namespace, type, and version");
  }

  AvroEncoder encoder = new AvroEncoder();

  // A ParDo is used instead of a PTransform extending MapElementsWithErrors. This means manual
  // error handling with output tags, but it allows for side-input of the singleton SchemaStore
  // PCollection.
  ParDo.MultiOutput<PubsubMessage, PubsubMessage> encodePayloadAsAvro =
      ParDo.of(encoder).withOutputTags(successTag, TupleTagList.of(errorTag));

  FileIO.Write<List<String>, PubsubMessage> baseWrite =
      FileIO.<List<String>, PubsubMessage>writeDynamic()
          .by(message -> pathTemplate.get().extractValuesFrom(message.getAttributeMap()))
          .withDestinationCoder(ListCoder.of(StringUtf8Coder.of()))
          .withCompression(compression)
          .via(Contextful.fn(encoder::getSink))
          .to(staticPrefix)
          .withNaming(
              placeholderValues ->
                  NoColonFileNaming.defaultNaming(
                      pathTemplate.get().replaceDynamicPart(placeholderValues), ".avro"));

  // Passing a ValueProvider to withNumShards disables runner-determined sharding, so it is only
  // passed for streaming input (where runner-determined sharding is not an option).
  FileIO.Write<List<String>, PubsubMessage> write =
      inputType == InputType.pubsub ? baseWrite.withNumShards(numShards) : baseWrite;

  // Without this, we may run into `Inputs to Flatten had incompatible window windowFns`.
  // Lateness is allowed up to the maximum Cloud Pub/Sub retention of 7 days documented in
  // https://cloud.google.com/pubsub/docs/subscriber
  Window<PubsubMessage> window =
      Window.<PubsubMessage>into(FixedWindows.of(windowDuration))
          .withAllowedLateness(Duration.standardDays(7))
          .discardingFiredPanes();

  PCollectionTuple encoded = input.apply("encodePayloadAsAvro", encodePayloadAsAvro);
  encoded.get(successTag).apply(window).apply(write);

  return WithFailures.Result.of(PDone.in(input.getPipeline()), encoded.get(errorTag));
}
Example #12
Source File: FileIO.java From beam with Apache License 2.0 | 4 votes |
/**
 * Validates the naming-related settings and returns the function that maps a destination to the
 * {@link FileNaming} to use for it.
 *
 * <p>When {@code getDynamic()} is true, a constant naming, a filename prefix and a filename
 * suffix must all be unset and a naming function must be set. The returned function delegates to
 * that naming function (keeping its requirements) and makes the result relative to the output
 * directory when one is configured.
 *
 * <p>Otherwise a naming function must be unset. The naming is either the configured constant
 * naming (in which case prefix and suffix must be unset) or {@code defaultNaming} built from the
 * prefix (default {@code "output"}) and suffix (default {@code ""}). It is made relative to the
 * output directory when one is configured and returned as a constant function.
 *
 * @return the resolved destination-to-naming function
 * @throws IllegalArgumentException if the configured options are mutually incompatible
 */
@VisibleForTesting
Contextful<Fn<DestinationT, FileNaming>> resolveFileNamingFn() {
  if (getDynamic()) {
    checkArgument(
        getConstantFileNaming() == null,
        "when using writeDynamic(), must use versions of .withNaming() "
            + "that take functions from DestinationT");
    checkArgument(getFilenamePrefix() == null, ".withPrefix() requires write()");
    checkArgument(getFilenameSuffix() == null, ".withSuffix() requires write()");
    checkArgument(
        getFileNamingFn() != null,
        "when using writeDynamic(), must specify "
            + ".withNaming() taking a function from DestinationT");
    return fn(
        (element, c) -> {
          FileNaming naming = getFileNamingFn().getClosure().apply(element, c);
          return getOutputDirectory() == null
              ? naming
              : relativeFileNaming(getOutputDirectory(), naming);
        },
        getFileNamingFn().getRequirements());
  } else {
    checkArgument(
        getFileNamingFn() == null,
        ".withNaming() taking a function from DestinationT requires writeDynamic()");
    FileNaming constantFileNaming;
    if (getConstantFileNaming() == null) {
      constantFileNaming =
          defaultNaming(
              MoreObjects.firstNonNull(getFilenamePrefix(), StaticValueProvider.of("output")),
              MoreObjects.firstNonNull(getFilenameSuffix(), StaticValueProvider.of("")));
    } else {
      // Each message names the option that was actually checked; previously the prefix check
      // reported .withSuffix() and the suffix check reported .withPrefix().
      checkArgument(
          getFilenamePrefix() == null, ".to(FileNaming) is incompatible with .withPrefix()");
      checkArgument(
          getFilenameSuffix() == null, ".to(FileNaming) is incompatible with .withSuffix()");
      constantFileNaming = getConstantFileNaming();
    }
    if (getOutputDirectory() != null) {
      constantFileNaming = relativeFileNaming(getOutputDirectory(), constantFileNaming);
    }
    return fn(SerializableFunctions.<DestinationT, FileNaming>constant(constantFileNaming));
  }
}
Example #13
Source File: FileIO.java From beam with Apache License 2.0 | 4 votes |
/**
 * Context-aware variant of {@link #withNaming(SerializableFunction)}: the naming function may
 * access context, such as side inputs.
 *
 * @param namingFn maps a destination to its {@code FileNaming}; must not be null
 * @return a copy of this write with the naming function set
 */
public Write<DestinationT, UserT> withNaming(
    Contextful<Fn<DestinationT, FileNaming>> namingFn) {
  checkArgument(namingFn != null, "namingFn can not be null");
  return toBuilder()
      .setFileNamingFn(namingFn)
      .build();
}
Example #14
Source File: FileIO.java From beam with Apache License 2.0 | 4 votes |
/**
 * Context-aware variant of {@link #by}: the destination function may access context such as side
 * inputs.
 *
 * @param destinationFn maps an element to its destination; must not be null
 * @return a copy of this write with the destination function set
 */
public Write<DestinationT, UserT> by(Contextful<Fn<UserT, DestinationT>> destinationFn) {
  checkArgument(destinationFn != null, "destinationFn can not be null");
  return toBuilder()
      .setDestinationFn(destinationFn)
      .build();
}
Example #15
Source File: FileIO.java From beam with Apache License 2.0 | 4 votes |
/** Builder setter for the contextful function from {@code DestinationT} to {@code FileNaming}. */
abstract Builder<DestinationT, UserT> setFileNamingFn( Contextful<Fn<DestinationT, FileNaming>> namingFn);
Example #16
Source File: FileIO.java From beam with Apache License 2.0 | 4 votes |
/** Builder setter for the contextful function from {@code UserT} to {@code DestinationT}. */
abstract Builder<DestinationT, UserT> setDestinationFn( Contextful<Fn<UserT, DestinationT>> destinationFn);
Example #17
Source File: FileIO.java From beam with Apache License 2.0 | 4 votes |
/**
 * Returns the contextful function from {@code DestinationT} to {@code FileNaming}; declared
 * {@code @Nullable}, so callers must handle {@code null}.
 */
@Nullable abstract Contextful<Fn<DestinationT, FileNaming>> getFileNamingFn();
Example #18
Source File: FileIO.java From beam with Apache License 2.0 | 4 votes |
/**
 * Returns the contextful function from {@code UserT} to {@code DestinationT}; declared
 * {@code @Nullable}, so callers must handle {@code null}.
 */
@Nullable abstract Contextful<Fn<UserT, DestinationT>> getDestinationFn();
Example #19
Source File: FileIO.java From beam with Apache License 2.0 | 4 votes |
/**
 * Returns the contextful function applied to {@code UserT} elements; its output type is a
 * wildcard. Declared {@code @Nullable}, so callers must handle {@code null}.
 */
@Nullable abstract Contextful<Fn<UserT, ?>> getOutputFn();
Example #20
Source File: FileIO.java From beam with Apache License 2.0 | 4 votes |
/**
 * Returns the contextful function from {@code DestinationT} to a {@code Sink} of wildcard
 * element type. Declared {@code @Nullable}, so callers must handle {@code null}.
 */
@Nullable abstract Contextful<Fn<DestinationT, Sink<?>>> getSinkFn();
Example #21
Source File: FileIO.java From beam with Apache License 2.0 | votes |
/**
 * Builder setter for the contextful function applied to {@code UserT} elements; the function's
 * output type is a wildcard.
 */
abstract Builder<DestinationT, UserT> setOutputFn(Contextful<Fn<UserT, ?>> outputFn);
Example #22
Source File: FileIO.java From beam with Apache License 2.0 | votes |
/**
 * Builder setter for the contextful function from {@code DestinationT} to a {@code Sink} of
 * wildcard element type.
 */
abstract Builder<DestinationT, UserT> setSinkFn(Contextful<Fn<DestinationT, Sink<?>>> sink);