Java Code Examples for org.apache.beam.sdk.transforms.ParDo#of()
The following examples show how to use org.apache.beam.sdk.transforms.ParDo#of(). Each example is drawn from an open-source project; its source file and license are noted above it.
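Before the project-specific examples, here is a minimal, self-contained sketch of the basic pattern: ParDo.of(DoFn) wraps a DoFn into a ParDo.SingleOutput transform, which is then applied to a PCollection. The class and step names here are illustrative only, not taken from any project below.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollection;

public class ParDoOfSketch {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    PCollection<String> words = pipeline.apply(Create.of("hello", "beam"));

    // ParDo.of(...) turns the anonymous DoFn into a ParDo.SingleOutput<String, Integer>.
    PCollection<Integer> lengths =
        words.apply(
            "ComputeLengths",
            ParDo.of(
                new DoFn<String, Integer>() {
                  @ProcessElement
                  public void processElement(ProcessContext c) {
                    c.output(c.element().length());
                  }
                }));

    pipeline.run().waitUntilFinish();
  }
}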
Example 1
Source File: PrimitiveParDoSingleFactoryTest.java, from beam (Apache License 2.0)
@Test
public void getReplacementTransformGetFn() {
  DoFn<Integer, Long> originalFn = new ToLongFn();
  ParDo.SingleOutput<Integer, Long> originalTransform = ParDo.of(originalFn);
  PCollection<? extends Integer> input = pipeline.apply(Create.of(1, 2, 3));
  AppliedPTransform<
          PCollection<? extends Integer>, PCollection<Long>, ParDo.SingleOutput<Integer, Long>>
      application =
          AppliedPTransform.of(
              "original",
              input.expand(),
              input.apply(originalTransform).expand(),
              originalTransform,
              pipeline);

  PTransformReplacement<PCollection<? extends Integer>, PCollection<Long>> replacementTransform =
      factory.getReplacementTransform(application);
  ParDoSingle<Integer, Long> parDoSingle =
      (ParDoSingle<Integer, Long>) replacementTransform.getTransform();

  assertThat(parDoSingle.getFn(), equalTo(originalTransform.getFn()));
  assertThat(parDoSingle.getFn(), equalTo(originalFn));
}
Example 2
Source File: DisplayDataEvaluatorTest.java, from beam (Apache License 2.0)
@Test
public void testPrimitiveTransform() {
  PTransform<? super PCollection<Integer>, ? super PCollection<Integer>> myTransform =
      ParDo.of(
          new DoFn<Integer, Integer>() {
            @ProcessElement
            public void processElement(ProcessContext c) throws Exception {}

            @Override
            public void populateDisplayData(DisplayData.Builder builder) {
              builder.add(DisplayData.item("foo", "bar"));
            }
          });

  DisplayDataEvaluator evaluator = DisplayDataEvaluator.create();
  Set<DisplayData> displayData = evaluator.displayDataForPrimitiveTransforms(myTransform);

  assertThat(displayData, hasItem(hasDisplayItem("foo")));
}
Example 3
Source File: AnnotateImages.java, from beam (Apache License 2.0)
/**
 * Applies all necessary transforms to call the Vision API. In order to group requests into
 * batches, we assign keys to the requests, as {@link GroupIntoBatches} works only on {@link KV}s.
 */
@Override
public PCollection<List<AnnotateImageResponse>> expand(PCollection<T> input) {
  ParDo.SingleOutput<T, AnnotateImageRequest> inputToRequestMapper;
  if (contextSideInput != null) {
    inputToRequestMapper =
        ParDo.of(new MapInputToRequest(contextSideInput)).withSideInputs(contextSideInput);
  } else {
    inputToRequestMapper = ParDo.of(new MapInputToRequest(null));
  }
  return input
      .apply(inputToRequestMapper)
      .apply(
          WithKeys.of(
                  (SerializableFunction<AnnotateImageRequest, Integer>)
                      ignored -> new Random().nextInt(desiredRequestParallelism))
              .withKeyType(TypeDescriptors.integers()))
      .apply(GroupIntoBatches.ofSize(batchSize))
      .apply(ParDo.of(new PerformImageAnnotation()));
}
Example 4
Source File: NexmarkUtils.java, from beam (Apache License 2.0)
/** Return a transform to log each element, passing it through unchanged. */
public static <T> ParDo.SingleOutput<T, T> log(final String name) {
  return ParDo.of(
      new DoFn<T, T>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          // SLF4J substitutes {} placeholders; "%s" would be logged literally.
          LOG.info("{}: {}", name, c.element());
          c.output(c.element());
        }
      });
}
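Helpers like log() return a ParDo.SingleOutput that composes like any other transform; a hypothetical usage sketch (the events collection is illustrative, not from the source file):

// Log each event as it passes through, unchanged.
PCollection<Event> logged = events.apply("LogEvents", NexmarkUtils.log("events"));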
Example 5
Source File: TypedPValueTest.java, from beam (Apache License 2.0)
@Test
public void testFinishSpecifyingShouldFailIfNoCoderInferrable() {
  p.enableAbandonedNodeEnforcement(false);

  PCollection<Integer> created = p.apply(Create.of(1, 2, 3));
  ParDo.SingleOutput<Integer, EmptyClass> uninferrableParDo = ParDo.of(new EmptyClassDoFn());
  PCollection<EmptyClass> unencodable = created.apply(uninferrableParDo);

  thrown.expect(IllegalStateException.class);
  thrown.expectMessage("Unable to return a default Coder");
  thrown.expectMessage("Inferring a Coder from the CoderRegistry failed");

  unencodable.finishSpecifying(created, uninferrableParDo);
}
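When coder inference fails like this, the usual remedy is to set a coder on the output explicitly; a hedged sketch with illustrative names (MyType and MyDoFn are not from the source file, and MyType must implement Serializable for SerializableCoder to apply):

// Hypothetical fix: declare the output coder instead of relying on inference.
PCollection<MyType> typed =
    created
        .apply(ParDo.of(new MyDoFn()))
        .setCoder(SerializableCoder.of(MyType.class));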
Example 6
Source File: StructuredStreamingPipelineStateTest.java, from beam (Apache License 2.0)
private ParDo.SingleOutput<String, String> printParDo(final String prefix) {
  return ParDo.of(
      new DoFn<String, String>() {
        @ProcessElement
        public void processElement(final ProcessContext c) {
          System.out.println(prefix + " " + c.element());
        }
      });
}
Example 7
Source File: NexmarkUtils.java, from beam (Apache License 2.0)
/** Return a transform to cast each element to {@link KnownSize}. */
private static <T extends KnownSize> ParDo.SingleOutput<T, KnownSize> castToKnownSize() {
  return ParDo.of(
      new DoFn<T, KnownSize>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          c.output(c.element());
        }
      });
}
Example 8
Source File: SparkPipelineStateTest.java, from beam (Apache License 2.0)
private ParDo.SingleOutput<String, String> printParDo(final String prefix) {
  return ParDo.of(
      new DoFn<String, String>() {
        @ProcessElement
        public void processElement(final ProcessContext c) {
          System.out.println(prefix + " " + c.element());
        }
      });
}
Example 9
Source File: NexmarkUtils.java, from beam (Apache License 2.0)
/** Return a transform to make explicit the timestamp of each element. */
public static <T> ParDo.SingleOutput<T, TimestampedValue<T>> stamp(String name) {
  return ParDo.of(
      new DoFn<T, TimestampedValue<T>>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          c.output(TimestampedValue.of(c.element(), c.timestamp()));
        }
      });
}
Example 10
Source File: NexmarkUtils.java, from beam (Apache License 2.0)
/** Return a transform to count and discard each element. */
public static <T> ParDo.SingleOutput<T, Void> devNull(final String name) {
  return ParDo.of(
      new DoFn<T, Void>() {
        final Counter discardedCounterMetric = Metrics.counter(name, "discarded");

        @ProcessElement
        public void processElement(ProcessContext c) {
          discardedCounterMetric.inc();
        }
      });
}
Example 11
Source File: NexmarkUtils.java, from beam (Apache License 2.0)
/** Return a transform to pass through events, counting them as they go by. */
public static ParDo.SingleOutput<Event, Event> snoop(final String name) {
  return ParDo.of(
      new DoFn<Event, Event>() {
        final Counter eventCounter = Metrics.counter(name, "events");
        final Counter newPersonCounter = Metrics.counter(name, "newPersons");
        final Counter newAuctionCounter = Metrics.counter(name, "newAuctions");
        final Counter bidCounter = Metrics.counter(name, "bids");
        final Counter endOfStreamCounter = Metrics.counter(name, "endOfStream");

        @ProcessElement
        public void processElement(ProcessContext c) {
          eventCounter.inc();
          if (c.element().newPerson != null) {
            newPersonCounter.inc();
          } else if (c.element().newAuction != null) {
            newAuctionCounter.inc();
          } else if (c.element().bid != null) {
            bidCounter.inc();
          } else {
            endOfStreamCounter.inc();
          }
          info("%s snooping element %s", name, c.element());
          c.output(c.element());
        }
      });
}
Example 12
Source File: TalendIOTest.java, from component-runtime (Apache License 2.0)
private ParDo.SingleOutput<Sample, Record> toRecord() {
    return ParDo.of(new DoFn<Sample, Record>() {

        @ProcessElement
        public void toData(final ProcessContext sample) {
            final Sample element = sample.element();
            final RecordBuilderFactory builderFactory = new AvroRecordBuilderFactoryProvider().apply(null);
            sample.output(builderFactory.newRecordBuilder().withString("data", element.getData()).build());
        }
    });
}
Example 13
Source File: TalendIOTest.java, from component-runtime (Apache License 2.0)
private ParDo.SingleOutput<Record, SampleLength> toSampleLength() {
    return ParDo.of(new DoFn<Record, SampleLength>() {

        @ProcessElement
        public void onElement(final ProcessContext ctx) {
            final Collection<Record> array = ctx.element().getArray(Record.class, "__default__");
            ctx.output(new SampleLength(array.iterator().next().getString("data").length()));
        }
    });
}
Example 14
Source File: TalendIOTest.java, from component-runtime (Apache License 2.0)
private ParDo.SingleOutput<SampleLength, Integer> toInt() {
    return ParDo.of(new DoFn<SampleLength, Integer>() {

        @ProcessElement
        public void toInt(final ProcessContext pc) {
            pc.output(pc.element().len);
        }
    });
}
Example 15
Source File: SnowflakeIO.java, from beam (Apache License 2.0)
private ParDo.SingleOutput<Object, Object> copyToTable(
    SnowflakeService snowflakeService, String stagingBucketDir) {
  return ParDo.of(
      new CopyToTableFn<>(
          getDataSourceProviderFn(),
          getTable(),
          getQuery(),
          stagingBucketDir,
          getStorageIntegrationName(),
          getWriteDisposition(),
          snowflakeService));
}
Example 16
Source File: TransformTransform.java, from hop (Apache License 2.0)
@Override
public PCollectionTuple expand( PCollection<HopRow> input ) {
  try {
    // Only initialize once on this node/vm
    //
    BeamHop.init( transformPluginClasses, xpPluginClasses );

    // Similarly for the output: create a TupleTag list for the target transforms...
    //
    TupleTag<HopRow> mainOutputTupleTag = new TupleTag<HopRow>( HopBeamUtil.createMainOutputTupleId( transformName ) ) {
    };
    List<TupleTag<HopRow>> targetTupleTags = new ArrayList<>();
    TupleTagList targetTupleTagList = null;
    for ( String targetStep : targetSteps ) {
      String tupleId = HopBeamUtil.createTargetTupleId( transformName, targetStep );
      TupleTag<HopRow> tupleTag = new TupleTag<HopRow>( tupleId ) {
      };
      targetTupleTags.add( tupleTag );
      if ( targetTupleTagList == null ) {
        targetTupleTagList = TupleTagList.of( tupleTag );
      } else {
        targetTupleTagList = targetTupleTagList.and( tupleTag );
      }
    }
    if ( targetTupleTagList == null ) {
      targetTupleTagList = TupleTagList.empty();
    }

    // Create a new transform function; this initializes the transform
    //
    StepFn stepFn = new StepFn( variableValues, metastoreJson, transformPluginClasses, xpPluginClasses,
      transformName, stepPluginId, stepMetaInterfaceXml, inputRowMetaJson, inputStep,
      targetSteps, infoSteps, infoRowMetaJsons );

    // The actual transform functionality
    //
    ParDo.SingleOutput<HopRow, HopRow> parDoStepFn = ParDo.of( stepFn );

    // Add optional side inputs...
    //
    if ( infoCollectionViews.size() > 0 ) {
      parDoStepFn = parDoStepFn.withSideInputs( infoCollectionViews );
    }

    // Specify the main output and targeted outputs
    //
    ParDo.MultiOutput<HopRow, HopRow> multiOutput = parDoStepFn.withOutputTags( mainOutputTupleTag, targetTupleTagList );

    // Apply the multi output parallel do transform function to the main input stream
    //
    PCollectionTuple collectionTuple = input.apply( multiOutput );

    // The tuple contains everything we need;
    // just make sure to retrieve the PCollections using the correct tuple IDs
    // ( use HopBeamUtil.createTargetTupleId()... to make sure ).
    //
    return collectionTuple;
  } catch ( Exception e ) {
    numErrors.inc();
    LOG.error( "Error transforming data in transform '" + transformName + "'", e );
    throw new RuntimeException( "Error transforming data in transform", e );
  }
}
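Per the closing comment, downstream code retrieves each output from the returned tuple by rebuilding a tag with the same tuple id (TupleTag equality is id-based); a hedged sketch with illustrative variable names, not from the source file:

// Hypothetical retrieval of the main output PCollection from the tuple.
TupleTag<HopRow> mainTag =
    new TupleTag<HopRow>( HopBeamUtil.createMainOutputTupleId( transformName ) ) {
    };
PCollection<HopRow> mainOutput = collectionTuple.get( mainTag );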
Example 17
Source File: Transforms.java, from nomulus (Apache License 2.0)
/**
 * Returns CommitLog files with timestamps between {@code fromTime} (inclusive) and {@code
 * toTime} (exclusive).
 */
public static PTransform<PCollection<? extends String>, PCollection<String>> filterCommitLogsByTime(
    DateTime fromTime, DateTime toTime) {
  return ParDo.of(new FilterCommitLogFileByTime(fromTime, toTime));
}
Example 18
Source File: Monitor.java, from beam (Apache License 2.0)
public Monitor(String name, String prefix) {
  this.name = name;
  this.prefix = prefix;
  doFn = new MonitorDoFn();
  transform = ParDo.of(doFn);
}
Example 19
Source File: StepTransform.java, from kettle-beam (Apache License 2.0)
@Override
public PCollectionTuple expand( PCollection<KettleRow> input ) {
  try {
    // Only initialize once on this node/vm
    //
    BeamKettle.init( stepPluginClasses, xpPluginClasses );

    // Similarly for the output: create a TupleTag list for the target steps...
    //
    TupleTag<KettleRow> mainOutputTupleTag = new TupleTag<KettleRow>( KettleBeamUtil.createMainOutputTupleId( stepname ) ) {
    };
    List<TupleTag<KettleRow>> targetTupleTags = new ArrayList<>();
    TupleTagList targetTupleTagList = null;
    for ( String targetStep : targetSteps ) {
      String tupleId = KettleBeamUtil.createTargetTupleId( stepname, targetStep );
      TupleTag<KettleRow> tupleTag = new TupleTag<KettleRow>( tupleId ) {
      };
      targetTupleTags.add( tupleTag );
      if ( targetTupleTagList == null ) {
        targetTupleTagList = TupleTagList.of( tupleTag );
      } else {
        targetTupleTagList = targetTupleTagList.and( tupleTag );
      }
    }
    if ( targetTupleTagList == null ) {
      targetTupleTagList = TupleTagList.empty();
    }

    // Create a new step function; this initializes the step
    //
    StepFn stepFn = new StepFn( variableValues, metastoreJson, stepPluginClasses, xpPluginClasses,
      stepname, stepPluginId, stepMetaInterfaceXml, inputRowMetaJson, inputStep,
      targetSteps, infoSteps, infoRowMetaJsons );

    // The actual step functionality
    //
    ParDo.SingleOutput<KettleRow, KettleRow> parDoStepFn = ParDo.of( stepFn );

    // Add optional side inputs...
    //
    if ( infoCollectionViews.size() > 0 ) {
      parDoStepFn = parDoStepFn.withSideInputs( infoCollectionViews );
    }

    // Specify the main output and targeted outputs
    //
    ParDo.MultiOutput<KettleRow, KettleRow> multiOutput = parDoStepFn.withOutputTags( mainOutputTupleTag, targetTupleTagList );

    // Apply the multi output parallel do step function to the main input stream
    //
    PCollectionTuple collectionTuple = input.apply( multiOutput );

    // The tuple contains everything we need;
    // just make sure to retrieve the PCollections using the correct tuple IDs
    // ( use KettleBeamUtil.createTargetTupleId()... to make sure ).
    //
    return collectionTuple;
  } catch ( Exception e ) {
    numErrors.inc();
    LOG.error( "Error transforming data in step '" + stepname + "'", e );
    throw new RuntimeException( "Error transforming data in step", e );
  }
}
Example 20
Source File: HCatToRow.java, from beam (Apache License 2.0)
/**
 * Creates a {@link PTransform} that converts incoming {@link HCatRecord HCatRecords} to {@link
 * Row Rows} using the specified schema.
 *
 * <p>If there is a mismatch between the schema specified here and the actual record schema, or
 * between the internal representation and the schema, runtime errors will occur.
 */
private static PTransform<PCollection<? extends HCatRecord>, PCollection<Row>> forSchema(
    Schema schema) {
  return ParDo.of(new HCatToRowFn(schema));
}