org.apache.beam.sdk.values.PDone Java Examples
The following examples show how to use org.apache.beam.sdk.values.PDone. They are drawn from open source projects; each example is preceded by its source file, originating project, and license. PDone is the trivial POutput of a PTransform that produces no output PCollection (typically a sink at the end of a pipeline branch) and is obtained via PDone.in(pipeline).
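Before the individual examples, here is a minimal, self-contained sketch of the pattern nearly all of them share: a write-style PTransform that performs a side effect in its expand method and returns PDone. The class name LogSink and the String element type are illustrative assumptions, not taken from any project below.

import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PDone;

// Hypothetical sink: logs each element, then terminates the pipeline branch.
class LogSink extends PTransform<PCollection<String>, PDone> {
  @Override
  public PDone expand(PCollection<String> input) {
    // Perform the side-effecting write; nothing is emitted downstream.
    input.apply(
        "LogElements",
        ParDo.of(
            new DoFn<String, Void>() {
              @ProcessElement
              public void processElement(ProcessContext c) {
                System.out.println(c.element());
              }
            }));
    // PDone.in(...) marks this transform as terminal for its pipeline.
    return PDone.in(input.getPipeline());
  }
}

Applying it looks like lines.apply("LogToStdout", new LogSink()); because the result is PDone, no downstream transform can consume it.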
Example #1
Source File: RepublishPerDocType.java From gcp-ingestion with Mozilla Public License 2.0
@Override
public PDone expand(PCollection<PubsubMessage> input) {
  List<Destination> destinations = baseOptions.getPerDocTypeDestinations().entrySet().stream()
      .flatMap(
          entry -> entry.getValue().stream().map(value -> new Destination(entry.getKey(), value)))
      .collect(Collectors.toList());
  int numDestinations = destinations.size();
  int numPartitions = numDestinations + 1;
  PCollectionList<PubsubMessage> partitioned = input.apply("PartitionByDocType",
      Partition.of(numPartitions, new PartitionFn(destinations)));
  for (int i = 0; i < numDestinations; i++) {
    Destination destination = destinations.get(i);
    RepublisherOptions.Parsed opts = baseOptions.as(RepublisherOptions.Parsed.class);
    opts.setOutput(StaticValueProvider.of(destination.dest));
    String name = String.join("_", "republish", destination.namespace, destination.docType);
    partitioned.get(i).apply(name, opts.getOutputType().write(opts));
  }
  return PDone.in(input.getPipeline());
}
Example #2
Source File: KafkaIO.java From beam with Apache License 2.0
@Override
public PDone expand(PCollection<KV<K, V>> input) {
  checkArgument(getTopic() != null, "withTopic() is required");
  KvCoder<K, V> kvCoder = (KvCoder<K, V>) input.getCoder();
  return input
      .apply(
          "Kafka ProducerRecord",
          MapElements.via(
              new SimpleFunction<KV<K, V>, ProducerRecord<K, V>>() {
                @Override
                public ProducerRecord<K, V> apply(KV<K, V> element) {
                  return new ProducerRecord<>(getTopic(), element.getKey(), element.getValue());
                }
              }))
      .setCoder(ProducerRecordCoder.of(kvCoder.getKeyCoder(), kvCoder.getValueCoder()))
      .apply(getWriteRecordsTransform());
}
Example #3
Source File: TfIdf.java From beam with Apache License 2.0
@Override
public PDone expand(PCollection<KV<String, KV<URI, Double>>> wordToUriAndTfIdf) {
  return wordToUriAndTfIdf
      .apply(
          "Format",
          ParDo.of(
              new DoFn<KV<String, KV<URI, Double>>, String>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  c.output(
                      String.format(
                          "%s,\t%s,\t%f",
                          c.element().getKey(),
                          c.element().getValue().getKey(),
                          c.element().getValue().getValue()));
                }
              }))
      .apply(TextIO.write().to(output).withSuffix(".csv"));
}
Example #4
Source File: ExpansionService.java From beam with Apache License 2.0
default Map<String, PCollection<?>> extractOutputs(OutputT output) {
  if (output instanceof PDone) {
    return Collections.emptyMap();
  } else if (output instanceof PCollection) {
    return ImmutableMap.of("output", (PCollection<?>) output);
  } else if (output instanceof PCollectionTuple) {
    return ((PCollectionTuple) output)
        .getAll().entrySet().stream()
        .collect(Collectors.toMap(entry -> entry.getKey().getId(), Map.Entry::getValue));
  } else if (output instanceof PCollectionList<?>) {
    PCollectionList<?> listOutput = (PCollectionList<?>) output;
    return IntStream.range(0, listOutput.size())
        .boxed()
        .collect(Collectors.toMap(Object::toString, listOutput::get));
  } else {
    throw new UnsupportedOperationException("Unknown output type: " + output.getClass());
  }
}
Example #5
Source File: DirectRunnerTest.java From beam with Apache License 2.0
private PTransform<PBegin, PDone> outputStartTo(StaticQueue<Integer> queue) {
  return new PTransform<PBegin, PDone>() {
    @Override
    public PDone expand(PBegin input) {
      input
          .apply(Create.of(1))
          .apply(
              MapElements.into(TypeDescriptors.voids())
                  .via(
                      in -> {
                        queue.add(in);
                        return null;
                      }));
      return PDone.in(input.getPipeline());
    }
  };
}
Example #6
Source File: BeamSqlBuiltinFunctionsIntegrationTestBase.java From beam with Apache License 2.0
@Override
public PDone expand(PBegin begin) {
  PCollection<Boolean> result =
      begin
          .apply(Create.of(DUMMY_ROW).withRowSchema(DUMMY_SCHEMA))
          .apply(SqlTransform.query("SELECT " + expr))
          .apply(MapElements.into(TypeDescriptors.booleans()).via(row -> row.getBoolean(0)));
  PAssert.that(result)
      .satisfies(
          input -> {
            assertTrue("Test expression is false: " + expr, Iterables.getOnlyElement(input));
            return null;
          });
  return PDone.in(begin.getPipeline());
}
Example #7
Source File: ClickHouseIO.java From beam with Apache License 2.0
@Override
public PDone expand(PCollection<T> input) {
  TableSchema tableSchema = getTableSchema(jdbcUrl(), table());
  Properties properties = properties();
  set(properties, ClickHouseQueryParam.MAX_INSERT_BLOCK_SIZE, maxInsertBlockSize());
  set(properties, ClickHouseQueryParam.INSERT_QUORUM, insertQuorum());
  set(properties, "insert_distributed_sync", insertDistributedSync());
  set(properties, "insert_deduplication", insertDeduplicate());
  WriteFn<T> fn =
      new AutoValue_ClickHouseIO_WriteFn.Builder<T>()
          .jdbcUrl(jdbcUrl())
          .table(table())
          .maxInsertBlockSize(maxInsertBlockSize())
          .schema(tableSchema)
          .properties(properties)
          .initialBackoff(initialBackoff())
          .maxCumulativeBackoff(maxCumulativeBackoff())
          .maxRetries(maxRetries())
          .build();
  input.apply(ParDo.of(fn));
  return PDone.in(input.getPipeline());
}
Example #8
Source File: RepublishPerNamespace.java From gcp-ingestion with Mozilla Public License 2.0
@Override
public PDone expand(PCollection<PubsubMessage> input) {
  List<Destination> destinations = baseOptions.getPerNamespaceDestinations().entrySet().stream()
      .map(entry -> new Destination(entry.getKey(), entry.getValue()))
      .collect(Collectors.toList());
  int numDestinations = destinations.size();
  int numPartitions = numDestinations + 1;
  PCollectionList<PubsubMessage> partitioned = input.apply("PartitionByNamespace",
      Partition.of(numPartitions, new PartitionFn(destinations)));
  for (int i = 0; i < numDestinations; i++) {
    Destination destination = destinations.get(i);
    RepublisherOptions.Parsed opts = baseOptions.as(RepublisherOptions.Parsed.class);
    opts.setOutput(StaticValueProvider.of(destination.dest));
    String name = String.join("_", "republish", destination.namespace);
    partitioned.get(i).apply(name, opts.getOutputType().write(opts));
  }
  return PDone.in(input.getPipeline());
}
Example #9
Source File: WriteFailureMetricsTransform.java From feast with Apache License 2.0
@Override
public PDone expand(PCollection<FailedElement> input) {
  ImportOptions options = input.getPipeline().getOptions().as(ImportOptions.class);
  if ("statsd".equals(options.getMetricsExporterType())) {
    input.apply(
        "WriteDeadletterMetrics",
        ParDo.of(
            WriteDeadletterRowMetricsDoFn.newBuilder()
                .setStatsdHost(options.getStatsdHost())
                .setStatsdPort(options.getStatsdPort())
                .setStoreName(getStoreName())
                .build()));
  } else {
    input.apply(
        "Noop",
        ParDo.of(
            new DoFn<FailedElement, Void>() {
              @ProcessElement
              public void processElement(ProcessContext c) {}
            }));
  }
  return PDone.in(input.getPipeline());
}
Example #10
Source File: XmlIO.java From beam with Apache License 2.0
@Override
public PDone expand(PCollection<T> input) {
  checkArgument(getRecordClass() != null, "withRecordClass() is required");
  checkArgument(getRootElement() != null, "withRootElement() is required");
  checkArgument(getFilenamePrefix() != null, "to() is required");
  checkArgument(getCharset() != null, "withCharset() is required");
  try {
    JAXBContext.newInstance(getRecordClass());
  } catch (JAXBException e) {
    throw new RuntimeException("Error binding classes to a JAXB Context.", e);
  }
  ResourceId prefix =
      FileSystems.matchNewResource(getFilenamePrefix(), false /* isDirectory */);
  input.apply(
      FileIO.<T>write()
          .via(
              sink(getRecordClass())
                  .withCharset(Charset.forName(getCharset()))
                  .withRootElement(getRootElement()))
          .to(prefix.getCurrentDirectory().toString())
          .withPrefix(prefix.getFilename())
          .withSuffix(".xml")
          .withIgnoreWindowing());
  return PDone.in(input.getPipeline());
}
Example #11
Source File: JdbcIO.java From beam with Apache License 2.0
@Override
public PDone expand(PCollection<T> input) {
  // fixme: validate invalid table input
  if (input.hasSchema() && !hasStatementAndSetter()) {
    checkArgument(
        inner.getTable() != null, "table cannot be null if statement is not provided");
    Schema schema = input.getSchema();
    List<SchemaUtil.FieldWithIndex> fields = getFilteredFields(schema);
    inner =
        inner.withStatement(
            JdbcUtil.generateStatement(
                inner.getTable(),
                fields.stream()
                    .map(SchemaUtil.FieldWithIndex::getField)
                    .collect(Collectors.toList())));
    inner =
        inner.withPreparedStatementSetter(
            new AutoGeneratedPreparedStatementSetter(fields, input.getToRowFunction()));
  }
  inner.expand(input);
  return PDone.in(input.getPipeline());
}
Example #12
Source File: NameUtilsTest.java From beam with Apache License 2.0
@Test
public void testPTransformNameWithAnonOuterClass() throws Exception {
  AnonymousClass anonymousClassObj =
      new AnonymousClass() {
        class NamedInnerClass extends PTransform<PBegin, PDone> {
          @Override
          public PDone expand(PBegin begin) {
            throw new IllegalArgumentException("Should never be applied");
          }
        }

        @Override
        public Object getInnerClassInstance() {
          return new NamedInnerClass();
        }
      };

  assertEquals(
      "NamedInnerClass",
      NameUtils.approximateSimpleName(anonymousClassObj.getInnerClassInstance()));
  assertEquals(
      "NameUtilsTest.NamedInnerClass",
      NameUtils.approximatePTransformName(anonymousClassObj.getInnerClassInstance().getClass()));
}
Example #13
Source File: BigQueryDeadletterSink.java From feast with Apache License 2.0
@Override
public PDone expand(PCollection<FailedElement> input) {
  TimePartitioning partition = new TimePartitioning().setType("DAY");
  partition.setField(TIMESTAMP_COLUMN);
  input
      .apply("FailedElementToTableRow", ParDo.of(new FailedElementToTableRowFn()))
      .apply(
          "WriteFailedElementsToBigQuery",
          BigQueryIO.writeTableRows()
              .to(getTableSpec())
              .withJsonSchema(getJsonSchema())
              .withTimePartitioning(partition)
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));
  return PDone.in(input.getPipeline());
}
Example #14
Source File: PTransformTranslationTest.java From beam with Apache License 2.0
private static AppliedPTransform<?, ?, ?> rawPTransformWithNullSpec(Pipeline pipeline) {
  PTransformTranslation.RawPTransform<PBegin, PDone> rawPTransform =
      new PTransformTranslation.RawPTransform<PBegin, PDone>() {
        @Override
        public String getUrn() {
          return "fake/urn";
        }

        @Nullable
        @Override
        public RunnerApi.FunctionSpec getSpec() {
          return null;
        }
      };
  return AppliedPTransform.<PBegin, PDone, PTransform<PBegin, PDone>>of(
      "RawPTransformWithNoSpec",
      pipeline.begin().expand(),
      PDone.in(pipeline).expand(),
      rawPTransform,
      pipeline);
}
Example #15
Source File: WriteToGCSAvro.java From DataflowTemplates with Apache License 2.0
@Override
public PDone expand(PCollection<KV<String, String>> kafkaRecords) {
  return kafkaRecords
      /*
       * Converting KV<String, String> records to GenericRecord using DoFn and {@link
       * KeyValueToGenericRecordFn} class.
       */
      .apply("Create GenericRecord(s)", ParDo.of(new KeyValueToGenericRecordFn()))
      .setCoder(AvroCoder.of(GenericRecord.class, KeyValueToGenericRecordFn.SCHEMA))
      /*
       * Writing as avro file using {@link AvroIO}.
       *
       * The {@link WindowedFilenamePolicy} class specifies the file path for writing the file.
       * The {@link withNumShards} option specifies the number of shards passed by the user.
       * The {@link withTempDirectory} option sets the base directory used to generate temporary files.
       */
      .apply(
          "Writing as Avro",
          AvroIO.writeGenericRecords(KeyValueToGenericRecordFn.SCHEMA)
              .to(
                  new WindowedFilenamePolicy(
                      outputDirectory(),
                      outputFilenamePrefix(),
                      WriteToGCSUtility.SHARD_TEMPLATE,
                      WriteToGCSUtility.FILE_SUFFIX_MAP.get(WriteToGCSUtility.FileFormat.AVRO)))
              .withTempDirectory(
                  FileBasedSink.convertToFileResourceIfPossible(tempLocation())
                      .getCurrentDirectory())
              .withWindowedWrites()
              .withNumShards(numShards()));
}
Example #16
Source File: TestBoundedTable.java From beam with Apache License 2.0
@Override
public POutput buildIOWriter(PCollection<Row> input) {
  input.apply(
      ParDo.of(
          new DoFn<Row, Void>() {
            @ProcessElement
            public void processElement(ProcessContext c) {
              CONTENT.add(c.element());
            }

            @Teardown
            public void close() {
              CONTENT.clear();
            }
          }));
  return PDone.in(input.getPipeline());
}
Example #17
Source File: KafkaIO.java From beam with Apache License 2.0
@Override
public PDone expand(PCollection<V> input) {
  return input
      .apply(
          "Kafka values with default key",
          MapElements.via(
              new SimpleFunction<V, KV<K, V>>() {
                @Override
                public KV<K, V> apply(V element) {
                  return KV.of(null, element);
                }
              }))
      .setCoder(KvCoder.of(new NullOnlyCoder<>(), input.getCoder()))
      .apply(kvWriteTransform);
}
Example #18
Source File: KafkaIO.java From DataflowTemplates with Apache License 2.0
@Override
public PDone expand(PCollection<V> input) {
  return input
      .apply(
          "Kafka values with default key",
          MapElements.via(
              new SimpleFunction<V, KV<K, V>>() {
                @Override
                public KV<K, V> apply(V element) {
                  return KV.of(null, element);
                }
              }))
      .setCoder(KvCoder.of(new NullOnlyCoder<>(), input.getCoder()))
      .apply(kvWriteTransform);
}
Example #19
Source File: PAssert.java From beam with Apache License 2.0
@Override public PDone expand(PCollection<T> input) { input .apply("GroupGlobally", new GroupGlobally<>(rewindowingStrategy)) .apply("GetPane", MapElements.via(paneExtractor)) .setCoder(IterableCoder.of(input.getCoder())) .apply("RunChecks", ParDo.of(new SingletonCheckerDoFn<>(checkerFn, site))) .apply("VerifyAssertions", new DefaultConcludeTransform()); return PDone.in(input.getPipeline()); }
Example #20
Source File: WriteToText.java From beam with Apache License 2.0
@Override
public PDone expand(PCollection<InputT> teamAndScore) {
  if (windowed) {
    teamAndScore
        .apply("ConvertToRow", ParDo.of(new BuildRowFn()))
        .apply(new WriteToText.WriteOneFilePerWindow(filenamePrefix));
  } else {
    teamAndScore
        .apply("ConvertToRow", ParDo.of(new BuildRowFn()))
        .apply(TextIO.write().to(filenamePrefix));
  }
  return PDone.in(teamAndScore.getPipeline());
}
Example #21
Source File: PAssert.java From beam with Apache License 2.0
@Override public PDone expand(PCollection<T> input) { input .apply("GroupGlobally", new GroupGlobally<>(rewindowingStrategy)) .apply("GetPane", MapElements.via(paneExtractor)) .setCoder(IterableCoder.of(input.getCoder())) .apply("RunChecks", ParDo.of(new GroupedValuesCheckerDoFn<>(checkerFn, site))) .apply("VerifyAssertions", new DefaultConcludeTransform()); return PDone.in(input.getPipeline()); }
Example #22
Source File: WriteToBigQuery.java From beam with Apache License 2.0
@Override
public PDone expand(PCollection<InputT> teamAndScore) {
  teamAndScore
      .apply("ConvertToRow", ParDo.of(new BuildRowFn()))
      .apply(
          BigQueryIO.writeTableRows()
              .to(getTable(projectId, datasetId, tableName))
              .withSchema(getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));
  return PDone.in(teamAndScore.getPipeline());
}
Example #23
Source File: PAssert.java From beam with Apache License 2.0
@Override
public PDone expand(PBegin input) {
  final PCollectionView<ActualT> actual = input.apply("CreateActual", createActual);
  input
      .apply(Create.of(0).withCoder(VarIntCoder.of()))
      .apply("WindowToken", windowToken)
      .apply(
          "RunChecks",
          ParDo.of(new SideInputCheckerDoFn<>(checkerFn, actual, site)).withSideInputs(actual))
      .apply("VerifyAssertions", new DefaultConcludeTransform());
  return PDone.in(input.getPipeline());
}
Example #24
Source File: MongoDbIO.java From beam with Apache License 2.0
@Override
public PDone expand(PCollection<Document> input) {
  checkArgument(uri() != null, "withUri() is required");
  checkArgument(database() != null, "withDatabase() is required");
  checkArgument(collection() != null, "withCollection() is required");
  input.apply(ParDo.of(new WriteFn(this)));
  return PDone.in(input.getPipeline());
}
Example #25
Source File: ErrorConverters.java From DataflowTemplates with Apache License 2.0
@Override
public PDone expand(PCollection<FailsafeElement<String, String>> failedRecords) {
  return failedRecords
      .apply("FailedRecordToPubSubMessage", ParDo.of(new FailedStringToPubsubMessageFn()))
      .apply("WriteFailedRecordsToPubSub", PubsubIO.writeMessages().to(errorRecordsTopic()));
}
Example #26
Source File: SqsIO.java From beam with Apache License 2.0
@Override
public PDone expand(PCollection<SendMessageRequest> input) {
  input.apply(
      ParDo.of(
          new SqsWriteFn(
              new SqsConfiguration(input.getPipeline().getOptions().as(AwsOptions.class)))));
  return PDone.in(input.getPipeline());
}
Example #27
Source File: HBaseIO.java From beam with Apache License 2.0
@Override
public PDone expand(PCollection<Mutation> input) {
  checkArgument(configuration != null, "withConfiguration() is required");
  checkArgument(tableId != null && !tableId.isEmpty(), "withTableId() is required");
  try (Connection connection = ConnectionFactory.createConnection(configuration)) {
    Admin admin = connection.getAdmin();
    checkArgument(
        admin.tableExists(TableName.valueOf(tableId)), "Table %s does not exist", tableId);
  } catch (IOException e) {
    LOG.warn("Error checking whether table {} exists; proceeding.", tableId, e);
  }
  input.apply(ParDo.of(new HBaseWriterFn(this)));
  return PDone.in(input.getPipeline());
}
Example #28
Source File: WriteToGCSText.java From DataflowTemplates with Apache License 2.0
@Override
public PDone expand(PCollection<KV<String, String>> kafkaRecords) {
  return kafkaRecords
      /*
       * Converting KV<String, String> records to String using DoFn.
       */
      .apply(
          "Converting to String",
          ParDo.of(
              new DoFn<KV<String, String>, String>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  c.output(c.element().getValue());
                }
              }))
      /*
       * Writing as text file using {@link TextIO}.
       *
       * The {@link WindowedFilenamePolicy} class specifies the file path for writing the file.
       * The {@link withNumShards} option specifies the number of shards passed by the user.
       * The {@link withTempDirectory} option sets the base directory used to generate temporary files.
       */
      .apply(
          "Writing as Text",
          TextIO.write()
              .to(
                  new WindowedFilenamePolicy(
                      outputDirectory(),
                      outputFilenamePrefix(),
                      WriteToGCSUtility.SHARD_TEMPLATE,
                      WriteToGCSUtility.FILE_SUFFIX_MAP.get(WriteToGCSUtility.FileFormat.TEXT)))
              .withTempDirectory(
                  FileBasedSink.convertToFileResourceIfPossible(tempLocation())
                      .getCurrentDirectory())
              .withWindowedWrites()
              .withNumShards(numShards()));
}
Example #29
Source File: KinesisIO.java From beam with Apache License 2.0
@Override
public PDone expand(PCollection<byte[]> input) {
  checkArgument(getStreamName() != null, "withStreamName() is required");
  checkArgument(
      (getPartitionKey() != null) || (getPartitioner() != null),
      "withPartitionKey() or withPartitioner() is required");
  checkArgument(
      getPartitionKey() == null || (getPartitioner() == null),
      "only one of either withPartitionKey() or withPartitioner() is possible");
  checkArgument(getAWSClientsProvider() != null, "withAWSClientsProvider() is required");
  input.apply(ParDo.of(new KinesisWriterFn(this)));
  return PDone.in(input.getPipeline());
}
Example #30
Source File: WriteOneFilePerWindow.java From deployment-examples with MIT License
@Override
public PDone expand(PCollection<String> input) {
  ResourceId resource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix);
  TextIO.Write write =
      TextIO.write()
          .to(new PerWindowFiles(resource))
          .withTempDirectory(resource.getCurrentDirectory())
          .withWindowedWrites();
  if (numShards != null) {
    write = write.withNumShards(numShards);
  }
  return input.apply(write);
}