org.apache.beam.sdk.annotations.Experimental.Kind Java Examples
The following examples show how to use
org.apache.beam.sdk.annotations.Experimental.Kind.
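The annotation itself simply tags an API surface with the experimental feature area it belongs to. A minimal sketch of how it is applied, using hypothetical class and method names rather than anything from the Beam codebase:

import org.apache.beam.sdk.annotations.Experimental;
import org.apache.beam.sdk.annotations.Experimental.Kind;

public class MySchemaUtilities {
  // Hypothetical method; the Kind value declares which experimental feature
  // area (here, schema support) callers are opting into.
  @Experimental(Kind.SCHEMAS)
  public static void registerCustomSchema() {
    // ...
  }
}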
Example #1
Source File: FileBasedSink.java From beam with Apache License 2.0 | 6 votes |
@Experimental(Kind.FILESYSTEM)
public ResourceId getDestinationFile(
    boolean windowedWrites,
    DynamicDestinations<?, DestinationT, ?> dynamicDestinations,
    int numShards,
    OutputFileHints outputFileHints) {
  checkArgument(getShard() != UNKNOWN_SHARDNUM);
  checkArgument(numShards > 0);
  FilenamePolicy policy = dynamicDestinations.getFilenamePolicy(destination);
  if (windowedWrites) {
    return policy.windowedFilename(
        getShard(), numShards, getWindow(), getPaneInfo(), outputFileHints);
  } else {
    return policy.unwindowedFilename(getShard(), numShards, outputFileHints);
  }
}
Example #2
Source File: TestBigQuery.java From beam with Apache License 2.0 | 6 votes |
@Experimental(Kind.SCHEMAS)
public TableDataInsertAllResponse insertRows(Schema rowSchema, Row... rows) throws IOException {
  List<Rows> bqRows =
      Arrays.stream(rows)
          .map(row -> new Rows().setJson(BigQueryUtils.toTableRow(row)))
          .collect(ImmutableList.toImmutableList());
  Bigquery bq = newBigQueryClient(pipelineOptions);
  return bq.tabledata()
      .insertAll(
          pipelineOptions.getProject(),
          pipelineOptions.getTargetDataset(),
          table.getTableReference().getTableId(),
          new TableDataInsertAllRequest().setRows(bqRows))
      .execute();
}
Example #3
Source File: AvroIO.java From beam with Apache License 2.0 | 5 votes |
/**
 * A {@link Sink} for use with {@link FileIO#write} and {@link FileIO#writeDynamic}, writing
 * elements with a given (common) schema, like {@link #writeGenericRecords(String)}.
 */
@Experimental(Kind.SOURCE_SINK)
public static <ElementT extends IndexedRecord> Sink<ElementT> sink(String jsonSchema) {
  return new AutoValue_AvroIO_Sink.Builder<ElementT>()
      .setJsonSchema(jsonSchema)
      .setMetadata(ImmutableMap.of())
      .setCodec(TypedWrite.DEFAULT_SERIALIZABLE_CODEC)
      .build();
}
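This sink is meant to be handed to FileIO rather than used directly. A minimal usage sketch, assuming a PCollection<GenericRecord> named records and an illustrative schema string and output directory:

// Hypothetical usage; jsonSchemaString and the output path are placeholders.
records.apply(
    "WriteAvro",
    FileIO.<GenericRecord>write()
        .via(AvroIO.sink(jsonSchemaString))
        .to("/tmp/avro-output")
        .withSuffix(".avro"));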
Example #4
Source File: Create.java From beam with Apache License 2.0 | 5 votes |
/**
 * Returns a {@link Create.Values} PTransform like this one that uses the given {@code Schema}
 * to represent objects.
 */
@Experimental(Kind.SCHEMAS)
public Values<T> withSchema(
    Schema schema,
    TypeDescriptor<T> typeDescriptor,
    SerializableFunction<T, Row> toRowFunction,
    SerializableFunction<Row, T> fromRowFunction) {
  return withCoder(SchemaCoder.of(schema, typeDescriptor, toRowFunction, fromRowFunction));
}
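A usage sketch, assuming a hypothetical POJO MyUser with name and age fields; in practice the schema and conversion functions may also come from a registered SchemaProvider:

// MyUser and its accessors are illustrative placeholders.
Schema schema = Schema.builder().addStringField("name").addInt32Field("age").build();
PCollection<MyUser> users =
    pipeline.apply(
        Create.of(new MyUser("alice", 40), new MyUser("bob", 41))
            .withSchema(
                schema,
                TypeDescriptor.of(MyUser.class),
                user -> Row.withSchema(schema).addValues(user.getName(), user.getAge()).build(),
                row -> new MyUser(row.getString("name"), row.getInt32("age"))));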
Example #5
Source File: Create.java From beam with Apache License 2.0 | 5 votes |
/**
 * Returns a {@link Create.TimestampedValues} PTransform like this one that uses the given
 * {@code Schema} to represent objects.
 */
@Experimental(Kind.SCHEMAS)
public TimestampedValues<T> withSchema(
    Schema schema,
    TypeDescriptor<T> typeDescriptor,
    SerializableFunction<T, Row> toRowFunction,
    SerializableFunction<Row, T> fromRowFunction) {
  return withCoder(SchemaCoder.of(schema, typeDescriptor, toRowFunction, fromRowFunction));
}
Example #6
Source File: TextIO.java From beam with Apache License 2.0 | 5 votes |
/**
 * See {@link MatchConfiguration#continuously}.
 *
 * <p>This works only in runners supporting {@link Kind#SPLITTABLE_DO_FN}.
 */
@Experimental(Kind.SPLITTABLE_DO_FN)
public Read watchForNewFiles(
    Duration pollInterval, TerminationCondition<String, ?> terminationCondition) {
  return withMatchConfiguration(
      getMatchConfiguration().continuously(pollInterval, terminationCondition));
}
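A usage sketch for continuously watching a file pattern, assuming an illustrative path; the termination condition stops the watch after an hour with no new output, and the pipeline must run on a runner that supports splittable DoFn:

// Hypothetical file pattern.
PCollection<String> lines =
    pipeline.apply(
        TextIO.read()
            .from("/tmp/incoming/*.txt")
            .watchForNewFiles(
                Duration.standardSeconds(30),
                Watch.Growth.afterTimeSinceNewOutput(Duration.standardHours(1))));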
Example #7
Source File: FlatMapElements.java From beam with Apache License 2.0 | 5 votes |
/** Like {@link #via(ProcessFunction)}, but allows access to additional context. */
@Experimental(Kind.CONTEXTFUL)
public <NewInputT> FlatMapElements<NewInputT, OutputT> via(
    Contextful<Fn<NewInputT, Iterable<OutputT>>> fn) {
  return new FlatMapElements<>(
      fn, fn.getClosure(), TypeDescriptors.inputOf(fn.getClosure()), outputType);
}
Example #8
Source File: ProtoByteBuddyUtils.java From beam with Apache License 2.0 | 5 votes |
@Experimental(Kind.SCHEMAS)
static <ProtoBuilderT extends MessageLite.Builder> SchemaUserTypeCreator createBuilderCreator(
    Class<?> protoClass,
    Class<?> builderClass,
    List<FieldValueSetter<ProtoBuilderT, Object>> setters,
    Schema schema) {
  try {
    DynamicType.Builder<Supplier> builder =
        BYTE_BUDDY
            .with(new InjectPackageStrategy(builderClass))
            .subclass(Supplier.class)
            .method(ElementMatchers.named("get"))
            .intercept(new BuilderSupplier(protoClass));
    Supplier supplier =
        builder
            .visit(
                new AsmVisitorWrapper.ForDeclaredMethods()
                    .writerFlags(ClassWriter.COMPUTE_FRAMES))
            .make()
            .load(ReflectHelpers.findClassLoader(), ClassLoadingStrategy.Default.INJECTION)
            .getLoaded()
            .getDeclaredConstructor()
            .newInstance();
    return new ProtoCreatorFactory<>(supplier, setters);
  } catch (InstantiationException
      | IllegalAccessException
      | NoSuchMethodException
      | InvocationTargetException e) {
    throw new RuntimeException(
        "Unable to generate a creator for class " + builderClass + " with schema " + schema);
  }
}
Example #9
Source File: TextIO.java From beam with Apache License 2.0 | 5 votes |
/**
 * See {@link TypedWrite#to(SerializableFunction, Params)}.
 *
 * @deprecated Use {@link FileIO#write()} or {@link FileIO#writeDynamic()} with {@link #sink()}
 *     instead.
 */
@Experimental(Kind.FILESYSTEM)
@Deprecated
public Write to(
    SerializableFunction<String, Params> destinationFunction, Params emptyDestination) {
  return new Write(
      inner
          .to(destinationFunction, emptyDestination)
          .withFormatFunction(SerializableFunctions.identity()));
}
Example #10
Source File: SlidingWindows.java From beam with Apache License 2.0 | 5 votes |
/**
 * Ensures that later sliding windows have an output time that is past the end of earlier
 * windows.
 *
 * <p>If this is the earliest sliding window containing {@code inputTimestamp}, that's fine.
 * Otherwise, we pick the earliest time that doesn't overlap with earlier windows.
 */
@Experimental(Kind.OUTPUT_TIME)
@Override
public Instant getOutputTime(Instant inputTimestamp, IntervalWindow window) {
  Instant startOfLastSegment = window.maxTimestamp().minus(period);
  return startOfLastSegment.isBefore(inputTimestamp)
      ? inputTimestamp
      : startOfLastSegment.plus(1);
}
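As an illustration of the arithmetic (assuming sliding windows of 30 seconds every 10 seconds): for an element at 5s, the latest window containing it is [0s, 30s), whose maxTimestamp() is just under 30s, so startOfLastSegment is just under 20s. That is not before the element's 5s timestamp, so the output time becomes startOfLastSegment plus one millisecond, which is past the end of the earlier windows [-20s, 10s) and [-10s, 20s) that also contain the element.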
Example #11
Source File: TextIO.java From beam with Apache License 2.0 | 5 votes |
/**
 * See {@link TypedWrite#to(DynamicDestinations)}.
 *
 * @deprecated Use {@link FileIO#write()} or {@link FileIO#writeDynamic()} with {@link #sink()}
 *     instead.
 */
@Experimental(Kind.FILESYSTEM)
@Deprecated
public Write to(DynamicDestinations<String, ?, String> dynamicDestinations) {
  return new Write(
      inner.to((DynamicDestinations) dynamicDestinations).withFormatFunction(null));
}
Example #12
Source File: PCollection.java From beam with Apache License 2.0 | 5 votes |
/** Returns the attached schema. */
@Experimental(Kind.SCHEMAS)
public Schema getSchema() {
  if (!hasSchema()) {
    throw new IllegalStateException("Cannot call getSchema when there is no schema");
  }
  return ((SchemaCoder) getCoder()).getSchema();
}
Example #13
Source File: PCollection.java From beam with Apache License 2.0 | 5 votes |
/** Returns the attached schema's fromRowFunction. */
@Experimental(Kind.SCHEMAS)
public SerializableFunction<Row, T> getFromRowFunction() {
  if (!hasSchema()) {
    throw new IllegalStateException("Cannot call getFromRowFunction when there is no schema");
  }
  return ((SchemaCoder<T>) getCoder()).getFromRowFunction();
}
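A sketch of how these schema accessors are typically guarded, assuming a schema-aware PCollection<MyUser> named users obtained elsewhere:

// hasSchema() avoids the IllegalStateException thrown above.
if (users.hasSchema()) {
  Schema schema = users.getSchema();
  SerializableFunction<Row, MyUser> fromRow = users.getFromRowFunction();
  SerializableFunction<MyUser, Row> toRow = users.getToRowFunction();
  // ... convert between MyUser objects and Rows as needed ...
}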
Example #14
Source File: FileBasedSink.java From beam with Apache License 2.0 | 5 votes |
/**
 * Construct a {@link FileBasedSink} with the given temp directory, producing uncompressed
 * files.
 */
@Experimental(Kind.FILESYSTEM)
public FileBasedSink(
    ValueProvider<ResourceId> tempDirectoryProvider,
    DynamicDestinations<?, DestinationT, OutputT> dynamicDestinations) {
  this(tempDirectoryProvider, dynamicDestinations, Compression.UNCOMPRESSED);
}
Example #15
Source File: ProtoByteBuddyUtils.java From beam with Apache License 2.0 | 5 votes |
@Experimental(Kind.SCHEMAS)
@Nullable
public static <ProtoBuilderT extends MessageLite.Builder> SchemaUserTypeCreator getBuilderCreator(
    Class<?> protoClass, Schema schema, FieldValueTypeSupplier fieldValueTypeSupplier) {
  Class<ProtoBuilderT> builderClass = getProtoGeneratedBuilder(protoClass);
  if (builderClass == null) {
    return null;
  }
  Multimap<String, Method> methods = ReflectUtils.getMethodsMap(builderClass);
  List<FieldValueSetter<ProtoBuilderT, Object>> setters =
      schema.getFields().stream()
          .map(f -> getProtoFieldValueSetter(f, methods, builderClass))
          .collect(Collectors.toList());
  return createBuilderCreator(protoClass, builderClass, setters, schema);
}
Example #16
Source File: AvroIO.java From beam with Apache License 2.0 | 5 votes |
/**
 * Use a {@link DynamicAvroDestinations} object to vend {@link FilenamePolicy} objects. These
 * objects can examine the input record when creating a {@link FilenamePolicy}. A directory for
 * temporary files must be specified using {@link #withTempDirectory}.
 *
 * @deprecated Use {@link FileIO#write()} or {@link FileIO#writeDynamic()} instead.
 */
@Experimental(Kind.FILESYSTEM)
@Deprecated
public <NewDestinationT> TypedWrite<UserT, NewDestinationT, OutputT> to(
    DynamicAvroDestinations<UserT, NewDestinationT, OutputT> dynamicDestinations) {
  return toBuilder()
      .setDynamicDestinations((DynamicAvroDestinations) dynamicDestinations)
      .build();
}
Example #17
Source File: JdbcIO.java From beam with Apache License 2.0 | 5 votes |
/** Read Beam {@link Row}s from a JDBC data source. */
@Experimental(Kind.SCHEMAS)
public static ReadRows readRows() {
  return new AutoValue_JdbcIO_ReadRows.Builder()
      .setFetchSize(DEFAULT_FETCH_SIZE)
      .setOutputParallelization(true)
      .setStatementPreparator(ignored -> {})
      .build();
}
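A usage sketch, with an illustrative JDBC driver, connection string, and query:

// Hypothetical connection details.
PCollection<Row> rows =
    pipeline.apply(
        JdbcIO.readRows()
            .withDataSourceConfiguration(
                JdbcIO.DataSourceConfiguration.create(
                    "org.postgresql.Driver", "jdbc:postgresql://localhost:5432/mydb"))
            .withQuery("SELECT id, name FROM users"));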
Example #18
Source File: AvroIO.java From beam with Apache License 2.0 | 5 votes |
/** Like {@link Read#watchForNewFiles}. */
@Experimental(Kind.SPLITTABLE_DO_FN)
public Parse<T> watchForNewFiles(
    Duration pollInterval, TerminationCondition<String, ?> terminationCondition) {
  return withMatchConfiguration(
      getMatchConfiguration().continuously(pollInterval, terminationCondition));
}
Example #19
Source File: AvroIO.java From beam with Apache License 2.0 | 5 votes |
/** Like {@link Read#watchForNewFiles}. */
@Experimental(Kind.SPLITTABLE_DO_FN)
public ReadAll<T> watchForNewFiles(
    Duration pollInterval, TerminationCondition<String, ?> terminationCondition) {
  return withMatchConfiguration(
      getMatchConfiguration().continuously(pollInterval, terminationCondition));
}
Example #20
Source File: AvroIO.java From beam with Apache License 2.0 | 5 votes |
@Experimental(Kind.SCHEMAS)
private static <T> PCollection<T> setBeamSchema(
    PCollection<T> pc, Class<T> clazz, @Nullable Schema schema) {
  org.apache.beam.sdk.schemas.Schema beamSchema =
      org.apache.beam.sdk.schemas.utils.AvroUtils.getSchema(clazz, schema);
  if (beamSchema != null) {
    pc.setSchema(
        beamSchema,
        TypeDescriptor.of(clazz),
        org.apache.beam.sdk.schemas.utils.AvroUtils.getToRowFunction(clazz, schema),
        org.apache.beam.sdk.schemas.utils.AvroUtils.getFromRowFunction(clazz));
  }
  return pc;
}
Example #21
Source File: FileBasedSink.java From beam with Apache License 2.0 | 5 votes |
@Experimental(Kind.FILESYSTEM)
public FileResult(
    ResourceId tempFilename,
    int shard,
    BoundedWindow window,
    PaneInfo paneInfo,
    DestinationT destination) {
  checkArgument(window != null, "window can not be null");
  checkArgument(paneInfo != null, "paneInfo can not be null");
  this.tempFilename = tempFilename;
  this.shard = shard;
  this.window = window;
  this.paneInfo = paneInfo;
  this.destination = destination;
}
Example #22
Source File: FileBasedSink.java From beam with Apache License 2.0 | 5 votes |
/** Construct a {@link FileBasedSink} with the given temp directory and output channel type. */
@Experimental(Kind.FILESYSTEM)
public FileBasedSink(
    ValueProvider<ResourceId> tempDirectoryProvider,
    DynamicDestinations<?, DestinationT, OutputT> dynamicDestinations,
    Compression compression) {
  this(tempDirectoryProvider, dynamicDestinations, CompressionType.fromCanonical(compression));
}
Example #23
Source File: FileBasedSink.java From beam with Apache License 2.0 | 5 votes |
/** Construct a {@link FileBasedSink} with the given temp directory and output channel type. */
@Experimental(Kind.FILESYSTEM)
public FileBasedSink(
    ValueProvider<ResourceId> tempDirectoryProvider,
    DynamicDestinations<?, DestinationT, OutputT> dynamicDestinations,
    WritableByteChannelFactory writableByteChannelFactory) {
  this.tempDirectoryProvider =
      NestedValueProvider.of(tempDirectoryProvider, new ExtractDirectory());
  this.dynamicDestinations = checkNotNull(dynamicDestinations);
  this.writableByteChannelFactory = writableByteChannelFactory;
}
Example #24
Source File: TestStream.java From beam with Apache License 2.0 | 5 votes |
@Experimental(Kind.SCHEMAS)
public static <T> Builder<T> create(
    Schema schema,
    TypeDescriptor<T> typeDescriptor,
    SerializableFunction<T, Row> toRowFunction,
    SerializableFunction<Row, T> fromRowFunction) {
  return create(SchemaCoder.of(schema, typeDescriptor, toRowFunction, fromRowFunction));
}
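A usage sketch, reusing the hypothetical MyUser type, schema, and conversion functions from the earlier Create example:

// Elements arrive with the schema attached; the watermark then advances to the end of time.
TestStream<MyUser> stream =
    TestStream.create(schema, TypeDescriptor.of(MyUser.class), toRow, fromRow)
        .addElements(new MyUser("alice", 40))
        .advanceWatermarkToInfinity();
PCollection<MyUser> users = pipeline.apply(stream);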
Example #25
Source File: BigQueryIO.java From beam with Apache License 2.0 | 4 votes |
@Nullable
@Experimental(Kind.SCHEMAS)
abstract ToBeamRowFunction<T> getToBeamRowFn();
Example #26
Source File: AvroIO.java From beam with Apache License 2.0 | 4 votes |
/**
 * Writes to files named according to the given {@link FileBasedSink.FilenamePolicy}. A
 * directory for temporary files must be specified using {@link #withTempDirectory}.
 */
@Experimental(Kind.FILESYSTEM)
public TypedWrite<UserT, DestinationT, OutputT> to(FilenamePolicy filenamePolicy) {
  return toBuilder().setFilenamePolicy(filenamePolicy).build();
}
Example #27
Source File: TextIO.java From beam with Apache License 2.0 | 4 votes |
/** See {@link TypedWrite#to(ResourceId)}. */
@Experimental(Kind.FILESYSTEM)
public Write to(ResourceId filenamePrefix) {
  return new Write(
      inner.to(filenamePrefix).withFormatFunction(SerializableFunctions.identity()));
}
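A usage sketch writing to an explicit ResourceId, assuming an illustrative output location and a PCollection<String> named lines:

// matchNewResource builds a ResourceId for a path that need not exist yet.
ResourceId prefix =
    FileSystems.matchNewResource("gs://my-bucket/output/results", /* isDirectory= */ false);
lines.apply(TextIO.write().to(prefix).withSuffix(".txt"));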
Example #28
Source File: TFRecordIO.java From beam with Apache License 2.0 | 4 votes |
/** Like {@link #to(ResourceId)}. */
@Experimental(Kind.FILESYSTEM)
public Write toResource(ValueProvider<ResourceId> outputResource) {
  return toBuilder().setOutputPrefix(outputResource).build();
}
Example #29
Source File: AddFields.java From beam with Apache License 2.0 | 4 votes |
private static AddFieldsInformation getAddFieldsInformation(
    Schema inputSchema, Collection<NewField> fieldsToAdd) {
  List<NewField> newTopLevelFields =
      fieldsToAdd.stream()
          .filter(n -> !n.getDescriptor().getFieldsAccessed().isEmpty())
          .collect(Collectors.toList());
  List<NewField> newNestedFields =
      fieldsToAdd.stream()
          .filter(n -> !n.getDescriptor().getNestedFieldsAccessed().isEmpty())
          .collect(Collectors.toList());
  // Group all nested fields together by the field at the current level. For example, if adding
  // a.b, a.c, a.d this map will contain a -> {a.b, a.c, a.d}.
  Multimap<String, NewField> newNestedFieldsMap =
      Multimaps.index(newNestedFields, NewField::getName);

  Map<Integer, AddFieldsInformation> resolvedNestedNewValues = Maps.newHashMap();
  Schema.Builder builder = Schema.builder();
  for (int i = 0; i < inputSchema.getFieldCount(); ++i) {
    Schema.Field field = inputSchema.getField(i);
    Collection<NewField> nestedFields = newNestedFieldsMap.get(field.getName());

    // If this field is a nested field and new subfields are added further down the tree, add
    // those subfields before adding to the current schema. Otherwise we just add this field as
    // is to the new schema.
    if (!nestedFields.isEmpty()) {
      nestedFields = nestedFields.stream().map(NewField::descend).collect(Collectors.toList());
      AddFieldsInformation nestedInformation =
          getAddFieldsInformation(field.getType(), nestedFields);
      field = field.withType(nestedInformation.getOutputFieldType());
      resolvedNestedNewValues.put(i, nestedInformation);
    }
    builder.addField(field);
  }

  // Add any new fields at this level.
  List<Object> newValuesThisLevel = new ArrayList<>(newTopLevelFields.size());
  for (NewField newField : newTopLevelFields) {
    builder.addField(newField.getName(), newField.getFieldType());
    newValuesThisLevel.add(newField.getDefaultValue());
  }

  // If there are any nested field additions left that are not already processed, that means
  // that the root of the nested field doesn't exist in the schema. In this case we'll walk down
  // the new nested fields and recursively create each nested level as necessary.
  for (Map.Entry<String, Collection<NewField>> newNested :
      newNestedFieldsMap.asMap().entrySet()) {
    String fieldName = newNested.getKey();

    // If the user specifies the same nested field twice in different ways (e.g. a[].x, a{}.x)
    FieldAccessDescriptor.FieldDescriptor fieldDescriptor =
        Iterables.getOnlyElement(
            newNested.getValue().stream()
                .map(NewField::getFieldDescriptor)
                .distinct()
                .collect(Collectors.toList()));
    FieldType fieldType = Schema.FieldType.row(Schema.of()).withNullable(true);
    for (Qualifier qualifier : fieldDescriptor.getQualifiers()) {
      // The problem with adding recursive map fields is that we don't know what the map key
      // type should be. In a field descriptor of the form mapField{}.subField, the subField is
      // assumed to be in the map value. Since in this code path the mapField field does not
      // already exist, this means we need to create the new map field, and we have no way of
      // knowing what type the key should be. Alternatives would be to always create a default
      // key type (e.g. FieldType.STRING) or extend our selector syntax to allow specifying key
      // types.
      checkArgument(
          !qualifier.getKind().equals(Qualifier.Kind.MAP), "Map qualifiers not supported here");
      fieldType = FieldType.array(fieldType).withNullable(true);
    }
    if (!inputSchema.hasField(fieldName)) {
      // This is a brand-new nested field with no matching field in the input schema. We will
      // recursively create a nested schema to match it.
      Collection<NewField> nestedNewFields =
          newNested.getValue().stream().map(NewField::descend).collect(Collectors.toList());
      AddFieldsInformation addFieldsInformation =
          getAddFieldsInformation(fieldType, nestedNewFields);
      builder.addField(fieldName, addFieldsInformation.getOutputFieldType());
      resolvedNestedNewValues.put(builder.getLastFieldId(), addFieldsInformation);
    }
  }
  Schema schema = builder.build();

  List<AddFieldsInformation> nestedNewValueList =
      new ArrayList<>(Collections.nCopies(schema.getFieldCount(), null));
  for (Map.Entry<Integer, AddFieldsInformation> entry : resolvedNestedNewValues.entrySet()) {
    nestedNewValueList.set(entry.getKey(), entry.getValue());
  }
  return AddFieldsInformation.of(
      Schema.FieldType.row(schema), newValuesThisLevel, nestedNewValueList);
}
Example #30
Source File: Coder.java From beam with Apache License 2.0 | 4 votes |
/** Returns the {@link TypeDescriptor} for the type encoded. */
@Experimental(Kind.CODER_TYPE_ENCODING)
public TypeDescriptor<T> getEncodedTypeDescriptor() {
  return (TypeDescriptor<T>)
      TypeDescriptor.of(getClass()).resolveType(new TypeDescriptor<T>() {}.getType());
}