org.apache.beam.sdk.io.fs.EmptyMatchTreatment Java Examples
The following examples show how to use
org.apache.beam.sdk.io.fs.EmptyMatchTreatment.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FileBasedSourceTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks that reading produces zero records for a non-wildcard path under {@link
 * EmptyMatchTreatment#ALLOW} and for a wildcard pattern under {@link
 * EmptyMatchTreatment#ALLOW_IF_WILDCARD}.
 *
 * <p>Both paths are built under the temporary folder; presumably neither matches an existing file.
 */
@Test
public void testEmptyFilepatternTreatmentAllow() throws IOException {
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();

  String plainPath = new File(tempFolder.getRoot(), "doesNotExist").getPath();
  TestFileBasedSource plainSource =
      new TestFileBasedSource(plainPath, EmptyMatchTreatment.ALLOW, 64, null);

  String wildcardPath = new File(tempFolder.getRoot(), "doesNotExist*").getPath();
  TestFileBasedSource wildcardSource =
      new TestFileBasedSource(wildcardPath, EmptyMatchTreatment.ALLOW_IF_WILDCARD, 64, null);

  assertEquals(0, readFromSource(plainSource, pipelineOptions).size());
  assertEquals(0, readFromSource(wildcardSource, pipelineOptions).size());
}
Example #2
Source File: AvroIO.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds an {@link AvroSource} for {@code filepattern} with the given empty-match treatment.
 *
 * <p>The datum reader factory is applied only when it is non-null. When {@code recordClass} is
 * {@link GenericRecord}, the source is configured from {@code schema}; otherwise it is configured
 * from {@code recordClass}.
 */
@SuppressWarnings("unchecked")
private static <T> AvroSource<T> createSource(
    ValueProvider<String> filepattern,
    EmptyMatchTreatment emptyMatchTreatment,
    Class<T> recordClass,
    Schema schema,
    @Nullable AvroSource.DatumReaderFactory<T> readerFactory) {
  AvroSource<?> configured =
      AvroSource.from(filepattern).withEmptyMatchTreatment(emptyMatchTreatment);
  if (readerFactory != null) {
    configured = configured.withDatumReaderFactory(readerFactory);
  }

  if (recordClass == GenericRecord.class) {
    // Unchecked: T is GenericRecord on this branch.
    return (AvroSource<T>) configured.withSchema(schema);
  }
  return configured.withSchema(recordClass);
}
Example #3
Source File: Transforms.java From nomulus with Apache License 2.0 | 5 votes |
/**
 * Builds a {@link PTransform} that maps file name patterns to the {@link Metadata Metadata
 * records} of the files they match.
 *
 * <p>Matching is configured with {@link EmptyMatchTreatment#DISALLOW}.
 */
public static PTransform<PCollection<String>, PCollection<Metadata>> getFilesByPatterns() {
  return new PTransform<PCollection<String>, PCollection<Metadata>>() {
    @Override
    public PCollection<Metadata> expand(PCollection<String> input) {
      FileIO.MatchAll strictMatchAll =
          FileIO.matchAll().withEmptyMatchTreatment(EmptyMatchTreatment.DISALLOW);
      return input.apply(strictMatchAll);
    }
  };
}
Example #4
Source File: FileIO.java From beam with Apache License 2.0 | 5 votes |
/**
 * Polls the filepattern {@code element} once and reports the current matches as an incomplete
 * poll result.
 *
 * <p>The same instant, captured before matching, is used as both the result timestamp and the
 * watermark. Matching uses {@link EmptyMatchTreatment#ALLOW}.
 */
@Override
public Watch.Growth.PollResult<MatchResult.Metadata> apply(String element, Context c)
    throws Exception {
  Instant pollTime = Instant.now();
  MatchResult currentMatch = FileSystems.match(element, EmptyMatchTreatment.ALLOW);
  Watch.Growth.PollResult<MatchResult.Metadata> pending =
      Watch.Growth.PollResult.incomplete(pollTime, currentMatch.metadata());
  return pending.withWatermark(pollTime);
}
Example #5
Source File: TextIO.java From beam with Apache License 2.0 | 5 votes |
/**
 * A {@link PTransform} that reads from one or more text files and returns a bounded {@link
 * PCollection} containing one element for each line of the input files.
 *
 * <p>Defaults set here: {@link Compression#AUTO}, no many-files hint, and a match configuration
 * created with {@link EmptyMatchTreatment#DISALLOW}.
 */
public static Read read() {
  MatchConfiguration strictMatching = MatchConfiguration.create(EmptyMatchTreatment.DISALLOW);
  return new AutoValue_TextIO_Read.Builder()
      .setCompression(Compression.AUTO)
      .setHintMatchesManyFiles(false)
      .setMatchConfiguration(strictMatching)
      .build();
}
Example #6
Source File: AvroSource.java From beam with Apache License 2.0 | 5 votes |
/**
 * Reads from the given file name or pattern ("glob").
 *
 * <p>The returned source reads {@link GenericRecord}s and must be further configured through
 * {@link #withSchema} to return any other type. It is created with {@link
 * EmptyMatchTreatment#DISALLOW} and {@code DEFAULT_MIN_BUNDLE_SIZE}.
 */
public static AvroSource<GenericRecord> from(ValueProvider<String> fileNameOrPattern) {
  return new AvroSource<GenericRecord>(
      fileNameOrPattern,
      EmptyMatchTreatment.DISALLOW,
      DEFAULT_MIN_BUNDLE_SIZE,
      // The schema (first argument) is left null here; it will need to be specified in withSchema.
      readGenericRecordsWithSchema(null, null));
}
Example #7
Source File: AvroSource.java From beam with Apache License 2.0 | 5 votes |
/**
 * Constructor for FILEPATTERN mode.
 *
 * <p>Forwards the pattern, empty-match treatment and minimum bundle size to the superclass and
 * records {@code mode} on this instance.
 */
private AvroSource(
    ValueProvider<String> fileNameOrPattern,
    EmptyMatchTreatment emptyMatchTreatment,
    long minBundleSize,
    Mode<T> mode) {
  super(
      fileNameOrPattern,
      emptyMatchTreatment,
      minBundleSize);

  this.mode = mode;
}
Example #8
Source File: WriteFilesTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Extracts the values of the input key-value pairs and runs them through {@code
 * FileIO.matchAll()} configured with {@link EmptyMatchTreatment#DISALLOW}.
 *
 * <p>The match output is discarded; a {@link PDone} in the input's pipeline is returned.
 */
@Override
public PDone expand(PCollection<KV<DestinationT, String>> input) {
  PCollection<String> values = input.apply(Values.create());
  values.apply(FileIO.matchAll().withEmptyMatchTreatment(EmptyMatchTreatment.DISALLOW));
  return PDone.in(input.getPipeline());
}
Example #9
Source File: FileBasedSourceTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates a test source for a file or file pattern.
 *
 * <p>The pattern is wrapped in a {@code StaticValueProvider} before being handed to the
 * superclass; {@code splitHeader} is stored on this instance.
 */
public TestFileBasedSource(
    String fileOrPattern,
    EmptyMatchTreatment emptyMatchTreatment,
    long minBundleSize,
    String splitHeader) {
  super(
      StaticValueProvider.of(fileOrPattern),
      emptyMatchTreatment,
      minBundleSize);

  this.splitHeader = splitHeader;
}
Example #10
Source File: FileBasedSource.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates a {@code FileBasedSource} from a file or file pattern specification, using the given
 * strategy for filepatterns that do not match any files.
 *
 * <p>The superclass is initialized with {@code 0}, {@code Long.MAX_VALUE} and {@code
 * minBundleSize} (the first two are presumably the start and end offsets; confirm against the
 * superclass). The source is placed in {@code Mode.FILEPATTERN}.
 */
protected FileBasedSource(
    ValueProvider<String> fileOrPatternSpec,
    EmptyMatchTreatment emptyMatchTreatment,
    long minBundleSize) {
  super(0, Long.MAX_VALUE, minBundleSize);

  this.fileOrPatternSpec = fileOrPatternSpec;
  this.emptyMatchTreatment = emptyMatchTreatment;
  this.mode = Mode.FILEPATTERN;
}
Example #11
Source File: FileBasedSourceTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks that reading a non-wildcard path under {@link EmptyMatchTreatment#ALLOW_IF_WILDCARD} is
 * expected to fail with {@link FileNotFoundException}.
 */
@Test
public void testEmptyFilepatternTreatmentAllowIfWildcard() throws IOException {
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  String plainPath = new File(tempFolder.getRoot(), "doesNotExist").getPath();
  TestFileBasedSource plainSource =
      new TestFileBasedSource(plainPath, EmptyMatchTreatment.ALLOW_IF_WILDCARD, 64, null);

  // The expectation must be registered before the read that should throw.
  thrown.expect(FileNotFoundException.class);
  readFromSource(plainSource, pipelineOptions);
}
Example #12
Source File: AvroIO.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates a source for a single input string, treated as the filepattern.
 *
 * <p>The source is created with {@link EmptyMatchTreatment#DISALLOW}, using this object's record
 * class, the schema obtained from {@code schemaSupplier}, and the reader factory.
 */
@Override
public FileBasedSource<T> apply(String input) {
  StaticValueProvider<String> pattern = StaticValueProvider.of(input);
  FileBasedSource<T> source =
      Read.createSource(
          pattern,
          EmptyMatchTreatment.DISALLOW,
          recordClass,
          schemaSupplier.get(),
          readerFactory);
  return source;
}
Example #13
Source File: FileIOTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code FileIO.match()} on a wildcard pattern under the temporary folder, with
 * {@link EmptyMatchTreatment#DISALLOW}, is expected to fail the pipeline with a {@link
 * FileNotFoundException} cause.
 */
@Test
@Category(NeedsRunner.class)
public void testMatchDisallowEmptyExplicit() throws IOException {
  String wildcardPattern = tmpFolder.getRoot().getAbsolutePath() + "/*";
  FileIO.Match strictMatch =
      FileIO.match()
          .filepattern(wildcardPattern)
          .withEmptyMatchTreatment(EmptyMatchTreatment.DISALLOW);
  p.apply(strictMatch);

  thrown.expectCause(isA(FileNotFoundException.class));
  p.run();
}
Example #14
Source File: FileIOTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code FileIO.match()} on a non-wildcard path ({@code /blah} under the temporary
 * folder), with {@link EmptyMatchTreatment#ALLOW_IF_WILDCARD}, is expected to fail the pipeline
 * with a {@link FileNotFoundException} cause.
 */
@Test
@Category(NeedsRunner.class)
public void testMatchDisallowEmptyNonWildcard() throws IOException {
  String plainPath = tmpFolder.getRoot().getAbsolutePath() + "/blah";
  FileIO.Match wildcardOnlyMatch =
      FileIO.match()
          .filepattern(plainPath)
          .withEmptyMatchTreatment(EmptyMatchTreatment.ALLOW_IF_WILDCARD);
  p.apply(wildcardOnlyMatch);

  thrown.expectCause(isA(FileNotFoundException.class));
  p.run();
}
Example #15
Source File: AvroIO.java From beam with Apache License 2.0 | 5 votes |
/**
 * Reads Avro file(s) containing records of an unspecified schema, converting each record to a
 * custom type via {@code parseFn}.
 *
 * <p>Defaults set here: a match configuration created with {@link EmptyMatchTreatment#DISALLOW}
 * and no many-files hint.
 */
public static <T> Parse<T> parseGenericRecords(SerializableFunction<GenericRecord, T> parseFn) {
  MatchConfiguration strictMatching = MatchConfiguration.create(EmptyMatchTreatment.DISALLOW);
  return new AutoValue_AvroIO_Parse.Builder<T>()
      .setMatchConfiguration(strictMatching)
      .setParseFn(parseFn)
      .setHintMatchesManyFiles(false)
      .build();
}
Example #16
Source File: AvroIO.java From beam with Apache License 2.0 | 5 votes |
/**
 * Reads Avro file(s) containing records of the specified schema.
 *
 * <p>Defaults set here: a match configuration created with {@link EmptyMatchTreatment#DISALLOW},
 * record class {@link GenericRecord}, no Beam schema inference, and no many-files hint.
 */
public static Read<GenericRecord> readGenericRecords(Schema schema) {
  MatchConfiguration strictMatching = MatchConfiguration.create(EmptyMatchTreatment.DISALLOW);
  return new AutoValue_AvroIO_Read.Builder<GenericRecord>()
      .setMatchConfiguration(strictMatching)
      .setRecordClass(GenericRecord.class)
      .setSchema(schema)
      .setInferBeamSchema(false)
      .setHintMatchesManyFiles(false)
      .build();
}
Example #17
Source File: AvroTableFileAsMutations.java From DataflowTemplates with Apache License 2.0 | 5 votes |
@Override public PCollection<Mutation> expand(PCollection<KV<String, String>> filesToTables) { // Map<filename,tablename> PCollectionView<Map<String, String>> filenamesToTableNamesMapView = filesToTables.apply("asView", View.asMap()); return filesToTables .apply("Get Filenames", Keys.create()) // PCollection<String> .apply(FileIO.matchAll().withEmptyMatchTreatment(EmptyMatchTreatment.DISALLOW)) // PCollection<Match.Metadata> .apply(FileIO.readMatches()) // Pcollection<FileIO.ReadableFile> .apply( "Split into ranges", ParDo.of( new SplitIntoRangesFn( SplitIntoRangesFn.DEFAULT_BUNDLE_SIZE, filenamesToTableNamesMapView)) .withSideInputs(filenamesToTableNamesMapView)) .setCoder(FileShard.Coder.of()) // PCollection<FileShard> .apply("Reshuffle", Reshuffle.viaRandomKey()) // PCollection<FileShard> .apply("Read ranges", ParDo.of(new ReadFileRangesFn(ddlView)).withSideInputs(ddlView)); }
Example #18
Source File: ImportTransform.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * Reads the table manifest for each input pair.
 *
 * <p>For every element, the value is joined onto the import directory to form a file path, the
 * path is matched with {@link EmptyMatchTreatment#DISALLOW}, and the first matched resource is
 * parsed as JSON into a {@code TableManifest}. The output pairs the element's key with the parsed
 * manifest.
 *
 * <p>The manifest is decoded explicitly as UTF-8 rather than with the platform default charset, so
 * parsing does not depend on the worker's locale settings. The reader is managed by
 * try-with-resources together with the underlying stream.
 *
 * <p>An {@link IOException} raised while matching, opening or parsing is rethrown wrapped in a
 * {@link RuntimeException}.
 */
@Override
public PCollection<KV<String, TableManifest>> expand(PCollection<KV<String, String>> input) {
  return input.apply(
      "Read table manifest",
      ParDo.of(
          new DoFn<KV<String, String>, KV<String, TableManifest>>() {

            @ProcessElement
            public void processElement(ProcessContext c) {
              try {
                KV<String, String> kv = c.element();
                String filePath = GcsUtil.joinPath(importDirectory.get(), kv.getValue());
                MatchResult match = FileSystems.match(filePath, EmptyMatchTreatment.DISALLOW);
                ResourceId resourceId = match.metadata().get(0).resourceId();
                TableManifest.Builder builder = TableManifest.newBuilder();
                try (InputStream stream =
                        Channels.newInputStream(FileSystems.open(resourceId));
                    Reader reader =
                        new InputStreamReader(
                            stream, java.nio.charset.StandardCharsets.UTF_8)) {
                  JsonFormat.parser().merge(reader, builder);
                }
                c.output(KV.of(kv.getKey(), builder.build()));
              } catch (IOException e) {
                throw new RuntimeException(e);
              }
            }
          }));
}
Example #19
Source File: TextSourceTest.java From DataflowTemplates with Apache License 2.0 | 5 votes |
@Override public PCollection<String> expand(PCollection<String> files) { return files // PCollection<String> .apply(FileIO.matchAll().withEmptyMatchTreatment(EmptyMatchTreatment.DISALLOW)) // PCollection<Match.Metadata> .apply(FileIO.readMatches()) // PCollection<FileIO.ReadableFile> .apply("Read lines", ParDo.of(new FileReadDoFn())); // PCollection<String>: line }
Example #20
Source File: TextIOReadTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Writes {@code data} to a new file in {@code temporaryFolder} and returns a {@code TextSource}
 * over that file.
 *
 * <p>The source is created with {@link EmptyMatchTreatment#DISALLOW} and the given {@code
 * delimiter}.
 *
 * @throws IOException if the temporary file cannot be created or written
 */
private static TextSource prepareSource(
    TemporaryFolder temporaryFolder, byte[] data, byte[] delimiter) throws IOException {
  Path dataFile = temporaryFolder.newFile().toPath();
  Files.write(dataFile, data);

  ValueProvider<String> location = ValueProvider.StaticValueProvider.of(dataFile.toString());
  return new TextSource(location, EmptyMatchTreatment.DISALLOW, delimiter);
}
Example #21
Source File: TextSourceTest.java From beam with Apache License 2.0 | 5 votes |
@Override public PCollection<String> expand(PCollection<String> files) { return files // PCollection<String> .apply(FileIO.matchAll().withEmptyMatchTreatment(EmptyMatchTreatment.DISALLOW)) // PCollection<Match.Metadata> .apply(FileIO.readMatches()) // PCollection<FileIO.ReadableFile> .apply("Read lines", ParDo.of(new FileReadDoFn())); // PCollection<String>: line }
Example #22
Source File: BlockBasedSource.java From beam with Apache License 2.0 | 5 votes |
/**
 * Like {@link #BlockBasedSource(String, EmptyMatchTreatment, long)}, but taking the file or
 * pattern specification as a {@link ValueProvider}.
 *
 * <p>All arguments are forwarded unchanged to the superclass constructor.
 */
public BlockBasedSource(
    ValueProvider<String> fileOrPatternSpec,
    EmptyMatchTreatment emptyMatchTreatment,
    long minBundleSize) {
  super(
      fileOrPatternSpec,
      emptyMatchTreatment,
      minBundleSize);
}
Example #23
Source File: FileIOTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code FileIO.matchAll()} on a non-wildcard path ({@code /blah} under the temporary
 * folder), with {@link EmptyMatchTreatment#ALLOW_IF_WILDCARD}, is expected to fail the pipeline
 * with a {@link FileNotFoundException} cause.
 */
@Test
@Category(NeedsRunner.class)
public void testMatchAllDisallowEmptyNonWildcard() throws IOException {
  String plainPath = tmpFolder.getRoot().getAbsolutePath() + "/blah";
  FileIO.MatchAll wildcardOnlyMatchAll =
      FileIO.matchAll().withEmptyMatchTreatment(EmptyMatchTreatment.ALLOW_IF_WILDCARD);
  p.apply(Create.of(plainPath)).apply(wildcardOnlyMatchAll);

  thrown.expectCause(isA(FileNotFoundException.class));
  p.run();
}
Example #24
Source File: FileSystems.java From beam with Apache License 2.0 | 5 votes |
/**
 * Like {@link #match(List)}, but with a configurable {@link EmptyMatchTreatment}.
 *
 * <p>All specs are matched through the file system selected by their single scheme; each result
 * is then passed, together with its spec at the same position, through {@code
 * maybeAdjustEmptyMatchResult}.
 *
 * @return one result per match returned by the file system, in the same order
 */
public static List<MatchResult> match(List<String> specs, EmptyMatchTreatment emptyMatchTreatment)
    throws IOException {
  List<MatchResult> rawMatches = getFileSystemInternal(getOnlyScheme(specs)).match(specs);
  List<MatchResult> adjusted = Lists.newArrayListWithExpectedSize(rawMatches.size());

  int position = 0;
  for (MatchResult raw : rawMatches) {
    adjusted.add(maybeAdjustEmptyMatchResult(specs.get(position), raw, emptyMatchTreatment));
    position++;
  }
  return adjusted;
}
Example #25
Source File: FileIOTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code FileIO.matchAll()} on a wildcard pattern under the temporary folder, with
 * {@link EmptyMatchTreatment#DISALLOW}, is expected to fail the pipeline with a {@link
 * FileNotFoundException} cause.
 */
@Test
@Category(NeedsRunner.class)
public void testMatchAllDisallowEmptyExplicit() throws IOException {
  String wildcardPattern = tmpFolder.getRoot().getAbsolutePath() + "/*";
  FileIO.MatchAll strictMatchAll =
      FileIO.matchAll().withEmptyMatchTreatment(EmptyMatchTreatment.DISALLOW);
  p.apply(Create.of(wildcardPattern)).apply(strictMatchAll);

  thrown.expectCause(isA(FileNotFoundException.class));
  p.run();
}
Example #26
Source File: FileSystems.java From beam with Apache License 2.0 | 5 votes |
/**
 * Applies {@code emptyMatchTreatment} to a match result that found nothing.
 *
 * <p>A result counts as empty when its status is {@code NOT_FOUND}, or when it is {@code OK} with
 * no metadata. Non-empty results are returned unchanged. An empty result becomes {@code OK} with
 * an empty list when the treatment is {@code ALLOW}, or when the spec has a glob wildcard and the
 * treatment is {@code ALLOW_IF_WILDCARD}. Otherwise it becomes {@code NOT_FOUND} carrying a
 * {@link FileNotFoundException}.
 */
private static MatchResult maybeAdjustEmptyMatchResult(
    String spec, MatchResult res, EmptyMatchTreatment emptyMatchTreatment) throws IOException {
  Status status = res.status();
  // metadata() is consulted only for OK results, as in the short-circuit evaluation.
  boolean nothingMatched =
      status == Status.NOT_FOUND || (status == Status.OK && res.metadata().isEmpty());
  if (!nothingMatched) {
    return res;
  }

  boolean notFoundAllowed;
  if (emptyMatchTreatment == EmptyMatchTreatment.ALLOW) {
    notFoundAllowed = true;
  } else {
    notFoundAllowed =
        hasGlobWildcard(spec) && emptyMatchTreatment == EmptyMatchTreatment.ALLOW_IF_WILDCARD;
  }

  if (notFoundAllowed) {
    return MatchResult.create(Status.OK, Collections.emptyList());
  }
  return MatchResult.create(
      Status.NOT_FOUND, new FileNotFoundException("No files matched spec: " + spec));
}
Example #27
Source File: AvroIO.java From beam with Apache License 2.0 | 5 votes |
/**
 * Reads records of the given type from an Avro file (or multiple Avro files matching a pattern).
 *
 * <p>The schema must be specified using one of the {@code withSchema} functions.
 *
 * <p>Defaults set here: a match configuration created with {@link EmptyMatchTreatment#DISALLOW},
 * the schema that {@code ReflectData} derives from {@code recordClass}, no Beam schema inference,
 * and no many-files hint.
 */
public static <T> Read<T> read(Class<T> recordClass) {
  MatchConfiguration strictMatching = MatchConfiguration.create(EmptyMatchTreatment.DISALLOW);
  return new AutoValue_AvroIO_Read.Builder<T>()
      .setMatchConfiguration(strictMatching)
      .setRecordClass(recordClass)
      .setSchema(ReflectData.get().getSchema(recordClass))
      .setInferBeamSchema(false)
      .setHintMatchesManyFiles(false)
      .build();
}
Example #28
Source File: AvroIO.java From beam with Apache License 2.0 | 5 votes |
/**
 * Like {@link #read}, but reads each filepattern in the input {@link PCollection}.
 *
 * <p>Defaults set here: a match configuration created with {@link
 * EmptyMatchTreatment#ALLOW_IF_WILDCARD}, the schema that {@code ReflectData} derives from {@code
 * recordClass}, no Beam schema inference, and {@code DEFAULT_BUNDLE_SIZE_BYTES} as the desired
 * bundle size.
 *
 * @deprecated You can achieve the functionality of {@link #readAll} using {@link FileIO} matching
 *     plus {@link #readFiles(Class)}. This is the preferred method to make composition explicit.
 *     {@link ReadAll} will not receive upgrades and will be removed in a future version of Beam.
 */
@Deprecated
public static <T> ReadAll<T> readAll(Class<T> recordClass) {
  MatchConfiguration wildcardTolerantMatching =
      MatchConfiguration.create(EmptyMatchTreatment.ALLOW_IF_WILDCARD);
  return new AutoValue_AvroIO_ReadAll.Builder<T>()
      .setMatchConfiguration(wildcardTolerantMatching)
      .setRecordClass(recordClass)
      .setSchema(ReflectData.get().getSchema(recordClass))
      .setInferBeamSchema(false)
      .setDesiredBundleSizeBytes(DEFAULT_BUNDLE_SIZE_BYTES)
      .build();
}
Example #29
Source File: AvroIO.java From beam with Apache License 2.0 | 5 votes |
/**
 * Like {@link #readGenericRecords(Schema)}, but for a {@link PCollection} of {@link
 * FileIO.ReadableFile}, for example, returned by {@link FileIO#readMatches}.
 *
 * <p>NOTE(review): this method returns a {@link ReadAll}; the input description above is carried
 * over from the existing documentation and may not describe {@link ReadAll}'s actual input.
 * Confirm.
 *
 * <p>Defaults set here: a match configuration created with {@link
 * EmptyMatchTreatment#ALLOW_IF_WILDCARD}, record class {@link GenericRecord}, no Beam schema
 * inference, and {@code DEFAULT_BUNDLE_SIZE_BYTES} as the desired bundle size.
 *
 * @deprecated You can achieve the functionality of {@link #readAllGenericRecords(Schema)} using
 *     {@link FileIO} matching plus {@link #readFilesGenericRecords(Schema)}. This is the
 *     preferred method to make composition explicit. {@link ReadAll} will not receive upgrades
 *     and will be removed in a future version of Beam.
 */
@Deprecated
public static ReadAll<GenericRecord> readAllGenericRecords(Schema schema) {
  MatchConfiguration wildcardTolerantMatching =
      MatchConfiguration.create(EmptyMatchTreatment.ALLOW_IF_WILDCARD);
  return new AutoValue_AvroIO_ReadAll.Builder<GenericRecord>()
      .setMatchConfiguration(wildcardTolerantMatching)
      .setRecordClass(GenericRecord.class)
      .setSchema(schema)
      .setInferBeamSchema(false)
      .setDesiredBundleSizeBytes(DEFAULT_BUNDLE_SIZE_BYTES)
      .build();
}
Example #30
Source File: FileIOTest.java From beam with Apache License 2.0 | 4 votes |
@Test @Category(NeedsRunner.class) public void testMatchAndMatchAll() throws IOException { Path firstPath = tmpFolder.newFile("first").toPath(); Path secondPath = tmpFolder.newFile("second").toPath(); int firstSize = 37; int secondSize = 42; long firstModified = 1541097000L; long secondModified = 1541098000L; Files.write(firstPath, new byte[firstSize]); Files.write(secondPath, new byte[secondSize]); Files.setLastModifiedTime(firstPath, FileTime.fromMillis(firstModified)); Files.setLastModifiedTime(secondPath, FileTime.fromMillis(secondModified)); MatchResult.Metadata firstMetadata = metadata(firstPath, firstSize, firstModified); MatchResult.Metadata secondMetadata = metadata(secondPath, secondSize, secondModified); PAssert.that( p.apply( "Match existing", FileIO.match().filepattern(tmpFolder.getRoot().getAbsolutePath() + "/*"))) .containsInAnyOrder(firstMetadata, secondMetadata); PAssert.that( p.apply( "Match existing with provider", FileIO.match() .filepattern(p.newProvider(tmpFolder.getRoot().getAbsolutePath() + "/*")))) .containsInAnyOrder(firstMetadata, secondMetadata); PAssert.that( p.apply("Create existing", Create.of(tmpFolder.getRoot().getAbsolutePath() + "/*")) .apply("MatchAll existing", FileIO.matchAll())) .containsInAnyOrder(firstMetadata, secondMetadata); PAssert.that( p.apply( "Match non-existing ALLOW", FileIO.match() .filepattern(tmpFolder.getRoot().getAbsolutePath() + "/blah") .withEmptyMatchTreatment(EmptyMatchTreatment.ALLOW))) .containsInAnyOrder(); PAssert.that( p.apply( "Create non-existing", Create.of(tmpFolder.getRoot().getAbsolutePath() + "/blah")) .apply( "MatchAll non-existing ALLOW", FileIO.matchAll().withEmptyMatchTreatment(EmptyMatchTreatment.ALLOW))) .containsInAnyOrder(); PAssert.that( p.apply( "Match non-existing ALLOW_IF_WILDCARD", FileIO.match() .filepattern(tmpFolder.getRoot().getAbsolutePath() + "/blah*") .withEmptyMatchTreatment(EmptyMatchTreatment.ALLOW_IF_WILDCARD))) .containsInAnyOrder(); PAssert.that( p.apply( "Create 
non-existing wildcard + explicit", Create.of(tmpFolder.getRoot().getAbsolutePath() + "/blah*")) .apply( "MatchAll non-existing ALLOW_IF_WILDCARD", FileIO.matchAll() .withEmptyMatchTreatment(EmptyMatchTreatment.ALLOW_IF_WILDCARD))) .containsInAnyOrder(); PAssert.that( p.apply( "Create non-existing wildcard + default", Create.of(tmpFolder.getRoot().getAbsolutePath() + "/blah*")) .apply("MatchAll non-existing default", FileIO.matchAll())) .containsInAnyOrder(); p.run(); }