org.apache.beam.sdk.io.fs.MatchResult Java Examples
The following examples show how to use
org.apache.beam.sdk.io.fs.MatchResult.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: LocalFileSystemTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks that a glob containing character classes and wildcards matches the two files created
 * under {@code A/a=100/data1} and {@code A/a=233/data_dir}.
 */
@Test
public void testMatchWithGlob() throws Exception {
  File root = temporaryFolder.newFolder("A");
  File firstFile = new File(new File(new File(root, "a=100"), "data1"), "file1");
  File secondFile = new File(new File(new File(root, "a=233"), "data_dir"), "data_file2");
  createEmptyFile(firstFile);
  createEmptyFile(secondFile);

  String[] expectedPaths = {firstFile.getAbsolutePath(), secondFile.getAbsolutePath()};

  // The pattern is resolved against the temporary folder root by the helper.
  List<MatchResult> results =
      matchGlobWithPathPrefix(temporaryFolder.getRoot().toPath(), "/A/a=[0-9][0-9][0-9]/*/*");

  assertThat(toFilenames(results), containsInAnyOrder(expectedPaths));
}
Example #2
Source File: HadoopFileSystemTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks that matching a directory path yields a single OK result whose metadata describes the
 * directory itself with a size of zero bytes.
 */
@Test
public void testMatchDirectory() throws Exception {
  create("dir/file", "data".getBytes(StandardCharsets.UTF_8));

  final MatchResult actual =
      Iterables.getOnlyElement(
          fileSystem.match(Collections.singletonList(testPath("dir").toString())));

  // Built after the match call, mirroring the evaluation order of the expected value.
  Metadata expectedDirectory =
      Metadata.builder()
          .setResourceId(testPath("dir"))
          .setIsReadSeekEfficient(true)
          .setSizeBytes(0L)
          .setLastModifiedMillis(lastModified("dir"))
          .build();
  MatchResult expected = MatchResult.create(Status.OK, ImmutableList.of(expectedDirectory));

  assertThat(actual, equalTo(expected));
}
Example #3
Source File: SchemaUtils.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Reads the single file matched by {@code filePath} and returns its contents decoded as UTF-8.
 *
 * <p>The path is resolved with {@link FileSystems#match(String)} and must match exactly one
 * file. The reader opened on that file is closed before this method returns.
 *
 * @param filePath path to file in GCS
 * @return contents of the file as a string
 * @throws RuntimeException wrapping the underlying {@link IOException} if the file cannot be
 *     matched or read; the {@code checkArgument} calls reject zero or multiple matches
 *     (presumably with {@link IllegalArgumentException} - confirm against the imported
 *     Preconditions class)
 */
public static String getGcsFileAsString(String filePath) {
  try {
    MatchResult result = FileSystems.match(filePath);
    checkArgument(
        result.status() == MatchResult.Status.OK && !result.metadata().isEmpty(),
        "Failed to match any files with the pattern: " + filePath);

    List<ResourceId> rId =
        result.metadata().stream()
            .map(MatchResult.Metadata::resourceId)
            .collect(Collectors.toList());

    checkArgument(rId.size() == 1, "Expected exactly 1 file, but got " + rId.size() + " files.");

    // try-with-resources closes the reader (and the channel it wraps) on every exit path.
    try (Reader reader =
        Channels.newReader(FileSystems.open(rId.get(0)), StandardCharsets.UTF_8.name())) {
      return CharStreams.toString(reader);
    }
  } catch (IOException ioe) {
    LOG.error("File system i/o error: " + ioe.getMessage());
    throw new RuntimeException(ioe);
  }
}
Example #4
Source File: S3FileSystemTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks that a 403 from the mocked S3 client's {@code getObjectMetadata} call is reported as a
 * match result with status ERROR carrying the wrapped exception.
 */
@Test
public void matchNonGlobForbidden() {
  S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Options());
  S3ResourceId path = S3ResourceId.fromUri("s3://testbucket/testdirectory/keyname");

  AmazonS3Exception forbidden = new AmazonS3Exception("mock exception");
  forbidden.setStatusCode(403);

  GetObjectMetadataRequest expectedRequest =
      new GetObjectMetadataRequest(path.getBucket(), path.getKey());
  when(s3FileSystem
          .getAmazonS3Client()
          .getObjectMetadata(argThat(new GetObjectMetadataRequestMatcher(expectedRequest))))
      .thenThrow(forbidden);

  MatchResult actual = s3FileSystem.matchNonGlobPath(path);

  assertThat(
      actual, MatchResultMatcher.create(MatchResult.Status.ERROR, new IOException(forbidden)));
}
Example #5
Source File: MatchResultMatcher.java From beam with Apache License 2.0 | 6 votes |
/**
 * Reports whether {@code actual} is a {@link MatchResult} with the expected status and either
 * the expected metadata or, when reading the metadata fails, the expected exception.
 *
 * @param actual the object under test; anything that is not a {@link MatchResult} (including
 *     {@code null}) does not match
 * @return {@code true} when status and metadata/exception agree with the expectations
 */
@Override
public boolean matches(Object actual) {
  // instanceof is false for null, so this one guard rejects null as well as foreign types.
  if (!(actual instanceof MatchResult)) {
    return false;
  }

  MatchResult candidate = (MatchResult) actual;
  if (!expectedStatus.equals(candidate.status())) {
    return false;
  }

  try {
    List<MatchResult.Metadata> candidateMetadata = candidate.metadata();
    return expectedMetadata != null && expectedMetadata.equals(candidateMetadata);
  } catch (IOException e) {
    // Exceptions are compared through their string form, not by identity or equals().
    return expectedException != null && expectedException.toString().equals(e.toString());
  }
}
Example #6
Source File: BigQuerySourceBase.java From beam with Apache License 2.0 | 6 votes |
@Override public List<BoundedSource<T>> split(long desiredBundleSizeBytes, PipelineOptions options) throws Exception { // split() can be called multiple times, e.g. Dataflow runner may call it multiple times // with different desiredBundleSizeBytes in case the split() call produces too many sources. // We ignore desiredBundleSizeBytes anyway, however in any case, we should not initiate // another BigQuery extract job for the repeated split() calls. if (cachedSplitResult == null) { ExtractResult res = extractFiles(options); LOG.info("Extract job produced {} files", res.extractedFiles.size()); if (res.extractedFiles.size() > 0) { BigQueryOptions bqOptions = options.as(BigQueryOptions.class); final String extractDestinationDir = resolveTempLocation(bqOptions.getTempLocation(), "BigQueryExtractTemp", stepUuid); // Match all files in the destination directory to stat them in bulk. List<MatchResult> matches = match(ImmutableList.of(extractDestinationDir + "*")); if (matches.size() > 0) { res.metadata = matches.get(0).metadata(); } } cleanupTempResource(options.as(BigQueryOptions.class)); cachedSplitResult = checkNotNull(createSources(res.extractedFiles, res.schema, res.metadata)); } return cachedSplitResult; }
Example #7
Source File: TFRecordIO.java From beam with Apache License 2.0 | 6 votes |
/**
 * Applies the read, optionally validating first that the configured filepattern matches at
 * least one file.
 *
 * @param input the pipeline start
 * @return the records read from {@code getSource()}
 * @throws IllegalStateException if no filepattern is set, or if matching the filepattern fails
 *     with an {@link IOException} during validation
 */
@Override
public PCollection<byte[]> expand(PBegin input) {
  if (getFilepattern() == null) {
    throw new IllegalStateException("Need to set the filepattern of a TFRecordIO.Read transform");
  }

  if (getValidate()) {
    checkState(getFilepattern().isAccessible(), "Cannot validate with a RVP.");
    final String pattern = getFilepattern().get();
    try {
      MatchResult matched = FileSystems.match(pattern);
      checkState(
          !matched.metadata().isEmpty(), "Unable to find any files matching %s", pattern);
    } catch (IOException e) {
      throw new IllegalStateException(String.format("Failed to validate %s", pattern), e);
    }
  }

  return input.apply("Read", org.apache.beam.sdk.io.Read.from(getSource()));
}
Example #8
Source File: LocalFileSystemTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks that the {@code **} glob applied to the prefix {@code a} returns the file inside
 * directory {@code a} as well as the sibling files {@code aa} and {@code ab}.
 */
@Test
public void testMatchMultipleWithSubdirectoryExpansion() throws Exception {
  File dirA = temporaryFolder.newFolder("a");
  File fileInDirA = File.createTempFile("sub-dir-file", "", dirA);
  fileInDirA.deleteOnExit();

  File dirB = temporaryFolder.newFolder("b");
  File fileInDirB = File.createTempFile("sub-dir-file", "", dirB);
  fileInDirB.deleteOnExit();

  String[] expectedPaths = {
    fileInDirA.toString(),
    temporaryFolder.newFile("aa").toString(),
    temporaryFolder.newFile("ab").toString()
  };
  // Created on disk but not part of the expected items.
  temporaryFolder.newFile("ba");
  temporaryFolder.newFile("bb");

  List<MatchResult> results =
      matchGlobWithPathPrefix(temporaryFolder.getRoot().toPath().resolve("a"), "**");

  // hasItems only requires the expected paths to be present; extra results are tolerated.
  assertThat(toFilenames(results), Matchers.hasItems(expectedPaths));
}
Example #9
Source File: FileSystems.java From beam with Apache License 2.0 | 6 votes |
/**
 * Looks up the {@link Metadata} of the one resource that {@code spec} is expected to match.
 *
 * @param spec a resource specification that matches exactly one result.
 * @return the {@link Metadata} for the specified resource.
 * @throws FileNotFoundException if the match status is {@code NOT_FOUND}.
 * @throws IOException if the inner call to {@link #match} fails, the match status is neither
 *     {@code OK} nor {@code NOT_FOUND}, or the spec does not match exactly 1 result.
 */
public static Metadata matchSingleFileSpec(String spec) throws IOException {
  MatchResult only =
      Iterables.getOnlyElement(FileSystems.match(Collections.singletonList(spec)));
  Status status = only.status();

  if (status == Status.NOT_FOUND) {
    throw new FileNotFoundException(String.format("File spec %s not found", spec));
  }
  if (status != Status.OK) {
    throw new IOException(
        String.format("Error matching file spec %s: status %s", spec, status));
  }

  List<Metadata> found = only.metadata();
  if (found.size() == 1) {
    return found.get(0);
  }
  throw new IOException(
      String.format(
          "Expecting spec %s to match exactly one file, but matched %s: %s",
          spec, found.size(), found));
}
Example #10
Source File: LocalFileSystemTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks that the single-star glob applied to the prefix {@code a} returns exactly the files
 * {@code a}, {@code aa} and {@code ab}, and nothing from inside the directory {@code aaa}.
 */
@Test
public void testMatchMultipleWithoutSubdirectoryExpansion() throws Exception {
  File nestedDir = temporaryFolder.newFolder("aaa");
  File nestedFile = File.createTempFile("sub-dir-file", "", nestedDir);
  nestedFile.deleteOnExit();

  String[] expectedPaths = {
    temporaryFolder.newFile("a").toString(),
    temporaryFolder.newFile("aa").toString(),
    temporaryFolder.newFile("ab").toString()
  };
  // Created on disk but not part of the expected results.
  temporaryFolder.newFile("ba");
  temporaryFolder.newFile("bb");

  List<MatchResult> results =
      matchGlobWithPathPrefix(temporaryFolder.getRoot().toPath().resolve("a"), "*");

  assertThat(toFilenames(results), containsInAnyOrder(expectedPaths));
}
Example #11
Source File: FileSystems.java From beam with Apache License 2.0 | 6 votes |
private static KV<List<ResourceId>, List<ResourceId>> filterMissingFiles( List<ResourceId> srcResourceIds, List<ResourceId> destResourceIds) throws IOException { validateSrcDestLists(srcResourceIds, destResourceIds); if (srcResourceIds.isEmpty()) { // Short-circuit. return KV.of(Collections.<ResourceId>emptyList(), Collections.<ResourceId>emptyList()); } List<ResourceId> srcToHandle = new ArrayList<>(); List<ResourceId> destToHandle = new ArrayList<>(); List<MatchResult> matchResults = matchResources(srcResourceIds); for (int i = 0; i < matchResults.size(); ++i) { if (!matchResults.get(i).status().equals(Status.NOT_FOUND)) { srcToHandle.add(srcResourceIds.get(i)); destToHandle.add(destResourceIds.get(i)); } } return KV.of(srcToHandle, destToHandle); }
Example #12
Source File: FileIO.java From beam with Apache License 2.0 | 6 votes |
/**
 * Matches each input filepattern, either once or continuously when a watch interval is
 * configured, and reshuffles the resulting metadata by random key.
 *
 * @param input filepatterns to match
 * @return metadata of the matched files
 */
@Override
public PCollection<MatchResult.Metadata> expand(PCollection<String> input) {
  final PCollection<MatchResult.Metadata> matched;
  if (getConfiguration().getWatchInterval() != null) {
    matched =
        input
            .apply(
                "Continuously match filepatterns",
                Watch.growthOf(
                        Contextful.of(new MatchPollFn(), Requirements.empty()),
                        new ExtractFilenameFn())
                    .withPollInterval(getConfiguration().getWatchInterval())
                    .withTerminationPerInput(getConfiguration().getWatchTerminationCondition()))
            .apply(Values.create());
  } else {
    matched =
        input.apply(
            "Match filepatterns",
            ParDo.of(new MatchFn(getConfiguration().getEmptyMatchTreatment())));
  }
  return matched.apply(Reshuffle.viaRandomKey());
}
Example #13
Source File: FileIO.java From beam with Apache License 2.0 | 6 votes |
/**
 * Wraps {@code metadata} in a {@link ReadableFile}, resolving {@code Compression.AUTO} from the
 * file name. Make sure {@link
 * #shouldSkipDirectory(org.apache.beam.sdk.io.fs.MatchResult.Metadata,
 * org.apache.beam.sdk.io.FileIO.ReadMatches.DirectoryTreatment)} returns false before using.
 *
 * <p>The returned file's metadata is marked read-seek-efficient only if the input metadata was
 * and the resolved compression is {@code UNCOMPRESSED}.
 */
static ReadableFile matchToReadableFile(
    MatchResult.Metadata metadata, Compression compression) {
  final Compression resolved;
  if (compression == Compression.AUTO) {
    resolved = Compression.detect(metadata.resourceId().getFilename());
  } else {
    resolved = compression;
  }

  boolean seekEfficient =
      metadata.isReadSeekEfficient() && resolved == Compression.UNCOMPRESSED;

  MatchResult.Metadata adjusted =
      MatchResult.Metadata.builder()
          .setResourceId(metadata.resourceId())
          .setSizeBytes(metadata.sizeBytes())
          .setLastModifiedMillis(metadata.lastModifiedMillis())
          .setIsReadSeekEfficient(seekEfficient)
          .build();

  return new ReadableFile(adjusted, resolved);
}
Example #14
Source File: LocalFileSystemTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks that matching a directory path on the local file system yields one OK result whose
 * metadata carries the directory's own resource id, length and last-modified time.
 */
@Test
public void testMatchDirectory() throws Exception {
  final Path dir = temporaryFolder.newFolder("dir").toPath();

  final MatchResult actual =
      Iterables.getOnlyElement(localFileSystem.match(Collections.singletonList(dir.toString())));

  MatchResult.Metadata expectedDirectory =
      MatchResult.Metadata.builder()
          .setResourceId(LocalResourceId.fromPath(dir, true))
          .setIsReadSeekEfficient(true)
          .setSizeBytes(dir.toFile().length())
          .setLastModifiedMillis(dir.toFile().lastModified())
          .build();
  MatchResult expected =
      MatchResult.create(MatchResult.Status.OK, ImmutableList.of(expectedDirectory));

  assertThat(actual, equalTo(expected));
}
Example #15
Source File: SubprocessTextTransformer.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * Reads every matched file whose name ends in {@code .py} into memory. Supports any file system
 * that {@link FileSystems} supports.
 *
 * @param path file pattern to match
 * @return the contents of each matching script, decoded as UTF-8
 * @throws IOException if matching the pattern fails
 * @throws UncheckedIOException if an individual script cannot be read
 */
private static Collection<String> getScripts(String path) throws IOException {
  MatchResult matched = FileSystems.match(path);
  checkArgument(
      matched.status() == Status.OK && !matched.metadata().isEmpty(),
      "Failed to match any files with the pattern: " + path);

  LOG.info("getting script!");

  return matched.metadata().stream()
      .map(Metadata::resourceId)
      .filter(id -> id.getFilename().endsWith(".py"))
      .map(
          id -> {
            try (Reader in =
                Channels.newReader(FileSystems.open(id), StandardCharsets.UTF_8.name())) {
              return CharStreams.toString(in);
            } catch (IOException e) {
              // Lambdas cannot throw checked exceptions, so the failure is rethrown unchecked.
              throw new UncheckedIOException(e);
            }
          })
      .collect(Collectors.toList());
}
Example #16
Source File: S3FileSystem.java From beam with Apache License 2.0 | 5 votes |
/**
 * Builds Beam metadata for an S3 object from its resource id and content encoding.
 *
 * <p>The object is treated as read-seek-efficient unless its encoding is listed in {@code
 * NON_READ_SEEK_EFFICIENT_ENCODINGS}. A missing last-modified time is recorded as {@code 0}.
 *
 * @param path resource id; its size must be present
 * @param contentEncoding content encoding of the object; must not be null
 * @return metadata carrying the path, size, last-modified millis and seek-efficiency flag
 */
private static MatchResult.Metadata createBeamMetadata(
    S3ResourceId path, String contentEncoding) {
  checkArgument(path.getSize().isPresent(), "path has size");
  checkNotNull(contentEncoding, "contentEncoding");

  boolean seekEfficient = !NON_READ_SEEK_EFFICIENT_ENCODINGS.contains(contentEncoding);
  long modifiedMillis = path.getLastModified().transform(Date::getTime).or(0L);

  return MatchResult.Metadata.builder()
      .setResourceId(path)
      .setSizeBytes(path.getSize().get())
      .setLastModifiedMillis(modifiedMillis)
      .setIsReadSeekEfficient(seekEfficient)
      .build();
}
Example #17
Source File: JavascriptTextTransformer.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * Reads every matched file whose name ends in {@code .js} into memory. Supports any file system
 * that {@link FileSystems} supports.
 *
 * @param path file pattern to match
 * @return the contents of each matching script, decoded as UTF-8
 * @throws IOException if matching the pattern fails
 * @throws UncheckedIOException if an individual script cannot be read
 */
private static Collection<String> getScripts(String path) throws IOException {
  MatchResult matched = FileSystems.match(path);
  checkArgument(
      matched.status() == Status.OK && !matched.metadata().isEmpty(),
      "Failed to match any files with the pattern: " + path);

  return matched.metadata().stream()
      .map(Metadata::resourceId)
      .filter(id -> id.getFilename().endsWith(".js"))
      .map(
          id -> {
            try (Reader in =
                Channels.newReader(FileSystems.open(id), StandardCharsets.UTF_8.name())) {
              return CharStreams.toString(in);
            } catch (IOException e) {
              // Lambdas cannot throw checked exceptions, so the failure is rethrown unchecked.
              throw new UncheckedIOException(e);
            }
          })
      .collect(Collectors.toList());
}
Example #18
Source File: LocalFileSystemTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks that matching the exact path of file {@code a} returns only that file, even though
 * {@code aa} and {@code ab} exist alongside it.
 */
@Test
public void testMatchExact() throws Exception {
  String[] expectedPaths = {temporaryFolder.newFile("a").toString()};
  temporaryFolder.newFile("aa");
  temporaryFolder.newFile("ab");

  String exactSpec = temporaryFolder.getRoot().toPath().resolve("a").toString();
  List<MatchResult> results = localFileSystem.match(ImmutableList.of(exactSpec));

  assertThat(toFilenames(results), containsInAnyOrder(expectedPaths));
}
Example #19
Source File: LocalFileSystemTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks that a relative wildcard spec ({@code A/*}) is matched when {@code user.dir} points at
 * the temporary folder root.
 *
 * <p>{@code user.dir} is a JVM-wide property, so the previous value is restored afterwards to
 * keep this test from affecting others that run in the same JVM.
 */
@Test
public void testMatchRelativeWildcardPath() throws Exception {
  File baseFolder = temporaryFolder.newFolder("A");
  File expectedFile1 = new File(baseFolder, "file1");
  expectedFile1.createNewFile();

  List<String> expected = ImmutableList.of(expectedFile1.getAbsolutePath());

  String previousUserDir = System.getProperty("user.dir");
  System.setProperty("user.dir", temporaryFolder.getRoot().toString());
  try {
    List<MatchResult> matchResults = localFileSystem.match(ImmutableList.of("A/*"));
    assertThat(
        toFilenames(matchResults),
        containsInAnyOrder(expected.toArray(new String[expected.size()])));
  } finally {
    // Restore global state even when the assertion fails.
    if (previousUserDir == null) {
      System.clearProperty("user.dir");
    } else {
      System.setProperty("user.dir", previousUserDir);
    }
  }
}
Example #20
Source File: LocalFileSystemTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks that a glob with prefix {@code b} yields a single {@code NOT_FOUND} result when only
 * the files {@code a}, {@code aa} and {@code ab} exist.
 */
@Test
public void testMatchPatternNone() throws Exception {
  temporaryFolder.newFile("a");
  temporaryFolder.newFile("aa");
  temporaryFolder.newFile("ab");

  List<MatchResult> results =
      matchGlobWithPathPrefix(temporaryFolder.getRoot().toPath().resolve("b"), "*");

  assertEquals(1, results.size());
  MatchResult onlyResult = results.get(0);
  assertEquals(MatchResult.Status.NOT_FOUND, onlyResult.status());
}
Example #21
Source File: S3FileSystemTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks that an object whose mocked S3 metadata reports a {@code gzip} content encoding is
 * matched with {@code isReadSeekEfficient} set to false.
 */
@Test
public void matchNonGlobNotReadSeekEfficient() {
  S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Options());
  S3ResourceId path = S3ResourceId.fromUri("s3://testbucket/testdirectory/filethatexists");
  long lastModifiedMillis = 1540000000000L;

  ObjectMetadata gzippedObject = new ObjectMetadata();
  gzippedObject.setContentLength(100);
  gzippedObject.setLastModified(new Date(lastModifiedMillis));
  gzippedObject.setContentEncoding("gzip");

  GetObjectMetadataRequest expectedRequest =
      new GetObjectMetadataRequest(path.getBucket(), path.getKey());
  when(s3FileSystem
          .getAmazonS3Client()
          .getObjectMetadata(argThat(new GetObjectMetadataRequestMatcher(expectedRequest))))
      .thenReturn(gzippedObject);

  MatchResult actual = s3FileSystem.matchNonGlobPath(path);

  MatchResult.Metadata expectedMetadata =
      MatchResult.Metadata.builder()
          .setSizeBytes(100)
          .setLastModifiedMillis(lastModifiedMillis)
          .setResourceId(path)
          .setIsReadSeekEfficient(false)
          .build();
  assertThat(actual, MatchResultMatcher.create(ImmutableList.of(expectedMetadata)));
}
Example #22
Source File: LocalFileSystemTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks that a spec prefixed with {@code file:/} matches file {@code a} and nothing else.
 */
@Test
public void testMatchWithFileSlashPrefix() throws Exception {
  String[] expectedPaths = {temporaryFolder.newFile("a").toString()};
  temporaryFolder.newFile("aa");
  temporaryFolder.newFile("ab");

  String prefixedSpec = "file:/" + temporaryFolder.getRoot().toPath().resolve("a").toString();
  List<MatchResult> matched = localFileSystem.match(ImmutableList.of(prefixedSpec));

  assertThat(toFilenames(matched), containsInAnyOrder(expectedPaths));
}
Example #23
Source File: MatchResultMatcher.java From beam with Apache License 2.0 | 5 votes |
/**
 * Convenience factory that assembles a {@link MatchResult.Metadata} from its four fields and
 * delegates to the metadata-based {@code create} overload.
 *
 * @param sizeBytes expected size in bytes
 * @param lastModifiedMillis expected last-modified timestamp in milliseconds
 * @param resourceId expected resource id
 * @param isReadSeekEfficient expected seek-efficiency flag
 * @return a matcher for a result containing exactly that metadata
 */
static MatchResultMatcher create(
    long sizeBytes, long lastModifiedMillis, ResourceId resourceId, boolean isReadSeekEfficient) {
  MatchResult.Metadata expected =
      MatchResult.Metadata.builder()
          .setResourceId(resourceId)
          .setSizeBytes(sizeBytes)
          .setLastModifiedMillis(lastModifiedMillis)
          .setIsReadSeekEfficient(isReadSeekEfficient)
          .build();
  return create(expected);
}
Example #24
Source File: LocalFileSystemTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks that a spec prefixed with {@code file:///} matches file {@code a} and nothing else.
 */
@Test
public void testMatchWithFileThreeSlashesPrefix() throws Exception {
  String[] expectedPaths = {temporaryFolder.newFile("a").toString()};
  temporaryFolder.newFile("aa");
  temporaryFolder.newFile("ab");

  String prefixedSpec = "file:///" + temporaryFolder.getRoot().toPath().resolve("a").toString();
  List<MatchResult> matched = localFileSystem.match(ImmutableList.of(prefixedSpec));

  assertThat(toFilenames(matched), containsInAnyOrder(expectedPaths));
}
Example #25
Source File: FileBasedSource.java From beam with Apache License 2.0 | 5 votes |
/**
 * Exposes the metadata of the one file backing this source.
 *
 * <p>Only valid in {@link Mode#SINGLE_FILE_OR_SUBRANGE}; the mode is checked first, then the
 * metadata is checked for null.
 *
 * @return the stored single-file metadata, never null
 * @throws IllegalArgumentException if this source is in {@link Mode#FILEPATTERN} mode.
 */
protected final MatchResult.Metadata getSingleFileMetadata() {
  final boolean isSingleFile = mode == Mode.SINGLE_FILE_OR_SUBRANGE;
  checkArgument(
      isSingleFile, "This function should only be called for a single file, not %s", this);

  final boolean hasMetadata = singleFileMetadata != null;
  checkState(
      hasMetadata,
      "It should not be possible to construct a %s in mode %s with null metadata: %s",
      FileBasedSource.class,
      mode,
      this);

  return singleFileMetadata;
}
Example #26
Source File: JavascriptTextTransformer.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * Loads the contents of all matched {@code .js} files. Supports any file system that {@link
 * FileSystems} supports.
 *
 * @param path file pattern to match
 * @return one UTF-8 decoded string per matching script
 * @throws IOException if matching the pattern fails
 * @throws UncheckedIOException if reading one of the scripts fails
 */
private static Collection<String> getScripts(String path) throws IOException {
  MatchResult matchResult = FileSystems.match(path);
  boolean foundSomething =
      matchResult.status() == Status.OK && !matchResult.metadata().isEmpty();
  checkArgument(foundSomething, "Failed to match any files with the pattern: " + path);

  return matchResult.metadata().stream()
      .filter(candidate -> candidate.resourceId().getFilename().endsWith(".js"))
      .map(
          script -> {
            try (Reader source =
                Channels.newReader(
                    FileSystems.open(script.resourceId()), StandardCharsets.UTF_8.name())) {
              return CharStreams.toString(source);
            } catch (IOException e) {
              // Checked exceptions cannot escape the lambda, hence the unchecked wrapper.
              throw new UncheckedIOException(e);
            }
          })
      .collect(Collectors.toList());
}
Example #27
Source File: MatchResultMatcher.java From beam with Apache License 2.0 | 5 votes |
private MatchResultMatcher( MatchResult.Status expectedStatus, List<MatchResult.Metadata> expectedMetadata, IOException expectedException) { this.expectedStatus = checkNotNull(expectedStatus); checkArgument((expectedMetadata == null) ^ (expectedException == null)); this.expectedMetadata = expectedMetadata; this.expectedException = expectedException; }
Example #28
Source File: ImportTransform.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * Locates {@code spanner-export.json} inside the import directory and parses it into an {@link
 * Export}.
 *
 * @param input the pipeline start
 * @return the parsed export manifest(s)
 */
@Override
public PCollection<Export> expand(PBegin input) {
  NestedValueProvider<String, String> manifestPath =
      NestedValueProvider.of(importDirectory, dir -> GcsUtil.joinPath(dir, "spanner-export.json"));

  PCollection<MatchResult.Metadata> manifestMatch =
      input.apply("Read manifest", FileIO.match().filepattern(manifestPath));

  PCollection<ResourceId> manifestId =
      manifestMatch.apply(
          "Resource id",
          MapElements.into(TypeDescriptor.of(ResourceId.class))
              .via((MatchResult.Metadata::resourceId)));

  return manifestId.apply(
      "Read manifest json",
      MapElements.into(TypeDescriptor.of(Export.class))
          .via(ReadExportManifestFile::readManifest));
}
Example #29
Source File: FileBasedIOITHelper.java From beam with Apache License 2.0 | 5 votes |
/**
 * Deletes every resource matched by the element, which is used as a file spec.
 *
 * @param c context whose element is the spec to match
 * @throws IOException propagated from matching, reading the metadata, or deleting
 */
@ProcessElement
public void processElement(ProcessContext c) throws IOException {
  MatchResult matched =
      Iterables.getOnlyElement(FileSystems.match(Collections.singletonList(c.element())));

  // A set is used, so a resource id that appears more than once is passed to delete only once.
  Set<ResourceId> toDelete = new HashSet<>();
  matched.metadata().forEach(entry -> toDelete.add(entry.resourceId()));

  FileSystems.delete(toDelete);
}
Example #30
Source File: FileSystems.java From beam with Apache License 2.0 | 5 votes |
/**
 * Normalises an empty match according to {@code emptyMatchTreatment}.
 *
 * <p>A result counts as empty when its status is {@code NOT_FOUND}, or {@code OK} with no
 * metadata. Non-empty results are returned untouched. An empty result becomes {@code OK} with
 * an empty list when the treatment is {@code ALLOW}, or {@code ALLOW_IF_WILDCARD} and the spec
 * has a glob wildcard; otherwise it becomes {@code NOT_FOUND} with a {@link
 * FileNotFoundException}.
 *
 * @param spec the file spec that produced {@code res}
 * @param res the raw match result
 * @param emptyMatchTreatment how an empty match should be treated
 * @return {@code res} itself, or a replacement result for the empty case
 * @throws IOException propagated from reading the metadata of {@code res}
 */
private static MatchResult maybeAdjustEmptyMatchResult(
    String spec, MatchResult res, EmptyMatchTreatment emptyMatchTreatment) throws IOException {
  boolean emptyMatch =
      res.status() == Status.NOT_FOUND
          || (res.status() == Status.OK && res.metadata().isEmpty());
  if (!emptyMatch) {
    return res;
  }

  if (emptyMatchTreatment == EmptyMatchTreatment.ALLOW
      || (hasGlobWildcard(spec)
          && emptyMatchTreatment == EmptyMatchTreatment.ALLOW_IF_WILDCARD)) {
    return MatchResult.create(Status.OK, Collections.emptyList());
  }

  return MatchResult.create(
      Status.NOT_FOUND, new FileNotFoundException("No files matched spec: " + spec));
}