Java Code Examples for org.apache.beam.sdk.io.fs.MatchResult#Metadata
The following examples show how to use
org.apache.beam.sdk.io.fs.MatchResult#Metadata.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FileIO.java From beam with Apache License 2.0 | 6 votes |
/**
 * Decides whether a matched resource must be skipped because it is a directory.
 *
 * @param metadata match metadata whose resource id is inspected
 * @param directoryTreatment policy consulted only when the resource is a directory
 * @return {@code true} if the resource is a directory and the treatment is {@code SKIP};
 *     {@code false} if the resource is not a directory
 * @throws java.lang.IllegalArgumentException if the resource is a directory and the treatment is
 *     {@code PROHIBIT}
 * @throws java.lang.UnsupportedOperationException if the resource is a directory and the
 *     treatment is neither {@code SKIP} nor {@code PROHIBIT}
 */
static boolean shouldSkipDirectory(
    MatchResult.Metadata metadata, DirectoryTreatment directoryTreatment) {
  // Plain files are never skipped; the treatment is not even looked at for them.
  if (!metadata.resourceId().isDirectory()) {
    return false;
  }

  switch (directoryTreatment) {
    case PROHIBIT:
      throw new IllegalArgumentException(
          "Trying to read " + metadata.resourceId() + " which is a directory");
    case SKIP:
      return true;
    default:
      throw new UnsupportedOperationException(
          "Unknown DirectoryTreatment: " + directoryTreatment);
  }
}
Example 2
Source File: FileIO.java From beam with Apache License 2.0 | 6 votes |
/**
 * Turns match metadata into a {@code ReadableFile}. Call this only after {@link
 * #shouldSkipDirectory(org.apache.beam.sdk.io.fs.MatchResult.Metadata,
 * org.apache.beam.sdk.io.FileIO.ReadMatches.DirectoryTreatment)} has returned false.
 *
 * <p>When {@code compression} is {@code AUTO}, the effective compression is detected from the
 * resource's file name. The returned file carries a rebuilt metadata object that copies the
 * resource id, size and last-modified time, and reports seeking as efficient only if the input
 * metadata does so and the effective compression is {@code UNCOMPRESSED}.
 *
 * @param metadata metadata of the matched resource
 * @param compression requested compression, possibly {@code AUTO}
 * @return a readable file pairing the rebuilt metadata with the effective compression
 */
static ReadableFile matchToReadableFile(
    MatchResult.Metadata metadata, Compression compression) {
  final Compression effective;
  if (compression == Compression.AUTO) {
    effective = Compression.detect(metadata.resourceId().getFilename());
  } else {
    effective = compression;
  }

  MatchResult.Metadata rebuilt =
      MatchResult.Metadata.builder()
          .setResourceId(metadata.resourceId())
          .setSizeBytes(metadata.sizeBytes())
          .setLastModifiedMillis(metadata.lastModifiedMillis())
          .setIsReadSeekEfficient(
              metadata.isReadSeekEfficient() && effective == Compression.UNCOMPRESSED)
          .build();
  return new ReadableFile(rebuilt, effective);
}
Example 3
Source File: AvroTableFileAsMutationsTest.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * A seek-efficient file of {@code 2 * splitSize} bytes is expected to come back from
 * {@code runFileShardingPipeline} as two shards, each spanning exactly {@code splitSize} bytes
 * and each referring to file "testfile" and table "testtable".
 */
@Test
public void testFileSharding() throws Exception {
  int splitSize = 10000;
  Path path = tmpFolder.newFile("testfile").toPath();
  Files.write(path, new byte[splitSize * 2]);

  MatchResult.Metadata seekableMetadata =
      MatchResult.Metadata.builder()
          .setResourceId(FileSystems.matchNewResource(path.toString(), false /* isDirectory */))
          .setIsReadSeekEfficient(true)
          .setSizeBytes(splitSize * 2)
          .build();

  PAssert.that(runFileShardingPipeline(seekableMetadata, splitSize))
      .satisfies(
          input -> {
            LinkedList<FileShard> shards = Lists.newLinkedList(input);
            assertThat(shards, hasSize(2));
            for (FileShard shard : shards) {
              String fileName = shard.getFile().getMetadata().resourceId().getFilename();
              assertThat(fileName, equalTo("testfile"));
              assertThat(shard.getTableName(), equalTo("testtable"));
              // Every shard must cover exactly one split.
              long shardLength = shard.getRange().getTo() - shard.getRange().getFrom();
              assertThat(shardLength, equalTo(splitSize * 1L));
            }
            return null;
          });
  p.run();
}
Example 4
Source File: AvroTableFileAsMutationsTest.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * A file flagged as not seek-efficient is expected to come back from
 * {@code runFileShardingPipeline} as a single shard covering the range from 0 to
 * {@code 2 * splitSize}, even though it is twice the split size.
 */
@Test
public void testFileShardingNotSeekable() throws Exception {
  int splitSize = 10000;
  Path path = tmpFolder.newFile("testfile").toPath();
  Files.write(path, new byte[splitSize * 2]);

  MatchResult.Metadata nonSeekableMetadata =
      MatchResult.Metadata.builder()
          .setResourceId(FileSystems.matchNewResource(path.toString(), false /* isDirectory */))
          .setIsReadSeekEfficient(false)
          .setSizeBytes(splitSize * 2)
          .build();

  PAssert.that(runFileShardingPipeline(nonSeekableMetadata, splitSize))
      .satisfies(
          input -> {
            LinkedList<FileShard> shards = Lists.newLinkedList(input);
            assertThat(shards, hasSize(1));

            FileShard onlyShard = shards.getFirst();
            String fileName = onlyShard.getFile().getMetadata().resourceId().getFilename();
            assertThat(fileName, equalTo("testfile"));
            assertThat(onlyShard.getTableName(), equalTo("testtable"));
            // The single shard spans the whole file.
            assertThat(onlyShard.getRange().getFrom(), equalTo(0L));
            assertThat(onlyShard.getRange().getTo(), equalTo(splitSize * 2L));
            return null;
          });
  p.run();
}
Example 5
Source File: AvroTableFileAsMutationsTest.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * A seek-efficient file of exactly {@code splitSize} bytes is expected to come back from
 * {@code runFileShardingPipeline} as a single shard covering the range from 0 to
 * {@code splitSize}.
 */
@Test
public void testFileShardingNoSharding() throws Exception {
  int splitSize = 10000;
  Path path = tmpFolder.newFile("testfile").toPath();
  Files.write(path, new byte[splitSize]);

  MatchResult.Metadata singleSplitMetadata =
      MatchResult.Metadata.builder()
          .setResourceId(FileSystems.matchNewResource(path.toString(), false /* isDirectory */))
          .setIsReadSeekEfficient(true)
          .setSizeBytes(splitSize)
          .build();

  PAssert.that(runFileShardingPipeline(singleSplitMetadata, splitSize))
      .satisfies(
          input -> {
            LinkedList<FileShard> shards = Lists.newLinkedList(input);
            assertThat(shards, hasSize(1));

            FileShard onlyShard = shards.getFirst();
            String fileName = onlyShard.getFile().getMetadata().resourceId().getFilename();
            assertThat(fileName, equalTo("testfile"));
            assertThat(onlyShard.getTableName(), equalTo("testtable"));
            // The file fits in one split, so the shard spans all of it.
            assertThat(onlyShard.getRange().getFrom(), equalTo(0L));
            assertThat(onlyShard.getRange().getTo(), equalTo(splitSize * 1L));
            return null;
          });
  p.run();
}
Example 6
Source File: RecordFileSource.java From dataflow-opinion-analysis with Apache License 2.0 | 5 votes |
/**
 * Creates the source for a sub-range of a single file.
 *
 * <p>The given metadata and offsets are handed unchanged to a new {@code RecordFileSource},
 * together with this source's {@code coder} and {@code separator}.
 *
 * @param metadata metadata of the file to read
 * @param start start offset forwarded to the new source
 * @param end end offset forwarded to the new source
 * @return a new {@code RecordFileSource} configured like this one
 */
@Override
protected FileBasedSource<T> createForSubrangeOfFile(
    MatchResult.Metadata metadata, long start, long end) {
  FileBasedSource<T> subrangeSource =
      new RecordFileSource<>(metadata, start, end, coder, separator);
  return subrangeSource;
}
Example 7
Source File: FileIOTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Builds the metadata object the tests expect for a regular (non-directory) file.
 *
 * @param path location of the file; converted to a string and resolved as a non-directory
 *     resource
 * @param size value stored as the size in bytes
 * @param lastModifiedMillis value stored as the last-modified timestamp
 * @return metadata marked as read-seek-efficient with the given size and timestamp
 */
private static MatchResult.Metadata metadata(Path path, int size, long lastModifiedMillis) {
  String fileSpec = path.toString();
  MatchResult.Metadata expected =
      MatchResult.Metadata.builder()
          .setResourceId(FileSystems.matchNewResource(fileSpec, false /* isDirectory */))
          .setIsReadSeekEfficient(true)
          .setSizeBytes(size)
          .setLastModifiedMillis(lastModifiedMillis)
          .build();
  return expected;
}
Example 8
Source File: FileBasedIOITHelper.java From beam with Apache License 2.0 | 5 votes |
/**
 * Deletes every resource matched by the current element.
 *
 * <p>The element is matched as the only entry of a singleton list, the resource ids of all
 * matched entries are collected into a set, and that set is passed to
 * {@code FileSystems.delete}.
 *
 * @param c process context supplying the element to match
 * @throws IOException propagated from matching, reading the match metadata, or deleting
 */
@ProcessElement
public void processElement(ProcessContext c) throws IOException {
  // Exactly one spec is submitted, so exactly one MatchResult is expected back.
  MatchResult onlyMatch =
      Iterables.getOnlyElement(FileSystems.match(Collections.singletonList(c.element())));

  Set<ResourceId> toDelete = new HashSet<>();
  for (MatchResult.Metadata matched : onlyMatch.metadata()) {
    toDelete.add(matched.resourceId());
  }

  FileSystems.delete(toDelete);
}
Example 9
Source File: MatchResultMatcher.java From beam with Apache License 2.0 | 5 votes |
private MatchResultMatcher( MatchResult.Status expectedStatus, List<MatchResult.Metadata> expectedMetadata, IOException expectedException) { this.expectedStatus = checkNotNull(expectedStatus); checkArgument((expectedMetadata == null) ^ (expectedException == null)); this.expectedMetadata = expectedMetadata; this.expectedException = expectedException; }
Example 10
Source File: TextSource.java From beam with Apache License 2.0 | 4 votes |
/**
 * Creates the source for a sub-range of a single file.
 *
 * <p>The given metadata and offsets are handed unchanged to a new {@code TextSource}, together
 * with this source's {@code delimiter}.
 *
 * @param metadata metadata of the file to read
 * @param start start offset forwarded to the new source
 * @param end end offset forwarded to the new source
 * @return a new {@code TextSource} using the same delimiter as this one
 */
@Override
protected FileBasedSource<String> createForSubrangeOfFile(
    MatchResult.Metadata metadata, long start, long end) {
  FileBasedSource<String> subrangeSource = new TextSource(metadata, start, end, delimiter);
  return subrangeSource;
}
Example 11
Source File: FileIO.java From beam with Apache License 2.0 | 4 votes |
/**
 * Applies a {@code ParDo} over the incoming metadata using a {@code ToReadableFileFn} that is
 * constructed from this transform.
 *
 * @param input collection of match metadata
 * @return the collection of readable files produced by the {@code ParDo}
 */
@Override
public PCollection<ReadableFile> expand(PCollection<MatchResult.Metadata> input) {
  ToReadableFileFn toReadableFile = new ToReadableFileFn(this);
  return input.apply(ParDo.of(toReadableFile));
}
Example 12
Source File: MatchResultMatcher.java From beam with Apache License 2.0 | 4 votes |
/**
 * Creates a matcher that expects status {@code OK}, the given metadata list, and no exception.
 *
 * @param expectedMetadata metadata entries the match result is expected to carry
 * @return a matcher configured with {@code MatchResult.Status.OK} and a {@code null} expected
 *     exception
 */
static MatchResultMatcher create(List<MatchResult.Metadata> expectedMetadata) {
  MatchResultMatcher okMatcher =
      new MatchResultMatcher(MatchResult.Status.OK, expectedMetadata, null);
  return okMatcher;
}
Example 13
Source File: ReadableFileCoder.java From beam with Apache License 2.0 | 4 votes |
/**
 * Decodes a {@code FileIO.ReadableFile} from the stream.
 *
 * <p>The metadata is read first with {@code MetadataCoder}, followed by a var-int that is used
 * as an index into {@code Compression.values()}.
 *
 * @param is stream positioned at an encoded readable file
 * @return the decoded readable file
 * @throws IOException propagated from the underlying coders
 */
@Override
public FileIO.ReadableFile decode(InputStream is) throws IOException {
  MatchResult.Metadata metadata = MetadataCoder.of().decode(is);
  // NOTE: an index outside the enum's range surfaces as ArrayIndexOutOfBoundsException.
  int compressionIndex = VarIntCoder.of().decode(is);
  Compression compression = Compression.values()[compressionIndex];
  return new FileIO.ReadableFile(metadata, compression);
}
Example 14
Source File: MatchResultMatcher.java From beam with Apache License 2.0 | 4 votes |
/**
 * Convenience overload for a single expected metadata entry: wraps it in an immutable
 * one-element list and delegates to the list-based {@code create}.
 *
 * @param expectedMetadata the only metadata entry the match result is expected to carry
 * @return the matcher returned by the list-based overload
 */
private static MatchResultMatcher create(MatchResult.Metadata expectedMetadata) {
  ImmutableList<MatchResult.Metadata> singleEntry = ImmutableList.of(expectedMetadata);
  return create(singleEntry);
}
Example 15
Source File: FileIO.java From beam with Apache License 2.0 | 4 votes |
/**
 * Stores the given metadata and compression as-is; no validation is performed here.
 *
 * @param metadata metadata of the file
 * @param compression compression associated with the file
 */
ReadableFile(MatchResult.Metadata metadata, Compression compression) {
  this.compression = compression;
  this.metadata = metadata;
}
Example 16
Source File: TextSource.java From DataflowTemplates with Apache License 2.0 | 4 votes |
/**
 * Creates a text source for the file described by {@code metadata}.
 *
 * <p>Forwards {@code metadata}, the literal {@code 1L}, {@code start} and {@code end} to the
 * superclass constructor, then stores the {@code delimiter} array reference directly (no copy
 * is made).
 *
 * <p>NOTE(review): the {@code 1L} argument is presumably a minimum bundle size; confirm against
 * the superclass constructor.
 *
 * @param metadata metadata of the file to read
 * @param start start offset forwarded to the superclass
 * @param end end offset forwarded to the superclass
 * @param delimiter delimiter bytes kept by this source
 */
TextSource(MatchResult.Metadata metadata, long start, long end, byte[] delimiter) { super(metadata, 1L, start, end); this.delimiter = delimiter; }
Example 17
Source File: BeamHelper.java From dbeam with Apache License 2.0 | 4 votes |
/**
 * Reads the whole content of the file resolved from {@code fileSpec} and decodes it as UTF-8.
 *
 * <p>The stream opened on the resource (and the channel underneath it) is closed before this
 * method returns, whether reading succeeds or fails.
 *
 * @param fileSpec file specification resolved through {@code FileSystems.matchSingleFileSpec}
 * @return the file content as a string
 * @throws IOException if the spec cannot be resolved, or the resource cannot be opened, read or
 *     closed
 */
public static String readFromFile(final String fileSpec) throws IOException {
  MatchResult.Metadata m = FileSystems.matchSingleFileSpec(fileSpec);
  // try-with-resources releases the channel-backed stream even when reading throws.
  try (InputStream inputStream = Channels.newInputStream(FileSystems.open(m.resourceId()));
      InputStreamReader reader = new InputStreamReader(inputStream, Charsets.UTF_8)) {
    return CharStreams.toString(reader);
  }
}
Example 18
Source File: BeamJdbcAvroSchema.java From dbeam with Apache License 2.0 | 4 votes |
/**
 * Parses an Avro schema from the file resolved from {@code filename}.
 *
 * <p>The stream opened on the resource (and the channel underneath it) is closed before this
 * method returns, whether parsing succeeds or fails.
 *
 * @param filename file specification resolved through {@code FileSystems.matchSingleFileSpec}
 * @return the schema produced by a fresh {@code Schema.Parser}
 * @throws IOException if the spec cannot be resolved, or the resource cannot be opened, read or
 *     closed
 */
public static Schema parseInputAvroSchemaFile(final String filename) throws IOException {
  MatchResult.Metadata m = FileSystems.matchSingleFileSpec(filename);
  // try-with-resources releases the channel-backed stream even when parsing throws.
  try (InputStream inputStream = Channels.newInputStream(FileSystems.open(m.resourceId()))) {
    return new Schema.Parser().parse(inputStream);
  }
}
Example 19
Source File: BigQuerySourceBase.java From beam with Apache License 2.0 | 4 votes |
/**
 * Stores the given schema, extracted file list and metadata list as-is; the lists are kept by
 * reference, not copied.
 *
 * @param schema table schema of the result
 * @param extractedFiles resource ids of the extracted files
 * @param metadata match metadata entries of the result
 */
public ExtractResult(
    TableSchema schema, List<ResourceId> extractedFiles, List<MatchResult.Metadata> metadata) {
  this.metadata = metadata;
  this.extractedFiles = extractedFiles;
  this.schema = schema;
}
Example 20
Source File: FileIOTest.java From beam with Apache License 2.0 | 4 votes |
@Test @Category(NeedsRunner.class) public void testMatchAndMatchAll() throws IOException { Path firstPath = tmpFolder.newFile("first").toPath(); Path secondPath = tmpFolder.newFile("second").toPath(); int firstSize = 37; int secondSize = 42; long firstModified = 1541097000L; long secondModified = 1541098000L; Files.write(firstPath, new byte[firstSize]); Files.write(secondPath, new byte[secondSize]); Files.setLastModifiedTime(firstPath, FileTime.fromMillis(firstModified)); Files.setLastModifiedTime(secondPath, FileTime.fromMillis(secondModified)); MatchResult.Metadata firstMetadata = metadata(firstPath, firstSize, firstModified); MatchResult.Metadata secondMetadata = metadata(secondPath, secondSize, secondModified); PAssert.that( p.apply( "Match existing", FileIO.match().filepattern(tmpFolder.getRoot().getAbsolutePath() + "/*"))) .containsInAnyOrder(firstMetadata, secondMetadata); PAssert.that( p.apply( "Match existing with provider", FileIO.match() .filepattern(p.newProvider(tmpFolder.getRoot().getAbsolutePath() + "/*")))) .containsInAnyOrder(firstMetadata, secondMetadata); PAssert.that( p.apply("Create existing", Create.of(tmpFolder.getRoot().getAbsolutePath() + "/*")) .apply("MatchAll existing", FileIO.matchAll())) .containsInAnyOrder(firstMetadata, secondMetadata); PAssert.that( p.apply( "Match non-existing ALLOW", FileIO.match() .filepattern(tmpFolder.getRoot().getAbsolutePath() + "/blah") .withEmptyMatchTreatment(EmptyMatchTreatment.ALLOW))) .containsInAnyOrder(); PAssert.that( p.apply( "Create non-existing", Create.of(tmpFolder.getRoot().getAbsolutePath() + "/blah")) .apply( "MatchAll non-existing ALLOW", FileIO.matchAll().withEmptyMatchTreatment(EmptyMatchTreatment.ALLOW))) .containsInAnyOrder(); PAssert.that( p.apply( "Match non-existing ALLOW_IF_WILDCARD", FileIO.match() .filepattern(tmpFolder.getRoot().getAbsolutePath() + "/blah*") .withEmptyMatchTreatment(EmptyMatchTreatment.ALLOW_IF_WILDCARD))) .containsInAnyOrder(); PAssert.that( p.apply( "Create 
non-existing wildcard + explicit", Create.of(tmpFolder.getRoot().getAbsolutePath() + "/blah*")) .apply( "MatchAll non-existing ALLOW_IF_WILDCARD", FileIO.matchAll() .withEmptyMatchTreatment(EmptyMatchTreatment.ALLOW_IF_WILDCARD))) .containsInAnyOrder(); PAssert.that( p.apply( "Create non-existing wildcard + default", Create.of(tmpFolder.getRoot().getAbsolutePath() + "/blah*")) .apply("MatchAll non-existing default", FileIO.matchAll())) .containsInAnyOrder(); p.run(); }