Java Code Examples for org.apache.beam.sdk.io.FileBasedSink#convertToFileResourceIfPossible()
The following examples show how to use
org.apache.beam.sdk.io.FileBasedSink#convertToFileResourceIfPossible().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DynamicOneFilePerWindow.java From dlp-dataflow-deidentification with Apache License 2.0 | 5 votes |
/**
 * Drops the key of each input pair and writes the values as windowed text files.
 *
 * <p>The output location and temp directory are resolved from {@code filenamePrefix} when this
 * method runs, so the key-qualified prefix built for each element is only logged; it does not
 * change where the element is written.
 *
 * @param input keyed lines; the key is used for logging, the value is the line written out
 * @return the result of applying the windowed {@code TextIO} write
 */
@Override
public PDone expand(PCollection<KV<String, String>> input) {
  PCollection<String> contents =
      input.apply(
          ParDo.of(
              new DoFn<KV<String, String>, String>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  // Kept in a local on purpose: assigning this back to the enclosing
                  // filenamePrefix field would append one more key on every element and
                  // mutate state shared across processElement calls.
                  String keyedPrefix =
                      String.format("%s%s", filenamePrefix, c.element().getKey());
                  LOG.info("File Prefix {}", keyedPrefix);
                  c.output(c.element().getValue());
                }
              }));

  ResourceId resource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix);
  TextIO.Write write =
      TextIO.write()
          .to(new PerWindowFiles(resource))
          .withTempDirectory(resource.getCurrentDirectory())
          .withWindowedWrites();
  // A null numShards leaves the shard count at the TextIO setting configured above.
  if (numShards != null) {
    write = write.withNumShards(numShards);
  }
  return contents.apply(write);
}
Example 2
Source File: WriteOneFilePerWindow.java From dlp-dataflow-deidentification with Apache License 2.0 | 5 votes |
/**
 * Writes the input lines as windowed text files named by {@code PerWindowFiles}.
 *
 * @param input the lines to write
 * @return the result of applying the configured {@code TextIO} write
 */
@Override
public PDone expand(PCollection<String> input) {
  ResourceId prefixResource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix);
  TextIO.Write windowedWrite =
      TextIO.write()
          .to(new PerWindowFiles(prefixResource))
          .withTempDirectory(prefixResource.getCurrentDirectory())
          .withWindowedWrites();

  // No explicit shard count requested: apply the write as configured so far.
  if (numShards == null) {
    return input.apply(windowedWrite);
  }
  return input.apply(windowedWrite.withNumShards(numShards));
}
Example 3
Source File: WriteOneFilePerWindow.java From incubator-nemo with Apache License 2.0 | 5 votes |
/**
 * Writes the input lines as windowed text files named by {@code PerWindowFiles}.
 *
 * @param input the lines to write
 * @return the result of applying the configured {@code TextIO} write
 */
@Override
public PDone expand(final PCollection<String> input) {
  final ResourceId prefixResource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix);
  final TextIO.Write windowedWrite =
      TextIO.write()
          .to(new PerWindowFiles(prefixResource))
          .withTempDirectory(prefixResource.getCurrentDirectory())
          .withWindowedWrites();

  // No explicit shard count requested: apply the write as configured so far.
  if (numShards == null) {
    return input.apply(windowedWrite);
  }
  return input.apply(windowedWrite.withNumShards(numShards));
}
Example 4
Source File: WriteToText.java From deployment-examples with MIT License | 5 votes |
@Override public PDone expand(PCollection<String> input) { // Verify that the input has a compatible window type. checkArgument( input.getWindowingStrategy().getWindowFn().windowCoder() == IntervalWindow.getCoder()); ResourceId resource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix); return input.apply( TextIO.write() .to(new PerWindowFiles(resource)) .withTempDirectory(resource.getCurrentDirectory()) .withWindowedWrites() .withNumShards(3)); }
Example 5
Source File: WriteOneFilePerWindow.java From deployment-examples with MIT License | 5 votes |
/**
 * Writes the input lines as windowed text files named by {@code PerWindowFiles}.
 *
 * @param input the lines to write
 * @return the result of applying the configured {@code TextIO} write
 */
@Override
public PDone expand(PCollection<String> input) {
  ResourceId prefixResource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix);
  TextIO.Write windowedWrite =
      TextIO.write()
          .to(new PerWindowFiles(prefixResource))
          .withTempDirectory(prefixResource.getCurrentDirectory())
          .withWindowedWrites();

  // No explicit shard count requested: apply the write as configured so far.
  if (numShards == null) {
    return input.apply(windowedWrite);
  }
  return input.apply(windowedWrite.withNumShards(numShards));
}
Example 6
Source File: WriteToText.java From beam with Apache License 2.0 | 5 votes |
@Override public PDone expand(PCollection<String> input) { // Verify that the input has a compatible window type. checkArgument( input.getWindowingStrategy().getWindowFn().windowCoder() == IntervalWindow.getCoder()); ResourceId resource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix); return input.apply( TextIO.write() .to(new PerWindowFiles(resource)) .withTempDirectory(resource.getCurrentDirectory()) .withWindowedWrites() .withNumShards(3)); }
Example 7
Source File: WriteOneFilePerWindow.java From beam with Apache License 2.0 | 5 votes |
/**
 * Writes the input lines as windowed text files named by {@code PerWindowFiles}.
 *
 * @param input the lines to write
 * @return the result of applying the configured {@code TextIO} write
 */
@Override
public PDone expand(PCollection<String> input) {
  ResourceId prefixResource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix);
  TextIO.Write windowedWrite =
      TextIO.write()
          .to(new PerWindowFiles(prefixResource))
          .withTempDirectory(prefixResource.getCurrentDirectory())
          .withWindowedWrites();

  // No explicit shard count requested: apply the write as configured so far.
  if (numShards == null) {
    return input.apply(windowedWrite);
  }
  return input.apply(windowedWrite.withNumShards(numShards));
}
Example 8
Source File: WindowedWordCountIT.java From beam with Apache License 2.0 | 4 votes |
private void testWindowedWordCountPipeline(WindowedWordCountITOptions options) throws Exception { ResourceId output = FileBasedSink.convertToFileResourceIfPossible(options.getOutput()); PerWindowFiles filenamePolicy = new PerWindowFiles(output); List<ShardedFile> expectedOutputFiles = Lists.newArrayListWithCapacity(6); for (int startMinute : ImmutableList.of(0, 10, 20, 30, 40, 50)) { final Instant windowStart = new Instant(options.getMinTimestampMillis()).plus(Duration.standardMinutes(startMinute)); String filePrefix = filenamePolicy.filenamePrefixForWindow( new IntervalWindow(windowStart, windowStart.plus(Duration.standardMinutes(10)))); expectedOutputFiles.add( new NumberedShardedFile( output .getCurrentDirectory() .resolve(filePrefix, StandardResolveOptions.RESOLVE_FILE) .toString() + "*")); } ShardedFile inputFile = new ExplicitShardedFile(Collections.singleton(options.getInputFile())); // For this integration test, input is tiny and we can build the expected counts SortedMap<String, Long> expectedWordCounts = new TreeMap<>(); for (String line : inputFile.readFilesWithRetries(Sleeper.DEFAULT, BACK_OFF_FACTORY.backoff())) { String[] words = line.split(ExampleUtils.TOKENIZER_PATTERN, -1); for (String word : words) { if (!word.isEmpty()) { expectedWordCounts.put( word, MoreObjects.firstNonNull(expectedWordCounts.get(word), 0L) + 1L); } } } WindowedWordCount.runWindowedWordCount(options); assertThat(expectedOutputFiles, containsWordCounts(expectedWordCounts)); }