Java Code Examples for org.apache.beam.sdk.io.fs.ResourceId#toString()
The following examples show how to use
org.apache.beam.sdk.io.fs.ResourceId#toString().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FileUtils.java From deployment-examples with MIT License | 6 votes |
/**
 * Copies the bytes of {@code sourceFile} into {@code destinationFile}.
 *
 * <p>The destination channel is created first (passing {@code "text/plain"} to {@code
 * FileSystems.create}), then the source channel is opened. Both channels are closed when the copy
 * finishes or fails.
 *
 * @param sourceFile resource to read from
 * @param destinationFile resource to create and write to
 * @return the string form of {@code destinationFile}
 * @throws IOException if a channel cannot be created or opened, or a read or write fails
 */
public static String copyFile(ResourceId sourceFile, ResourceId destinationFile)
    throws IOException {
  try (WritableByteChannel sink = FileSystems.create(destinationFile, "text/plain");
      ReadableByteChannel source = FileSystems.open(sourceFile)) {
    final ByteBuffer chunk = ByteBuffer.allocateDirect(16 * 1024);
    while (source.read(chunk) != -1) {
      chunk.flip();
      sink.write(chunk);
      // Keep any bytes the sink did not accept so they are written on a later pass.
      chunk.compact();
    }
    // End of stream reached: drain whatever is still sitting in the buffer.
    chunk.flip();
    while (chunk.hasRemaining()) {
      sink.write(chunk);
    }
  }
  return destinationFile.toString();
}
Example 2
Source File: WindowedFilenamePolicy.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Substitutes date placeholders in the output directory path with values taken from the window.
 *
 * <p>Only an {@link IntervalWindow} is resolved; for any other window the directory matched from
 * the raw string is returned unchanged. The placeholders {@code YYYY}, {@code MM}, {@code DD},
 * {@code HH} and {@code mm} are replaced, in that order, using the window's end time.
 *
 * @param outputDirectoryStr provider of the output directory, possibly containing placeholders
 * @param window the window whose end time drives the substitution
 * @return the output directory with all placeholders resolved
 */
private ResourceId resolveWithDateTemplates(
    ValueProvider<String> outputDirectoryStr, BoundedWindow window) {
  ResourceId rawDirectory = FileSystems.matchNewResource(outputDirectoryStr.get(), true);
  if (!(window instanceof IntervalWindow)) {
    return rawDirectory;
  }

  DateTime windowEnd = ((IntervalWindow) window).end().toDateTime();
  String resolvedPath =
      rawDirectory
          .toString()
          .replace("YYYY", YEAR.print(windowEnd))
          .replace("MM", MONTH.print(windowEnd))
          .replace("DD", DAY.print(windowEnd))
          .replace("HH", HOUR.print(windowEnd))
          .replace("mm", MINUTE.print(windowEnd));
  return FileSystems.matchNewResource(resolvedPath, true);
}
Example 3
Source File: WindowedFilenamePolicy.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Expands the date variables found in the output directory path, so that the output location can
 * change with the end time of the window being written.
 *
 * <p>Expansion happens only when {@code window} is an {@link IntervalWindow}; otherwise the
 * directory is returned exactly as matched from the provided string.
 *
 * @param outputDirectoryStr provider of the (possibly templated) output directory
 * @param window the window supplying the end time used for expansion
 * @return the new output directory with all variables resolved
 */
private ResourceId resolveWithDateTemplates(
    ValueProvider<String> outputDirectoryStr, BoundedWindow window) {
  final ResourceId baseDirectory =
      FileSystems.matchNewResource(outputDirectoryStr.get(), true);

  if (window instanceof IntervalWindow) {
    final DateTime endOfWindow = ((IntervalWindow) window).end().toDateTime();

    String path = baseDirectory.toString();
    path = path.replace("YYYY", YEAR.print(endOfWindow));
    path = path.replace("MM", MONTH.print(endOfWindow));
    path = path.replace("DD", DAY.print(endOfWindow));
    path = path.replace("HH", HOUR.print(endOfWindow));
    path = path.replace("mm", MINUTE.print(endOfWindow));

    return FileSystems.matchNewResource(path, true);
  }

  return baseDirectory;
}
Example 4
Source File: WindowedFilenamePolicyTest.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/** * Tests that windowedFilename() constructs the filename correctly according to the parameters * when using Strings. */ @Test public void testWindowedFilenameFormatString() throws IOException { // Arrange // ResourceId outputDirectory = getBaseTempDirectory(); WindowedContext context = mock(WindowedContext.class); BoundedWindow window = mock(BoundedWindow.class); PaneInfo paneInfo = PaneInfo.createPane(false, true, Timing.ON_TIME, 0, 0); WindowedFilenamePolicy policy = new WindowedFilenamePolicy( outputDirectory.toString(), "string-output", "-SSS-of-NNN", ".csv"); // Act // ResourceId filename = policy.windowedFilename(1, 1, window, paneInfo, new TestOutputFileHints()); // Assert // assertThat(filename, is(notNullValue())); assertThat(filename.getFilename(), is(equalTo("string-output-001-of-001.csv"))); }
Example 5
Source File: FileUtils.java From beam with Apache License 2.0 | 6 votes |
/**
 * Streams the content of {@code sourceFile} into a newly created {@code destinationFile}.
 *
 * <p>The destination is created before the source is opened, and {@code "text/plain"} is passed
 * as the second argument to {@code FileSystems.create}.
 *
 * @param sourceFile resource whose bytes are read
 * @param destinationFile resource that receives the bytes
 * @return {@code destinationFile.toString()}
 * @throws IOException if either channel cannot be obtained or an I/O operation fails
 */
public static String copyFile(ResourceId sourceFile, ResourceId destinationFile)
    throws IOException {
  try (WritableByteChannel out = FileSystems.create(destinationFile, "text/plain")) {
    try (ReadableByteChannel in = FileSystems.open(sourceFile)) {
      final ByteBuffer buf = ByteBuffer.allocateDirect(16 * 1024);

      // Read until end-of-stream (-1), writing what is available after each read.
      for (int read = in.read(buf); read != -1; read = in.read(buf)) {
        buf.flip();
        out.write(buf);
        // Unwritten bytes are moved to the front so the next read appends after them.
        buf.compact();
      }

      // Flush the bytes left over from partial writes.
      buf.flip();
      while (buf.hasRemaining()) {
        out.write(buf);
      }
    }
  }
  return destinationFile.toString();
}
Example 6
Source File: DefaultFilenamePolicyTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Convenience wrapper around {@code DefaultFilenamePolicy.constructName} that accepts the base
 * filename as a string and returns the constructed name as a string.
 *
 * <p>{@code baseFilename} is matched as a new non-directory resource before being handed to the
 * policy; all other arguments are passed through unchanged.
 */
private static String constructName(
    String baseFilename,
    String shardTemplate,
    String suffix,
    int shardNum,
    int numShards,
    String paneStr,
    String windowStr) {
  ResourceId baseResource = FileSystems.matchNewResource(baseFilename, false);
  return DefaultFilenamePolicy.constructName(
          baseResource, shardTemplate, suffix, shardNum, numShards, paneStr, windowStr)
      .toString();
}
Example 7
Source File: BoundedSideInputJoinTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Runs {@code query} over generated events and asserts that its output satisfies {@code model}.
 *
 * <p>A randomly suffixed side-input location under the pipeline's temp location is stored in
 * {@code config.sideInputUrl}; the side input is cleaned up once the run finishes or fails.
 *
 * @param name prefix for the name of the read step
 * @param config Nexmark configuration; its {@code sideInputUrl} is overwritten
 * @param query the query under test
 * @param model the model providing the assertion applied to the query results
 * @param streamingMode whether to read from the streaming or the batch events source
 */
private <T extends KnownSize> void queryMatchesModel(
    String name,
    NexmarkConfiguration config,
    NexmarkQueryTransform<T> query,
    NexmarkQueryModel<T> model,
    boolean streamingMode)
    throws Exception {
  config.sideInputUrl =
      FileSystems.matchNewResource(
              String.format(
                  "%s/BoundedSideInputJoin-%s",
                  p.getOptions().getTempLocation(), new Random().nextInt()),
              false)
          .toString();

  try {
    query.setSideInput(NexmarkUtils.prepareSideInput(p, config));

    PCollection<Event> events =
        p.apply(
            name + ".Read",
            streamingMode
                ? NexmarkUtils.streamEventsSource(config)
                : NexmarkUtils.batchEventsSource(config));
    PCollection<TimestampedValue<T>> results =
        (PCollection<TimestampedValue<T>>) events.apply(new NexmarkQuery<>(config, query));

    PAssert.that(results).satisfies(model.assertionFor());
    p.run().waitUntilFinish();
  } finally {
    NexmarkUtils.cleanUpSideInput(config);
  }
}
Example 8
Source File: SessionSideInputJoinTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks that the results of {@code query} satisfy the assertion supplied by {@code model}.
 *
 * <p>The side input is written to a randomly suffixed {@code SessionSideInputJoin-*} location
 * under the pipeline's temp location and is cleaned up in all cases.
 *
 * @param name prefix for the name of the read step
 * @param config Nexmark configuration; its {@code sideInputUrl} is overwritten
 * @param query the query under test
 * @param model the model providing the assertion applied to the query results
 * @param streamingMode selects the streaming ({@code true}) or batch ({@code false}) source
 */
private <T extends KnownSize> void queryMatchesModel(
    String name,
    NexmarkConfiguration config,
    NexmarkQueryTransform<T> query,
    NexmarkQueryModel<T> model,
    boolean streamingMode)
    throws Exception {
  ResourceId sideInputLocation =
      FileSystems.matchNewResource(
          String.format(
              "%s/SessionSideInputJoin-%s",
              p.getOptions().getTempLocation(), new Random().nextInt()),
          false);
  config.sideInputUrl = sideInputLocation.toString();

  try {
    PCollection<KV<Long, String>> preparedSideInput = NexmarkUtils.prepareSideInput(p, config);
    query.setSideInput(preparedSideInput);

    String readStepName = name + ".Read";
    PCollection<Event> generatedEvents =
        p.apply(
            readStepName,
            streamingMode
                ? NexmarkUtils.streamEventsSource(config)
                : NexmarkUtils.batchEventsSource(config));

    PCollection<TimestampedValue<T>> queryOutput =
        (PCollection<TimestampedValue<T>>)
            generatedEvents.apply(new NexmarkQuery<>(config, query));
    PAssert.that(queryOutput).satisfies(model.assertionFor());

    PipelineResult pipelineResult = p.run();
    pipelineResult.waitUntilFinish();
  } finally {
    NexmarkUtils.cleanUpSideInput(config);
  }
}
Example 9
Source File: SqlBoundedSideInputJoinTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Executes {@code query} in the test pipeline and verifies its output against {@code model}.
 *
 * <p>Before running, {@code config.sideInputUrl} is pointed at a randomly suffixed {@code
 * JoinToFiles-*} location under the pipeline's temp location. The side input is cleaned up in a
 * {@code finally} block.
 *
 * @param name prefix for the name of the read step
 * @param config Nexmark configuration; its {@code sideInputUrl} is overwritten
 * @param query the query under test
 * @param model the model providing the assertion applied to the query results
 * @param streamingMode {@code true} to use the streaming events source, {@code false} for batch
 */
private <T extends KnownSize> void queryMatchesModel(
    String name,
    NexmarkConfiguration config,
    NexmarkQueryTransform<T> query,
    NexmarkQueryModel<T> model,
    boolean streamingMode)
    throws Exception {
  String sideInputPath =
      String.format(
          "%s/JoinToFiles-%s", p.getOptions().getTempLocation(), new Random().nextInt());
  ResourceId sideInputResource = FileSystems.matchNewResource(sideInputPath, false);
  config.sideInputUrl = sideInputResource.toString();

  try {
    PCollection<KV<Long, String>> joinInput = NexmarkUtils.prepareSideInput(p, config);
    query.setSideInput(joinInput);

    PCollection<Event> eventStream =
        p.apply(
            name + ".Read",
            streamingMode
                ? NexmarkUtils.streamEventsSource(config)
                : NexmarkUtils.batchEventsSource(config));
    PCollection<TimestampedValue<T>> output =
        (PCollection<TimestampedValue<T>>) eventStream.apply(new NexmarkQuery<>(config, query));
    PAssert.that(output).satisfies(model.assertionFor());

    p.run().waitUntilFinish();
  } finally {
    NexmarkUtils.cleanUpSideInput(config);
  }
}
Example 10
Source File: NexmarkUtilsTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks that a CSV side input prepared by {@code NexmarkUtils.prepareSideInput} contains exactly
 * one {@code (id, id.toString())} entry for every id in {@code [0, sideInputRowCount)}.
 *
 * <p>The side input is written to a randomly suffixed {@code JoinToFiles-*} location under the
 * pipeline's temp location and is cleaned up afterwards.
 */
@Test
public void testPrepareCsvSideInput() throws Exception {
  NexmarkConfiguration config = NexmarkConfiguration.DEFAULT.copy();
  config.sideInputType = NexmarkUtils.SideInputType.CSV;
  ResourceId sideInputResourceId =
      FileSystems.matchNewResource(
          String.format(
              "%s/JoinToFiles-%s",
              pipeline.getOptions().getTempLocation(), new Random().nextInt()),
          false);
  config.sideInputUrl = sideInputResourceId.toString();
  config.sideInputRowCount = 10000;
  config.sideInputNumShards = 15;

  try {
    // Prepare inside the try block so that cleanup still runs if preparation fails part-way,
    // matching how the queryMatchesModel tests guard the same call.
    PCollection<KV<Long, String>> sideInput = NexmarkUtils.prepareSideInput(pipeline, config);

    PAssert.that(sideInput)
        .containsInAnyOrder(
            LongStream.range(0, config.sideInputRowCount)
                .boxed()
                .map(l -> KV.of(l, l.toString()))
                .collect(Collectors.toList()));
    pipeline.run();
  } finally {
    NexmarkUtils.cleanUpSideInput(config);
  }
}
Example 11
Source File: FileBasedSinkTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates {@code numFiles} empty files whose names follow the Writer's temporary file pattern.
 *
 * <p>Each name is built by {@code WriteOperation.buildTemporaryFilename} from the base temp
 * directory and the file's index, then resolved against the root of {@code tmpFolder}. Parent
 * directories are created as needed, and the test fails if a file already exists.
 *
 * @param numFiles number of temporary files to create
 * @return the created files, in creation order
 */
private List<File> generateTemporaryFilesForFinalize(int numFiles) throws Exception {
  List<File> createdFiles = new ArrayList<>();
  for (int index = 0; index < numFiles; index++) {
    ResourceId tempResource =
        WriteOperation.buildTemporaryFilename(getBaseTempDirectory(), String.valueOf(index));
    File onDisk = new File(tmpFolder.getRoot(), tempResource.toString());
    onDisk.getParentFile().mkdirs();
    assertTrue(onDisk.createNewFile());
    createdFiles.add(onDisk);
  }
  return createdFiles;
}
Example 12
Source File: TextIOWriteTest.java From beam with Apache License 2.0 | 4 votes |
/**
 * Asserts that the files written under {@code outputPrefix} contain exactly {@code elems}, in any
 * order, and that every file has the expected header and footer.
 *
 * <p>When {@code numShards} is zero the files are discovered by matching {@code outputPrefix}
 * followed by {@code *}; otherwise the file name of each shard is derived with {@code
 * DefaultFilenamePolicy.constructName} using {@code shardNameTemplate} and an empty suffix.
 *
 * @param elems the elements expected across all output files
 * @param header the header each file is checked for and that is stripped before comparison
 * @param footer the footer each file is checked for and that is stripped before comparison
 * @param numShards the number of shards written, or zero to discover files by pattern
 * @param outputPrefix the prefix of the output files
 * @param shardNameTemplate the shard template used to name files when {@code numShards} is set
 */
private static void assertOutputFiles(
    String[] elems,
    final String header,
    final String footer,
    int numShards,
    ResourceId outputPrefix,
    String shardNameTemplate)
    throws Exception {
  // Step 1: work out which files should have been written.
  List<File> outputFiles = new ArrayList<>();
  if (numShards != 0) {
    for (int shard = 0; shard < numShards; shard++) {
      ResourceId shardName =
          DefaultFilenamePolicy.constructName(
              outputPrefix, shardNameTemplate, "", shard, numShards, null, null);
      outputFiles.add(new File(shardName.toString()));
    }
  } else {
    List<MatchResult> matched =
        FileSystems.match(Collections.singletonList(outputPrefix.toString() + "*"));
    for (Metadata found : Iterables.getOnlyElement(matched).metadata()) {
      outputFiles.add(new File(found.resourceId().toString()));
    }
  }

  // Step 2: read every file, keeping the lines grouped per file for the header/footer check.
  List<List<String>> linesPerFile = new ArrayList<>();
  for (File outputFile : outputFiles) {
    linesPerFile.add(readLinesFromFile(outputFile));
  }

  // Step 3: round-trip the expected elements through the UTF-8 string coder.
  List<String> expectedLines = new ArrayList<>(elems.length);
  for (String elem : elems) {
    byte[] encoded = CoderUtils.encodeToByteArray(StringUtf8Coder.of(), elem);
    expectedLines.add(new String(encoded, Charsets.UTF_8));
  }

  // Step 4: strip header and footer from each file and flatten into one list of lines.
  List<String> actualLines =
      Lists.newArrayList(
          Iterables.concat(
              FluentIterable.from(linesPerFile)
                  .transform(removeHeaderAndFooter(header, footer))
                  .toList()));

  assertThat(actualLines, containsInAnyOrder(expectedLines.toArray()));
  assertTrue(linesPerFile.stream().allMatch(haveProperHeaderAndFooter(header, footer)::apply));
}