org.apache.beam.sdk.transforms.Create Java Examples

The following examples show how to use org.apache.beam.sdk.transforms.Create. Each example is taken from an open-source project; the source file, originating project, and license are listed above the code.
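Before the project examples, here is a minimal, self-contained sketch of the basic pattern they all share: Create.of materializes an in-memory Java collection as a bounded PCollection in a pipeline. The class name CreateQuickstart and the sample word list are illustrative only and do not come from any of the projects below.

import java.util.Arrays;
import java.util.List;

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.PCollection;

public class CreateQuickstart {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    // An in-memory collection to turn into a PCollection.
    List<String> words = Arrays.asList("apple", "banana", "cherry");

    // Create.of produces a bounded PCollection from the collection;
    // withCoder sets the coder explicitly in case it cannot be inferred.
    PCollection<String> input =
        pipeline.apply(Create.of(words).withCoder(StringUtf8Coder.of()));

    pipeline.run().waitUntilFinish();
  }
}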
Example #1
Source File: TestParserDoFnClass.java    From logparser with Apache License 2.0
@Test
public void testClassDefinition() throws Exception {
    List<String> logLines = Collections.singletonList(TestCase.getInputLine());

    // Apply Create, passing the list and the coder, to create the PCollection.
    PCollection<String> input = pipeline.apply(Create.of(logLines)).setCoder(StringUtf8Coder.of());

    PCollection<TestRecord> filledTestRecords = input
        .apply("Extract Elements from logline",
            ParDo.of(new MyParserDoFn()));

    TestRecord expected = new TestRecord().setFullValid();

    PAssert.that(filledTestRecords).containsInAnyOrder(expected);

    pipeline.run().waitUntilFinish();
}
 
Example #2
Source File: DataflowRunnerTest.java    From beam with Apache License 2.0
@Test
public void testTransformTranslatorMissing() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline p = Pipeline.create(options);

  p.apply(Create.of(Arrays.asList(1, 2, 3))).apply(new TestTransform());

  thrown.expect(IllegalStateException.class);
  thrown.expectMessage(containsString("no translator registered"));
  SdkComponents sdkComponents = SdkComponents.create(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  DataflowPipelineTranslator.fromOptions(options)
      .translate(
          p,
          pipelineProto,
          sdkComponents,
          DataflowRunner.fromOptions(options),
          Collections.emptyList());

  ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
  Mockito.verify(mockJobs).create(eq(PROJECT_ID), eq(REGION_ID), jobCaptor.capture());
  assertValidJob(jobCaptor.getValue());
}
 
Example #3
Source File: PubsubIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test(timeout = 30000)
public void canSendGzippedPayloads() throws Exception {
  final List<String> inputLines = Lines.resources("testdata/pubsub-integration/input.ndjson");

  pipeline.getOptions().as(DirectOptions.class).setBlockOnRun(false);

  SinkOptions sinkOptions = pipeline.getOptions().as(SinkOptions.class);
  sinkOptions.setOutputType(OutputType.pubsub);
  sinkOptions.setOutput(pipeline.newProvider(topicName.toString()));
  SinkOptions.Parsed options = SinkOptions.parseSinkOptions(sinkOptions);

  pipeline.apply(Create.of(inputLines)).apply(InputFileFormat.json.decode())
      .apply(options.getOutputType().write(options));

  final PipelineResult result = pipeline.run();

  System.err.println("Waiting for subscriber to receive messages published in the pipeline...");
  List<String> expectedLines = Lines.resources("testdata/pubsub-integration/gzipped.ndjson");
  List<String> received = receiveLines(expectedLines.size());
  assertThat(received, matchesInAnyOrder(expectedLines));
  result.cancel();
}
 
Example #4
Source File: ApproximateDistinctTest.java    From beam with Apache License 2.0
@Test
public void bigCardinality() {
  final int cardinality = 15000;
  final int p = 15;
  final int sp = 20;
  final double expectedErr = 1.04 / Math.sqrt(p);

  List<Integer> stream = new ArrayList<>();
  for (int i = 1; i <= cardinality; i++) {
    stream.addAll(Collections.nCopies(2, i));
  }
  Collections.shuffle(stream);

  PCollection<Long> res =
      tp.apply("big stream", Create.of(stream))
          .apply(
              "big cardinality",
              ApproximateDistinct.<Integer>globally().withPrecision(p).withSparsePrecision(sp));

  PAssert.that("Verify Accuracy for big cardinality", res)
      .satisfies(new VerifyAccuracy(cardinality, expectedErr));

  tp.run();
}
 
Example #5
Source File: PubsubToPubsubTest.java    From DataflowTemplates with Apache License 2.0
/** Tests whether all messages flow through when no filter is provided. */
@Test
@Category(NeedsRunner.class)
public void testNoInputFilterProvided() {
  PubsubToPubsub.Options options =
      TestPipeline.testingPipelineOptions().as(PubsubToPubsub.Options.class);
  PCollection<Long> pc =
      pipeline
          .apply(Create.of(allTestMessages))
          .apply(ParDo.of(ExtractAndFilterEventsFn.newBuilder().build()))
          .apply(Count.globally());

  PAssert.thatSingleton(pc).isEqualTo(Long.valueOf(allTestMessages.size()));

  pipeline.run(options);
}
 
Example #6
Source File: TestDataflowRunnerTest.java    From beam with Apache License 2.0
/** A streaming job that terminates with no error messages is a success. */
@Test
public void testRunStreamingJobUsingPAssertThatSucceeds() throws Exception {
  options.setStreaming(true);
  Pipeline p = TestPipeline.create(options);
  PCollection<Integer> pc = p.apply(Create.of(1, 2, 3));
  PAssert.that(pc).containsInAnyOrder(1, 2, 3);

  DataflowPipelineJob mockJob = Mockito.mock(DataflowPipelineJob.class);
  when(mockJob.getState()).thenReturn(State.DONE);
  when(mockJob.waitUntilFinish(any(Duration.class), any(JobMessagesHandler.class)))
      .thenReturn(State.DONE);
  when(mockJob.getProjectId()).thenReturn("test-project");
  when(mockJob.getJobId()).thenReturn("test-job");

  DataflowRunner mockRunner = Mockito.mock(DataflowRunner.class);
  when(mockRunner.run(any(Pipeline.class))).thenReturn(mockJob);

  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockMetricResponse(true /* success */, true /* tentative */));
  TestDataflowRunner runner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  runner.run(p, mockRunner);
}
 
Example #7
Source File: TestDataflowRunnerTest.java    From beam with Apache License 2.0
@Test
public void testRunStreamingJobNotUsingPAssertThatSucceeds() throws Exception {
  options.setStreaming(true);
  Pipeline p = TestPipeline.create(options);
  p.apply(Create.of(1, 2, 3));

  DataflowPipelineJob mockJob = Mockito.mock(DataflowPipelineJob.class);
  when(mockJob.getState()).thenReturn(State.DONE);
  when(mockJob.waitUntilFinish(any(Duration.class), any(JobMessagesHandler.class)))
      .thenReturn(State.DONE);
  when(mockJob.getProjectId()).thenReturn("test-project");
  when(mockJob.getJobId()).thenReturn("test-job");

  DataflowRunner mockRunner = Mockito.mock(DataflowRunner.class);
  when(mockRunner.run(any(Pipeline.class))).thenReturn(mockJob);

  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockStreamingMetricResponse(ImmutableMap.of()));
  TestDataflowRunner runner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  runner.run(p, mockRunner);
}
 
Example #8
Source File: CacheTest.java    From beam with Apache License 2.0
@Test
public void shouldCacheTest() {
  SparkPipelineOptions options = createOptions();
  options.setCacheDisabled(true);
  Pipeline pipeline = Pipeline.create(options);

  Values<String> valuesTransform = Create.of("foo", "bar");
  PCollection pCollection = mock(PCollection.class);

  JavaSparkContext jsc = SparkContextFactory.getSparkContext(options);
  EvaluationContext ctxt = new EvaluationContext(jsc, pipeline, options);
  ctxt.getCacheCandidates().put(pCollection, 2L);

  assertFalse(ctxt.shouldCache(valuesTransform, pCollection));

  options.setCacheDisabled(false);
  assertTrue(ctxt.shouldCache(valuesTransform, pCollection));

  GroupByKey<String, String> gbkTransform = GroupByKey.create();
  assertFalse(ctxt.shouldCache(gbkTransform, pCollection));
}
 
Example #9
Source File: Task.java    From beam with Apache License 2.0
public static void main(String[] args) {
  PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
  Pipeline pipeline = Pipeline.create(options);

  PCollection<String> events =
      pipeline.apply(
          Create.timestamped(
              TimestampedValue.of("event", Instant.parse("2019-06-01T00:00:00+00:00")),
              TimestampedValue.of("event", Instant.parse("2019-06-01T00:00:00+00:00")),
              TimestampedValue.of("event", Instant.parse("2019-06-01T00:00:00+00:00")),
              TimestampedValue.of("event", Instant.parse("2019-06-01T00:00:00+00:00")),
              TimestampedValue.of("event", Instant.parse("2019-06-05T00:00:00+00:00")),
              TimestampedValue.of("event", Instant.parse("2019-06-05T00:00:00+00:00")),
              TimestampedValue.of("event", Instant.parse("2019-06-08T00:00:00+00:00")),
              TimestampedValue.of("event", Instant.parse("2019-06-08T00:00:00+00:00")),
              TimestampedValue.of("event", Instant.parse("2019-06-08T00:00:00+00:00")),
              TimestampedValue.of("event", Instant.parse("2019-06-10T00:00:00+00:00"))
          )
      );

  PCollection<KV<String, Long>> output = applyTransform(events);

  output.apply(Log.ofElements());

  pipeline.run();
}
 
Example #10
Source File: AvroIOTest.java    From beam with Apache License 2.0
/**
 * Tests that {@code AvroIO} can read an upgraded version of an old class, as long as the schema
 * resolution process succeeds. This test covers the case when a new, {@code @Nullable} field
 * has been added.
 *
 * <p>For more information, see http://avro.apache.org/docs/1.7.7/spec.html#Schema+Resolution
 */
@Test
@Category(NeedsRunner.class)
public void testWriteThenReadSchemaUpgrade() throws Throwable {
  List<GenericClass> values =
      ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar"));
  File outputFile = tmpFolder.newFile("output.avro");

  writePipeline
      .apply(Create.of(values))
      .apply(
          AvroIO.write(GenericClass.class).to(outputFile.getAbsolutePath()).withoutSharding());
  writePipeline.run();

  List<GenericClassV2> expected =
      ImmutableList.of(new GenericClassV2(3, "hi", null), new GenericClassV2(5, "bar", null));

  PAssert.that(
          readPipeline.apply(
              AvroIO.read(GenericClassV2.class)
                  .withBeamSchemas(withBeamSchemas)
                  .from(outputFile.getAbsolutePath())))
      .containsInAnyOrder(expected);
  readPipeline.run();
}
 
Example #11
Source File: QueryDispositionLocationTest.java    From beam with Apache License 2.0
@Test
public void writeWithWriteEmptyDispositionWithEmptyTableSuccess() throws SQLException {
  FakeSnowflakeDatabase.createTable(FAKE_TABLE);

  pipeline
      .apply(Create.of(testData))
      .apply(
          "Write SnowflakeIO",
          SnowflakeIO.<Long>write()
              .withDataSourceConfiguration(dc)
              .withTable(FAKE_TABLE)
              .withStagingBucketName(options.getStagingBucketName())
              .withStorageIntegrationName(options.getStorageIntegrationName())
              .withFileNameTemplate("output*")
              .withUserDataMapper(TestUtils.getLongCsvMapper())
              .withWriteDisposition(WriteDisposition.EMPTY)
              .withSnowflakeService(snowflakeService));

  pipeline.run(options).waitUntilFinish();

  List<Long> actualData = FakeSnowflakeDatabase.getElementsAsLong(FAKE_TABLE);

  assertTrue(TestUtils.areListsEqual(testData, actualData));
}
 
Example #12
Source File: ParDoTest.java    From beam with Apache License 2.0
@Test
public void testSideInputAsList() {
  PCollectionView<List<Integer>> sideInputView =
      pipeline.apply("Create sideInput", Create.of(1, 2, 3)).apply(View.asList());
  PCollection<Integer> input =
      pipeline
          .apply("Create input", Create.of(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))
          .apply(
              ParDo.of(
                      new DoFn<Integer, Integer>() {
                        @ProcessElement
                        public void processElement(ProcessContext c) {
                          List<Integer> sideInputValue = c.sideInput(sideInputView);
                          if (!sideInputValue.contains(c.element())) {
                            c.output(c.element());
                          }
                        }
                      })
                  .withSideInputs(sideInputView));
  PAssert.that(input).containsInAnyOrder(4, 5, 6, 7, 8, 9, 10);
  pipeline.run();
}
 
Example #13
Source File: TestDataflowRunnerTest.java    From beam with Apache License 2.0
/**
 * Tests that when a batch job terminates in a failure state, an error is thrown to that effect
 * even if all assertions passed.
 */
@Test
public void testRunBatchJobThatFails() throws Exception {
  Pipeline p = TestPipeline.create(options);
  PCollection<Integer> pc = p.apply(Create.of(1, 2, 3));
  PAssert.that(pc).containsInAnyOrder(1, 2, 3);

  DataflowPipelineJob mockJob = Mockito.mock(DataflowPipelineJob.class);
  when(mockJob.getState()).thenReturn(State.FAILED);
  when(mockJob.getProjectId()).thenReturn("test-project");
  when(mockJob.getJobId()).thenReturn("test-job");

  DataflowRunner mockRunner = Mockito.mock(DataflowRunner.class);
  when(mockRunner.run(any(Pipeline.class))).thenReturn(mockJob);

  TestDataflowRunner runner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockMetricResponse(true /* success */, false /* tentative */));
  expectedException.expect(RuntimeException.class);
  runner.run(p, mockRunner);
  // Note that fail throws an AssertionError which is why it is placed out here
  // instead of inside the try-catch block.
  fail("AssertionError expected");
}
 
Example #14
Source File: TaskTest.java    From beam with Apache License 2.0
@SuppressWarnings("unchecked")
@Test
public void groupByKey() {
  PCollection<String> words =
      testPipeline.apply(
          Create.of("apple", "banana", "cherry", "durian", "guava", "melon")
      );

  PCollection<KV<String, String>> results = Task.applyTransform(words);

  PAssert.that(results)
      .containsInAnyOrder(
          KV.of("a", "apple"),
          KV.of("b", "banana"),
          KV.of("c", "cherry"),
          KV.of("d", "durian"),
          KV.of("g", "guava"),
          KV.of("m", "melon")
      );

  testPipeline.run().waitUntilFinish();
}
 
Example #15
Source File: TaskTest.java    From beam with Apache License 2.0
@Test
public void flatten() {
  PCollection<String> wordsStartingWithA =
      testPipeline.apply("Words starting with A",
          Create.of("apple", "ant", "arrow"));
  PCollection<String> wordsStartingWithB =
      testPipeline.apply("Words starting with B",
          Create.of("ball", "book", "bow"));

  PCollection<String> results = Task.applyTransform(wordsStartingWithA, wordsStartingWithB);

  PAssert.that(results)
      .containsInAnyOrder("apple", "ant", "arrow", "ball", "book", "bow");

  testPipeline.run().waitUntilFinish();
}
 
Example #16
Source File: AvroTableFileAsMutationsTest.java    From DataflowTemplates with Apache License 2.0
private PCollection<FileShard> runFileShardingPipeline(Metadata fileMetadata, int splitSize) {

    PCollectionView<Map<String, String>> filenamesToTableNamesMapView =
        p.apply(
                "Create File/Table names Map",
                Create.of(
                    ImmutableMap.<String, String>of(
                        fileMetadata.resourceId().toString(), "testtable")))
            .apply(View.asMap());

    return p.apply("Create Metadata", Create.of(fileMetadata))
        .apply(FileIO.readMatches())
        // PCollection<FileIO.ReadableFile>
        .apply(
            "Split into ranges",
            ParDo.of(new SplitIntoRangesFn(splitSize, filenamesToTableNamesMapView))
                .withSideInputs(filenamesToTableNamesMapView))
        .setCoder(FileShard.Coder.of());
  }
 
Example #17
Source File: Pipelines.java    From component-runtime with Apache License 2.0
static PCollection<Record> buildBasePipeline(final TestPipeline pipeline) {
    final RecordBuilderFactory factory = new AvroRecordBuilderFactoryProvider().apply(null);
    return pipeline
            .apply(Create.of("a", "b"))
            .apply(MapElements.into(TypeDescriptor.of(Record.class)).via((String input) -> {
                final Record b1 = factory.newRecordBuilder().withString("foo", input).build();
                final Record b2 = factory.newRecordBuilder().withString("bar", input).build();
                return factory
                        .newRecordBuilder()
                        .withArray(factory
                                .newEntryBuilder()
                                .withName("b1")
                                .withType(Schema.Type.ARRAY)
                                .withElementSchema(b1.getSchema())
                                .build(), singletonList(b1))
                        .withArray(factory
                                .newEntryBuilder()
                                .withName("b2")
                                .withType(Schema.Type.ARRAY)
                                .withElementSchema(b2.getSchema())
                                .build(), singletonList(b2))
                        .build();
            }))
            .setCoder(SchemaRegistryCoder.of());
}
 
Example #18
Source File: DirectRunnerTest.java    From beam with Apache License 2.0
private PTransform<PBegin, PDone> outputStartTo(StaticQueue<Integer> queue) {
  return new PTransform<PBegin, PDone>() {
    @Override
    public PDone expand(PBegin input) {
      input
          .apply(Create.of(1))
          .apply(
              MapElements.into(TypeDescriptors.voids())
                  .via(
                      in -> {
                        queue.add(in);
                        return null;
                      }));
      return PDone.in(input.getPipeline());
    }
  };
}
 
Example #19
Source File: DropFieldsTest.java    From beam with Apache License 2.0
@Test
@Category(NeedsRunner.class)
public void testDropTopLevelField() {
  Schema expectedSchema = Schema.builder().addStringField("field2").build();

  PCollection<Row> result =
      pipeline
          .apply(
              Create.of(simpleRow(1, "one"), simpleRow(2, "two"), simpleRow(3, "three"))
                  .withRowSchema(SIMPLE_SCHEMA))
          .apply(DropFields.fields("field1"));
  assertEquals(expectedSchema, result.getSchema());

  List<Row> expectedRows =
      Lists.newArrayList(
          Row.withSchema(expectedSchema).addValue("one").build(),
          Row.withSchema(expectedSchema).addValue("two").build(),
          Row.withSchema(expectedSchema).addValue("three").build());
  PAssert.that(result).containsInAnyOrder(expectedRows);
  pipeline.run();
}
 
Example #20
Source File: GeoCityLookupTest.java    From gcp-ingestion with Mozilla Public License 2.0
@Test
public void testCityAllowed() {
  final List<String> input = Arrays.asList("{\"attributeMap\":" //
      + "{\"remote_addr\":\"10.0.0.2\"" //
      + ",\"x_forwarded_for\":\"192.168.1.2, 216.160.83.56, 60.1.1.1\"" //
      + "},\"payload\":\"\"}");

  final List<String> expected = Arrays.asList("{\"attributeMap\":" //
      + "{\"geo_city\":\"Milton\"" //
      + ",\"geo_country\":\"US\"" //
      + ",\"geo_db_version\":\"2019-01-03T21:26:19Z\"" //
      + ",\"geo_subdivision1\":\"WA\"" //
      + "},\"payload\":\"\"}");

  final PCollection<String> output = pipeline //
      .apply(Create.of(input)) //
      .apply(InputFileFormat.json.decode()) //
      .apply(GeoCityLookup.of(pipeline.newProvider(MMDB),
          pipeline.newProvider("src/test/resources/cityFilters/milton.txt")))
      .apply(OutputFileFormat.json.encode());

  PAssert.that(output).containsInAnyOrder(expected);

  GeoCityLookup.clearSingletonsForTests();
  pipeline.run();
}
 
Example #21
Source File: IndexedRecordToJsonTest.java    From component-runtime with Apache License 2.0
@Test
public void test() {
    PAssert
            .that(pipeline
                    .apply(Create
                            .of(newIndexedRecord("first"), newIndexedRecord("second"))
                            .withCoder(AvroCoder.of(IndexedRecord.class, getSchema())))
                    .apply(new IndexedRecordToJson()))
            .satisfies(values -> {
                assertEquals(asList("first", "second"),
                        StreamSupport
                                .stream(values.spliterator(), false)
                                .map(k -> k.getString("name"))
                                .sorted()
                                .collect(toList()));
                return null;
            });
    assertEquals(PipelineResult.State.DONE, pipeline.run().waitUntilFinish());
}
 
Example #22
Source File: DirectRunnerTest.java    From beam with Apache License 2.0
/**
 * Tests that a {@link DoFn} that mutates an output with a good equals() fails in the {@link
 * DirectRunner}.
 */
@Test
public void testMutatingOutputThenOutputDoFnError() throws Exception {
  Pipeline pipeline = getPipeline();

  pipeline
      .apply(Create.of(42))
      .apply(
          ParDo.of(
              new DoFn<Integer, List<Integer>>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  List<Integer> outputList = Arrays.asList(1, 2, 3, 4);
                  c.output(outputList);
                  outputList.set(0, 37);
                  c.output(outputList);
                }
              }));

  thrown.expect(IllegalMutationException.class);
  thrown.expectMessage("output");
  thrown.expectMessage("must not be mutated");
  pipeline.run();
}
 
Example #23
Source File: BigQueryIOWriteTest.java    From beam with Apache License 2.0
@Test
public void testWriteToTableDecorator() throws Exception {
  TableRow row1 = new TableRow().set("name", "a").set("number", "1");
  TableRow row2 = new TableRow().set("name", "b").set("number", "2");

  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(new TableFieldSchema().setName("number").setType("INTEGER")));
  p.apply(Create.of(row1, row2))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id$20171127")
              .withTestServices(fakeBqServices)
              .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
              .withSchema(schema)
              .withoutValidation());
  p.run();
}
 
Example #24
Source File: PortablePipelineDotRendererTest.java    From beam with Apache License 2.0
@Test
public void testCompositePipeline() {
  p.apply(Create.timestamped(TimestampedValue.of(KV.of(1, 1), new Instant(1))))
      .apply(Window.into(FixedWindows.of(Duration.millis(10))))
      .apply(Sum.integersPerKey());

  assertEquals(
      "digraph {"
          + "    rankdir=LR"
          + "    0 [label=\"Create.TimestampedValues\\n\"]"
          + "    1 [label=\"Window.Into()\\n\"]"
          + "    0 -> 1 [style=solid label=\"Create.TimestampedValues/ParDo(ConvertTimestamps)/ParMultiDo(ConvertTimestamps).output\"]"
          + "    2 [label=\"Combine.perKey(SumInteger)\\nbeam:transform:combine_per_key:v1\"]"
          + "    1 -> 2 [style=solid label=\"Window.Into()/Window.Assign.out\"]"
          + "}",
      PortablePipelineDotRenderer.toDotString(PipelineTranslation.toProto(p))
          .replaceAll(System.lineSeparator(), ""));
}
 
Example #25
Source File: PubsubToPubsubTest.java    From DataflowTemplates with Apache License 2.0
/** Tests whether only the valid messages flow through when a filter is provided. */
@Test
@Category(NeedsRunner.class)
public void testInputFilterProvided() {
  PubsubToPubsub.Options options =
      TestPipeline.testingPipelineOptions().as(PubsubToPubsub.Options.class);
  PCollection<Long> pc =
      pipeline
          .apply(Create.of(allTestMessages))
          .apply(
              ParDo.of(
                  ExtractAndFilterEventsFn.newBuilder()
                      .withFilterKey(options.getFilterKey())
                      .withFilterValue(options.getFilterValue())
                      .build()))
          .apply(Count.globally());

  PAssert.thatSingleton(pc).isEqualTo(Long.valueOf(goodTestMessages.size()));

  options.setFilterKey(ValueProvider.StaticValueProvider.of(FILTER_KEY));
  options.setFilterValue(ValueProvider.StaticValueProvider.of(FILTER_VALUE));

  pipeline.run(options);
}
 
Example #26
Source File: BigtableIOTest.java    From beam with Apache License 2.0
/** Tests that a record gets written to the service and messages are logged. */
@Test
public void testWriting() throws Exception {
  final String table = "table";
  final String key = "key";
  final String value = "value";

  service.createTable(table);

  p.apply("single row", Create.of(makeWrite(key, value)).withCoder(bigtableCoder))
      .apply("write", defaultWrite.withTableId(table));
  p.run();

  logged.verifyDebug("Wrote 1 records");

  assertEquals(1, service.tables.size());
  assertNotNull(service.getTable(table));
  Map<ByteString, ByteString> rows = service.getTable(table);
  assertEquals(1, rows.size());
  assertEquals(ByteString.copyFromUtf8(value), rows.get(ByteString.copyFromUtf8(key)));
}
 
Example #27
Source File: SnowflakeIOWriteTest.java    From beam with Apache License 2.0
@Test
public void writeToExternalWithKVInput() {
  pipeline
      .apply(Create.of(testData))
      .apply(ParDo.of(new TestUtils.ParseToKv()))
      .apply(
          "Write SnowflakeIO",
          SnowflakeIO.<KV<String, Long>>write()
              .withDataSourceConfiguration(dc)
              .withUserDataMapper(TestUtils.getLongCsvMapperKV())
              .withTable(FAKE_TABLE)
              .withStagingBucketName(options.getStagingBucketName())
              .withStorageIntegrationName(options.getStorageIntegrationName())
              .withSnowflakeService(snowflakeService));

  pipeline.run(options).waitUntilFinish();
}
 
Example #28
Source File: TransformTreeTest.java    From beam with Apache License 2.0
@Test
@Category(NeedsRunner.class)
public void testMultiGraphSetup() {
  PCollection<Integer> input = p.begin().apply(Create.of(1, 2, 3));

  input.apply(new UnboundOutputCreator());

  p.run();
}
 
Example #29
Source File: ValidateRunnerXlangTest.java    From beam with Apache License 2.0
@Test
@Category({ValidatesRunner.class, UsesCrossLanguageTransforms.class})
public void multiInputOutputWithSideInputTest() {
  PCollection<String> main1 = testPipeline.apply("createMain1", Create.of("a", "bb"));
  PCollection<String> main2 = testPipeline.apply("createMain2", Create.of("x", "yy", "zzz"));
  PCollection<String> side = testPipeline.apply("createSide", Create.of("s"));
  PCollectionTuple pTuple =
      PCollectionTuple.of("main1", main1)
          .and("main2", main2)
          .and("side", side)
          .apply(External.of(TEST_MULTI_URN, new byte[] {}, expansionAddr).withMultiOutputs());
  PAssert.that(pTuple.get("main")).containsInAnyOrder("as", "bbs", "xs", "yys", "zzzs");
  PAssert.that(pTuple.get("side")).containsInAnyOrder("ss");
}
 
Example #30
Source File: WriteWithShardingFactoryTest.java    From beam with Apache License 2.0
@Test
public void withNoShardingSpecifiedReturnsNewTransform() {
  ResourceId outputDirectory = LocalResources.fromString("/foo", true /* isDirectory */);

  PTransform<PCollection<Object>, WriteFilesResult<Void>> original =
      WriteFiles.to(
          new FileBasedSink<Object, Void, Object>(
              StaticValueProvider.of(outputDirectory),
              DynamicFileDestinations.constant(new FakeFilenamePolicy())) {
            @Override
            public WriteOperation<Void, Object> createWriteOperation() {
              throw new IllegalArgumentException("Should not be used");
            }
          });
  @SuppressWarnings("unchecked")
  PCollection<Object> objs = (PCollection) p.apply(Create.empty(VoidCoder.of()));

  AppliedPTransform<
          PCollection<Object>,
          WriteFilesResult<Void>,
          PTransform<PCollection<Object>, WriteFilesResult<Void>>>
      originalApplication =
          AppliedPTransform.of("write", objs.expand(), Collections.emptyMap(), original, p);

  assertThat(
      factory.getReplacementTransform(originalApplication).getTransform(),
      not(equalTo((Object) original)));
}