Java Code Examples for com.hazelcast.jet.pipeline.Pipeline#create()

The following examples show how to use com.hazelcast.jet.pipeline.Pipeline#create(). Each example's header notes the project it comes from and its license.
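Every example below starts from the same call: Pipeline.create() returns an empty pipeline, which is then built up by chaining stages from a source to a sink and submitted to a Jet instance as a job. A minimal, self-contained sketch of that shape (the test source, logger sink, and embedded Jet member are illustrative choices, not part of Pipeline#create() itself; the API level is Jet 4.x, matching the examples below):

import com.hazelcast.jet.Jet;
import com.hazelcast.jet.JetInstance;
import com.hazelcast.jet.pipeline.Pipeline;
import com.hazelcast.jet.pipeline.Sinks;
import com.hazelcast.jet.pipeline.test.TestSources;

public class MinimalPipeline {
    public static void main(String[] args) {
        // An empty pipeline; it does nothing until a source and a sink are attached
        Pipeline p = Pipeline.create();
        p.readFrom(TestSources.items(1, 2, 3))
         .writeTo(Sinks.logger());

        JetInstance jet = Jet.newJetInstance();
        try {
            // Submit the pipeline as a job and wait for the batch to complete
            jet.newJob(p).join();
        } finally {
            jet.shutdown();
        }
    }
}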
Example 1
Source File: TwitterSourceTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void testBatch() {
    Pipeline pipeline = Pipeline.create();
    String query = "Jet flies";
    BatchSource<Status> twitterSearch = TwitterSources.search(credentials, query);
    BatchStage<String> tweets = pipeline
            .readFrom(twitterSearch)
            .map(status -> "@" + status.getUser() + " - " + status.getText());
    tweets.writeTo(AssertionSinks.assertCollectedEventually(60,
            list -> assertGreaterOrEquals("Emits at least 10 tweets in 1 minute.",
                    list.size(), 10)));
    Job job = jet.newJob(pipeline);
    sleepAtLeastSeconds(5);
    try {
        job.join();
        fail("Job should have completed with an AssertionCompletedException, but completed normally");
    } catch (CompletionException e) {
        String errorMsg = e.getCause().getMessage();
        assertTrue("Job was expected to complete with AssertionCompletedException, but completed with: "
                + e.getCause(), errorMsg.contains(AssertionCompletedException.class.getName()));
    }
}
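The join-then-catch block at the end of this test recurs verbatim in Examples 4, 8, and 17 below: AssertionSinks.assertCollectedEventually completes the job with an AssertionCompletedException on success, so the test expects join() to throw. When adapting these tests, the pattern factors naturally into a helper; a sketch built from the same assertions as above (the helper name is ours, not from the original source):

private static void expectAssertionCompleted(Job job) {
    try {
        job.join();
        fail("Job should have completed with an AssertionCompletedException, but completed normally");
    } catch (CompletionException e) {
        // The assertion sink signals success by failing the job with AssertionCompletedException
        assertTrue("Job was expected to complete with AssertionCompletedException, but completed with: "
                + e.getCause(), e.getCause().getMessage().contains(AssertionCompletedException.class.getName()));
    }
}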
 
Example 2
Source File: ReadKafkaIntoHazelcast.java    From hazelcast-jet-demos with Apache License 2.0
public static Pipeline build(String bootstrapServers) {
    Properties properties = new Properties();
    properties.put(ConsumerConfig.GROUP_ID_CONFIG, UUID.randomUUID().toString());
    properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getCanonicalName());
    properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getCanonicalName());
    properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

    Pipeline pipeline = Pipeline.create();

    pipeline
            .readFrom(KafkaSources.kafka(properties, Constants.TOPIC_NAME_PRECIOUS))
            .withoutTimestamps()
            .writeTo(Sinks.map(Constants.IMAP_NAME_PRECIOUS));

    return pipeline;
}
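A hedged usage sketch for the builder above (the broker address and the embedded Jet member are assumptions, not taken from the demo):

// Illustrative only: submit the Kafka-to-IMap pipeline on a local Jet member
JetInstance jet = Jet.newJetInstance();
jet.newJob(ReadKafkaIntoHazelcast.build("localhost:9092")); // assumed broker address
// This is a streaming job: it keeps copying Kafka records into the IMap until cancelled.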
 
Example 3
Source File: MongoDBSinkTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test() {
    IList<Integer> list = jet.getList("list");
    for (int i = 0; i < 100; i++) {
        list.add(i);
    }

    String connectionString = mongoContainer.connectionString();

    Pipeline p = Pipeline.create();
    p.readFrom(Sources.list(list))
     .map(i -> new Document("key", i))
     .writeTo(MongoDBSinks.mongodb(SINK_NAME, connectionString, DB_NAME, COL_NAME));

    jet.newJob(p).join();

    MongoCollection<Document> collection = collection();
    assertEquals(100, collection.countDocuments());
}
 
Example 4
Source File: TwitterSourceTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void testStream_withTermFilter() {
    Pipeline pipeline = Pipeline.create();
    List<String> terms = new ArrayList<String>(Arrays.asList("BTC", "ETH"));
    final StreamSource<String> twitterTestStream = TwitterSources.stream(
            credentials, () -> new StatusesFilterEndpoint().trackTerms(terms));
    StreamStage<String> tweets = pipeline
            .readFrom(twitterTestStream)
            .withoutTimestamps()
            .map(rawJson -> Json.parse(rawJson)
                                .asObject()
                                .getString("text", null));

    tweets.writeTo(AssertionSinks.assertCollectedEventually(60,
            list -> assertGreaterOrEquals("Emits at least 20 tweets in 1 min.", list.size(), 20)));
    Job job = jet.newJob(pipeline);
    sleepAtLeastSeconds(5);
    try {
        job.join();
        fail("Job should have completed with an AssertionCompletedException, but completed normally");
    } catch (CompletionException e) {
        String errorMsg = e.getCause().getMessage();
        assertTrue("Job was expected to complete with AssertionCompletedException, but completed with: "
                + e.getCause(), errorMsg.contains(AssertionCompletedException.class.getName()));
    }
}
 
Example 5
Source File: InfluxDbSourceTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test_stream_influxDbSource_withPojoResultMapper() {
    InfluxDB db = influxdbContainer.getNewInfluxDB();
    fillCpuData(db);

    Pipeline p = Pipeline.create();

    p.readFrom(
            InfluxDbSources.influxDb("SELECT * FROM test_db..cpu",
                    DATABASE_NAME,
                    influxdbContainer.getUrl(),
                    USERNAME,
                    PASSWORD,
                    Cpu.class))
     .addTimestamps(cpu -> cpu.time.toEpochMilli(), 0)
     .writeTo(Sinks.list("results"));

    jet.newJob(p).join();

    assertEquals(VALUE_COUNT, jet.getList("results").size());
}
 
Example 6
Source File: Solution5.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    SinkStage sinkStage = p.readFrom(TradeSource.tradeSource(1000))
            .withNativeTimestamps(0)
            // Step 1 solution
            // .window(WindowDefinition.tumbling(3000))
            // .aggregate(AggregateOperations.summingLong(Trade::getPrice))
            //
            // Step 2 solution
            // .window(WindowDefinition.tumbling(3000).setEarlyResultsPeriod(1000))
            // .aggregate(AggregateOperations.summingLong(Trade::getPrice))
            //
            // Step 3 solution
            // .window(WindowDefinition.sliding(3000,1000))
            // .aggregate(AggregateOperations.summingLong(Trade::getPrice))
            //
            // Step 4 solution
            // .groupingKey(Trade::getSymbol)
            // .window(WindowDefinition.sliding(3000,1000))
            // .aggregate(AggregateOperations.summingLong(Trade::getPrice))

            .writeTo(Sinks.logger());

    return p;
}
 
Example 7
Source File: RedisSourceTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void stream() {
    int addCount = 500;
    int streamCount = 2;

    for (int i = 0; i < streamCount; i++) {
        fillStream("stream-" + i, addCount);
    }

    Map<String, String> streamOffsets = new HashMap<>();
    for (int i = 0; i < streamCount; i++) {
        streamOffsets.put("stream-" + i, "0");
    }

    Sink<Object> sink = SinkBuilder
            .sinkBuilder("set", c -> c.jetInstance().getHazelcastInstance().getSet("set"))
            .receiveFn(Set::add)
            .build();

    Pipeline p = Pipeline.create();
    p.readFrom(RedisSources.stream("source", uri, streamOffsets,
            mes -> mes.getStream() + " - " + mes.getId()))
            .withoutTimestamps()
            .writeTo(sink);

    Job job = instance.newJob(p);

    Collection<Object> set = instance.getHazelcastInstance().getSet("set");
    assertTrueEventually(() -> assertEquals(addCount * streamCount, set.size()));

    job.cancel();
}
 
Example 8
Source File: TwitterSourceTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void testTimestampedStream_termFilter() {
    Pipeline pipeline = Pipeline.create();
    List<String> terms = new ArrayList<String>(Arrays.asList("San Mateo", "Brno", "London", "Istanbul"));

    final StreamSource<String> twitterTestStream = TwitterSources.timestampedStream(
            credentials, () -> new StatusesFilterEndpoint().trackTerms(terms));
    StreamStage<String> tweets = pipeline
            .readFrom(twitterTestStream)
            .withNativeTimestamps(0)
            .map(rawJson -> Json.parse(rawJson)
                                .asObject()
                                .getString("text", null));
    tweets.writeTo(AssertionSinks.assertCollectedEventually(60,
            list -> assertGreaterOrEquals("Emits at least 20 tweets in 1 min.",
                    list.size(), 20)));
    Job job = jet.newJob(pipeline);
    sleepAtLeastSeconds(5);
    try {
        job.join();
        fail("Job should have completed with an AssertionCompletedException, but completed normally");
    } catch (CompletionException e) {
        String errorMsg = e.getCause().getMessage();
        assertTrue("Job was expected to complete with AssertionCompletedException, but completed with: "
                + e.getCause(), errorMsg.contains(AssertionCompletedException.class.getName()));
    }
}
 
Example 9
Source File: Lab1.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    StreamSource<Long> source = TestSources.itemStream(1, (ts, seq) -> seq);

    p.readFrom(source)
     .withoutTimestamps()
     .writeTo(Sinks.logger());

    // Run the code to see the results in the console
    // Stop it before leaving the lab

    return p;
}
 
Example 10
Source File: Solution2.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    StreamSource<Long> source = TestSources.itemStream(1, (ts, seq) -> seq);
    // StreamSource<String> source = Sources.fileWatcher(DIRECTORY);

    p.readFrom(source)
     .withoutTimestamps()
     // .map( line-> Long.valueOf(line))
     .filter(item -> (item % 2) == 0)
     .writeTo(Sinks.logger());

    return p;
}
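The commented-out lines show the file-watcher variant of the same pipeline; enabling both gives the following, where DIRECTORY is the lab's own constant:

StreamSource<String> source = Sources.fileWatcher(DIRECTORY);

p.readFrom(source)
 .withoutTimestamps()
 .map(line -> Long.valueOf(line))   // parse each new line as a number
 .filter(item -> (item % 2) == 0)   // keep only even values
 .writeTo(Sinks.logger());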
 
Example 11
Source File: Lab4.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline(IMap<String, String> lookupTable) {
    Pipeline p = Pipeline.create();

    p.readFrom(TradeSource.tradeSource())
     .withoutTimestamps()

    // Convert Trade stream to EnrichedTrade stream
    // - Trade (dto.Trade) has a symbol field
    // - Use LOOKUP_TABLE to look up full company name based on the symbol
    // - Create new Enriched Trade (dto.EnrichedTrade) using Trade and company name

     .writeTo(Sinks.logger());

    return p;
}
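One way to fill the gap in this lab is mapUsingIMap, which is exactly what Solution4 (Example 16 below) uses; a sketch of the completed method, relying on the lab's own TradeSource, Trade, and EnrichedTrade types:

private static Pipeline buildPipeline(IMap<String, String> lookupTable) {
    Pipeline p = Pipeline.create();

    p.readFrom(TradeSource.tradeSource())
     .withoutTimestamps()
     // Look up the full company name for each trade's symbol in the IMap,
     // then combine the trade and the name into an EnrichedTrade
     .mapUsingIMap(lookupTable, Trade::getSymbol,
             (trade, companyName) -> new EnrichedTrade(trade, companyName))
     .writeTo(Sinks.logger());

    return p;
}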
 
Example 12
Source File: InfluxDbSinkTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test_influxDbSink() {
    IList<Integer> measurements = jet.getList("mem_usage");
    for (int i = 0; i < VALUE_COUNT; i++) {
        measurements.add(i);
    }

    InfluxDB db = influxdbContainer.getNewInfluxDB();
    db.query(new Query("DROP SERIES FROM mem_usage"));

    Pipeline p = Pipeline.create();

    int startTime = 0;
    p.readFrom(Sources.list(measurements))
     .map(index -> Point.measurement("mem_usage")
                        .time(startTime + index, TimeUnit.MILLISECONDS)
                        .addField("value", index)
                        .build())
     .writeTo(InfluxDbSinks.influxDb(influxdbContainer.getUrl(), DATABASE_NAME, USERNAME, PASSWORD));

    jet.newJob(p).join();

    List<Result> results = db.query(new Query("SELECT * FROM mem_usage")).getResults();
    assertEquals(1, results.size());
    List<Series> seriesList = results.get(0).getSeries();
    assertEquals(1, seriesList.size());
    Series series = seriesList.get(0);
    assertEquals(SERIES, series.getName());
    assertEquals(VALUE_COUNT, series.getValues().size());
}
 
Example 13
Source File: Lab5.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    p.readFrom(TradeSource.tradeSource(1000))
     .withNativeTimestamps(0)

     // STEP 1 - Compute the sum of trades for 3-second intervals
     // - Use 3 sec tumbling windows (defined by WindowDefinition.tumbling with size 3000)
     // - Sum trade prices
     // Run the job and inspect the results. Stop the job before moving to STEP 2.

     // STEP 2 - Compute the sum of trades for 3-second intervals with speculative results every second
     // - Use early results when defining the window
     // - Watch the early-result flag in the console output
     // Run the job and inspect the results. Stop the job before moving to STEP 3.

     // STEP 3 - Compute the sum of trades over the last 3 seconds, updated each second
     // - Use 3 sec sliding windows with a 1 sec step
     // Run the job and inspect the results. Stop the job before moving to STEP 4.

     // STEP 4 - Compute the sum of trades over the last 3 seconds for each trading symbol
     // - Group the stream by trading symbol
     // - Use 3 sec sliding windows with a 1 sec step
     // Run the job and inspect the results. Stop the job before leaving the lab.

     .writeTo(Sinks.logger());

    return p;
}
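The commented-out steps in Solution5 (Example 6 above) are the answers to this lab; as one concrete instance, STEP 1 with a 3-second tumbling window looks roughly like this:

p.readFrom(TradeSource.tradeSource(1000))
 .withNativeTimestamps(0)
 // STEP 1: sum trade prices over 3-second tumbling windows
 .window(WindowDefinition.tumbling(3000))
 .aggregate(AggregateOperations.summingLong(Trade::getPrice))
 .writeTo(Sinks.logger());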
 
Example 14
Source File: AnalysisJet.java    From hazelcast-jet-demos with Apache License 2.0
/**
 * Helper method to construct the pipeline for the job
 *
 * @return the pipeline for the job
 */
public static Pipeline buildPipeline() {
    final Pipeline p = Pipeline.create();

    // Compute map server side
    final BatchStage<Horse> c = p.readFrom(Sources.map(EVENTS_BY_NAME, t -> true, HORSE_FROM_EVENT));

    final BatchStage<Entry<Horse, Long>> c2 = c.groupingKey(wholeItem())
                                               .aggregate(counting())
                                               .filter(ent -> ent.getValue() > 1);

    c2.writeTo(Sinks.map(MULTIPLE));

    return p;
}
 
Example 15
Source File: BreastCancerClassification.java    From hazelcast-jet-demos with Apache License 2.0
/**
 * Builds and returns the Pipeline which represents the actual computation.
 */
private static Pipeline buildPipeline(Path sourceFile) {
    Pipeline pipeline = Pipeline.create();

    BatchStage<BreastCancerDiagnostic> fileSource = pipeline.readFrom(filesBuilder(sourceFile.getParent().toString())
            .glob(sourceFile.getFileName().toString())
            .build(path -> Files.lines(path).skip(1).map(BreastCancerDiagnostic::new)))
                                                            .setName("Read from CSV input file");

    fileSource.apply(applyPredictionFromModelFile())
              .writeTo(Sinks.logger()).setName("Write to standard out");
    return pipeline;
}
 
Example 16
Source File: Solution4.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline(IMap<String, String> lookupTable) {
    Pipeline p = Pipeline.create();

    p.readFrom(TradeSource.tradeSource())
            .withNativeTimestamps(0)
            .mapUsingIMap(lookupTable, Trade::getSymbol,
                    (trade, companyName) -> new EnrichedTrade(trade, companyName) )
            .writeTo(Sinks.logger());

    return p;
}
 
Example 17
Source File: PulsarSourceTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void when_readFromPulsarConsumer_then_jobGetsAllPublishedMessages() {
    JetInstance[] instances = new JetInstance[2];
    Arrays.setAll(instances, i -> createJetMember());

    String topicName = randomName();
    StreamSource<String> pulsarConsumerSrc = setupConsumerSource(topicName,
            x -> new String(x.getData(), StandardCharsets.UTF_8));

    Pipeline pipeline = Pipeline.create();
    pipeline.readFrom(pulsarConsumerSrc)
            .withoutTimestamps()
            .writeTo(AssertionSinks.assertCollectedEventually(60,
                    list -> {
                        assertEquals("# of Emitted items should be equal to # of published items",
                                ITEM_COUNT, list.size());
                        for (int i = 0; i < ITEM_COUNT; i++) {
                            String message = "hello-pulsar-" + i;
                            Assert.assertTrue("missing entry: " + message, list.contains(message));
                        }
                    })
            );
    Job job = instances[0].newJob(pipeline);
    assertJobStatusEventually(job, JobStatus.RUNNING);

    produceMessages("hello-pulsar", topicName, ITEM_COUNT);

    try {
        job.join();
        fail("Job should have completed with an AssertionCompletedException, but completed normally");
    } catch (CompletionException e) {
        String errorMsg = e.getCause().getMessage();
        assertTrue("Job was expected to complete with AssertionCompletedException, but completed with: "
                + e.getCause(), errorMsg.contains(AssertionCompletedException.class.getName()));
    }
    for (JetInstance instance : instances) {
        instance.shutdown();
    }
}
 
Example 18
Source File: ModelServerClassification.java    From hazelcast-jet-demos with Apache License 2.0
private static Pipeline buildPipeline(String serverAddress, IMap<Long, String> reviewsMap) {
    ServiceFactory<Tuple2<PredictionServiceFutureStub, WordIndex>, Tuple2<PredictionServiceFutureStub, WordIndex>>
            tfServingContext = ServiceFactory
            .withCreateContextFn(context -> {
                WordIndex wordIndex = new WordIndex(context.attachedDirectory("data"));
                ManagedChannel channel = ManagedChannelBuilder.forTarget(serverAddress)
                                                              .usePlaintext().build();
                return Tuple2.tuple2(PredictionServiceGrpc.newFutureStub(channel), wordIndex);
            })
            .withDestroyContextFn(t -> ((ManagedChannel) t.f0().getChannel()).shutdownNow())
            .withCreateServiceFn((context, tuple2) -> tuple2);

    Pipeline p = Pipeline.create();
    p.readFrom(Sources.map(reviewsMap))
     .map(Map.Entry::getValue)
     .mapUsingServiceAsync(tfServingContext, 16, true, (t, review) -> {
         float[][] featuresTensorData = t.f1().createTensorInput(review);
         TensorProto.Builder featuresTensorBuilder = TensorProto.newBuilder();
         for (float[] featuresTensorDatum : featuresTensorData) {
             for (float v : featuresTensorDatum) {
                 featuresTensorBuilder.addFloatVal(v);
             }
         }
         TensorShapeProto.Dim featuresDim1 =
                 TensorShapeProto.Dim.newBuilder().setSize(featuresTensorData.length).build();
         TensorShapeProto.Dim featuresDim2 =
                 TensorShapeProto.Dim.newBuilder().setSize(featuresTensorData[0].length).build();
         TensorShapeProto featuresShape =
                 TensorShapeProto.newBuilder().addDim(featuresDim1).addDim(featuresDim2).build();
         featuresTensorBuilder.setDtype(org.tensorflow.framework.DataType.DT_FLOAT)
                              .setTensorShape(featuresShape);
         TensorProto featuresTensorProto = featuresTensorBuilder.build();

         // Generate gRPC request
         Int64Value version = Int64Value.newBuilder().setValue(1).build();
         Model.ModelSpec modelSpec =
                 Model.ModelSpec.newBuilder().setName("reviewSentiment").setVersion(version).build();
         Predict.PredictRequest request = Predict.PredictRequest.newBuilder()
                                                                .setModelSpec(modelSpec)
                                                                .putInputs("input_review", featuresTensorProto)
                                                                .build();

         return toCompletableFuture(t.f0().predict(request))
                 .thenApply(response -> {
                     float classification = response
                             .getOutputsOrThrow("dense_1/Sigmoid:0")
                             .getFloatVal(0);
                     // emit the review along with the classification
                     return tuple2(review, classification);
                 });
     })
     .setLocalParallelism(1) // one worker is enough to drive the async calls
     .writeTo(Sinks.logger());
    return p;
}
 
Example 19
Source File: Lab6.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    p.readFrom(TradeSource.tradeSource(1))
     .withNativeTimestamps(0)

     // Detect if the price between two consecutive trades drops by more than 200

     // Use mapStateful to keep the price of the previous Trade
     // - Consider using com.hazelcast.jet.accumulator.LongAccumulator as a mutable container for long values
     // - Return the price difference if a drop is detected, nothing otherwise

     .writeTo(Sinks.logger(m -> "Price drop: " + m));

    return p;
}
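This list contains no solution for this lab, so here is a hedged sketch of the missing stage, using the LongAccumulator the comments suggest; the drop threshold of 200 comes from the comment, and returning null from mapStateful emits nothing for that item:

p.readFrom(TradeSource.tradeSource(1))
 .withNativeTimestamps(0)
 // Keep the previous trade's price in a LongAccumulator and emit the
 // difference whenever the price drops by more than 200
 .mapStateful(LongAccumulator::new, (prevPrice, trade) -> {
     long diff = prevPrice.get() - trade.getPrice();
     prevPrice.set(trade.getPrice());
     return diff > 200 ? diff : null; // null means "no output for this trade"
 })
 .writeTo(Sinks.logger(m -> "Price drop: " + m));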
 
Example 20
Source File: MongoDBSourceTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void testStream_whenWatchDatabase() {
    IList<Document> list = jet.getList("list");

    String connectionString = mongoContainer.connectionString();
    long value = startAtOperationTime.getValue();

    StreamSource<? extends Document> source = MongoDBSourceBuilder
            .streamDatabase(SOURCE_NAME, () -> MongoClients.create(connectionString))
            .databaseFn(client -> client.getDatabase(DB_NAME))
            .destroyFn(MongoClient::close)
            .searchFn(db -> {
                List<Bson> aggregates = new ArrayList<>();
                aggregates.add(Aggregates.match(new Document("fullDocument.val", new Document("$gte", 10))
                        .append("operationType", "insert")));

                aggregates.add(Aggregates.project(new Document("fullDocument.val", 1).append("_id", 1)));
                return db.watch(aggregates);
            })
            .mapFn(ChangeStreamDocument::getFullDocument)
            .startAtOperationTimeFn(client -> new BsonTimestamp(value))
            .build();

    Pipeline p = Pipeline.create();
    p.readFrom(source)
     .withNativeTimestamps(0)
     .writeTo(Sinks.list(list));

    Job job = jet.newJob(p);

    MongoCollection<Document> col1 = collection("col1");
    MongoCollection<Document> col2 = collection("col2");

    col1.insertOne(new Document("val", 1));
    col1.insertOne(new Document("val", 10).append("foo", "bar"));

    col2.insertOne(new Document("val", 2));
    col2.insertOne(new Document("val", 11).append("foo", "bar"));

    assertTrueEventually(() -> {
        assertEquals(2, list.size());
        list.forEach(document -> assertNull(document.get("foo")));

        assertEquals(10, list.get(0).get("val"));
        assertEquals(11, list.get(1).get("val"));
    });

    col1.insertOne(new Document("val", 3));
    col1.insertOne(new Document("val", 12).append("foo", "bar"));

    col2.insertOne(new Document("val", 4));
    col2.insertOne(new Document("val", 13).append("foo", "bar"));

    assertTrueEventually(() -> {
        assertEquals(4, list.size());
        list.forEach(document -> assertNull(document.get("foo")));

        assertEquals(12, list.get(2).get("val"));
        assertEquals(13, list.get(3).get("val"));
    });

    job.cancel();
}