Example 1
Source File:    From hazelcast-jet-contrib with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a pipeline around the search query {@code "Jet flies"}, formats each status as
 * {@code "@" + user + " - " + text}, submits the pipeline as a job and checks that a
 * {@code CompletionException} is caught whose cause message contains the
 * {@code AssertionCompletedException} class name.
 *
 * NOTE(review): this listing looks truncated in several places (source factory call,
 * the stage's source and sink calls, closing braces) - confirm against the upstream test.
 */
public void testBatch() {
    Pipeline pipeline = Pipeline.create();
    String query = "Jet flies";
    // NOTE(review): the factory call that should precede ", query)" is missing from this line.
    BatchSource<Status> twitterSearch =, query);
    BatchStage<String> tweets = pipeline
            .map(status -> "@" + status.getUser() + " - " + status.getText());
    // Callback over the collected list; per its message it expects at least 10 items.
            list -> assertGreaterOrEquals("Emits at least 10 tweets in 1 minute.",
                    list.size(), 10)));
    Job job = jet.newJob(pipeline);
    try {
        // NOTE(review): a call that waits on the job presumably preceded this line;
        // as shown, fail() is the first statement executed inside the try block.
        fail("Job should have completed with an AssertionCompletedException, but completed normally");
    } catch (CompletionException e) {
        // The check is done on the cause's message text, not on the cause's type.
        String errorMsg = e.getCause().getMessage();
        assertTrue("Job was expected to complete with AssertionCompletedException, but completed with: "
                + e.getCause(), errorMsg.contains(AssertionCompletedException.class.getName()));
Example 2
Source File:    From hazelcast-jet-demos with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a pipeline that reads from the Kafka topic {@code Constants.TOPIC_NAME_PRECIOUS}.
 *
 * @param bootstrapServers value stored under {@code ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG}
 * @return the created pipeline
 */
public static Pipeline build(String bootstrapServers) {
    // Consumer settings: a fresh random group id on every call, String deserializers for
    // both keys and values, and "earliest" as the auto offset reset value.
    Properties properties = new Properties();
    properties.put(ConsumerConfig.GROUP_ID_CONFIG, UUID.randomUUID().toString());
    properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getCanonicalName());
    properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getCanonicalName());
    properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

    Pipeline pipeline = Pipeline.create();

    // NOTE(review): the receiver of this call and the remainder of the chain are
    // missing from the listing - confirm against the upstream source.
            .readFrom(KafkaSources.kafka(properties, Constants.TOPIC_NAME_PRECIOUS))

    return pipeline;
Example 3
Source File:    From hazelcast-jet-contrib with Apache License 2.0 6 votes vote down vote up
/**
 * Maps integers to {@code Document("key", i)}, writes them to the MongoDB sink and
 * asserts that the target collection ends up holding 100 documents.
 *
 * NOTE(review): the loop body, the head of the pipeline chain and the job submission
 * are not present in this listing - confirm against the upstream test.
 */
public void test() {
    IList<Integer> list = jet.getList("list");
    // NOTE(review): the body of this loop is missing; presumably it fills "list" with 0..99.
    for (int i = 0; i < 100; i++) {

    String connectionString = mongoContainer.connectionString();

    Pipeline p = Pipeline.create();
    // Each item becomes a single-field document keyed "key" before reaching the sink.
     .map(i -> new Document("key", i))
     .writeTo(MongoDBSinks.mongodb(SINK_NAME, connectionString, DB_NAME, COL_NAME));


    MongoCollection<Document> collection = collection();
    assertEquals(100, collection.countDocuments());
Example 4
Source File:    From hazelcast-jet-contrib with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a pipeline over a stream filtered by the terms "BTC" and "ETH", extracts the
 * {@code "text"} field from each raw JSON item, submits the job and checks that a
 * {@code CompletionException} is caught whose cause message contains the
 * {@code AssertionCompletedException} class name.
 *
 * NOTE(review): the source factory call, the stage's source and sink calls and the
 * closing braces are missing from this listing - confirm against the upstream test.
 */
public void testStream_withTermFilter() {
    Pipeline pipeline = Pipeline.create();
    List<String> terms = new ArrayList<String>(Arrays.asList("BTC", "ETH"));
    // NOTE(review): the factory call that takes these arguments is missing after "=".
    final StreamSource<String> twitterTestStream =
            credentials, () -> new StatusesFilterEndpoint().trackTerms(terms));
    StreamStage<String> tweets = pipeline
            // Parses each raw JSON string and reads "text", passing null as the default argument.
            .map(rawJson -> Json.parse(rawJson)
                                .getString("text", null));

    // Callback over the collected list; per its message it expects at least 20 items.
            list -> assertGreaterOrEquals("Emits at least 20 tweets in 1 min.", list.size(), 20)));
    Job job = jet.newJob(pipeline);
    try {
        // NOTE(review): a call that waits on the job presumably preceded this line;
        // as shown, fail() is the first statement executed inside the try block.
        fail("Job should have completed with an AssertionCompletedException, but completed normally");
    } catch (CompletionException e) {
        // The check is done on the cause's message text, not on the cause's type.
        String errorMsg = e.getCause().getMessage();
        assertTrue("Job was expected to complete with AssertionCompletedException, but completed with: "
                + e.getCause(), errorMsg.contains(AssertionCompletedException.class.getName()));
Example 5
Source File:    From hazelcast-jet-contrib with Apache License 2.0 6 votes vote down vote up
/**
 * Reads from InfluxDB with the query {@code SELECT * FROM test_db..cpu}, assigns
 * timestamps from each item's {@code time} field and asserts that the "results"
 * list holds {@code VALUE_COUNT} entries.
 *
 * NOTE(review): most of the pipeline chain (remaining source arguments, sink, job
 * submission) is missing from this listing - confirm against the upstream test.
 */
public void test_stream_influxDbSource_withPojoResultMapper() {
    // NOTE(review): "db" is not used in the lines visible here.
    InfluxDB db = influxdbContainer.getNewInfluxDB();

    Pipeline p = Pipeline.create();

    // NOTE(review): the remaining arguments of influxDb(...) are cut off after the query string.
            InfluxDbSources.influxDb("SELECT * FROM test_db..cpu",
     // Timestamp is the epoch-millisecond value of cpu.time; the second argument is 0.
     .addTimestamps(cpu -> cpu.time.toEpochMilli(), 0)


    assertEquals(VALUE_COUNT, jet.getList("results").size());
Example 6
Source File:    From hazelcast-jet-training with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a pipeline that reads from {@code TradeSource.tradeSource(1000)}. The four
 * windowing/aggregation solutions are kept as commented-out alternatives; none is active.
 *
 * NOTE(review): the statement starting at "SinkStage sinkStage" has no visible
 * continuation or terminator in this listing - confirm against the upstream source.
 *
 * @return the created pipeline
 */
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    SinkStage sinkStage = p.readFrom(TradeSource.tradeSource(1000))
            // Step 1 solution
            // .window(WindowDefinition.tumbling(3000))
            // .aggregate(AggregateOperations.summingLong(Trade::getPrice))
            // Step 2 solution
            // .window(WindowDefinition.tumbling(3000).setEarlyResultsPeriod(1000))
            // .aggregate(AggregateOperations.summingLong(Trade::getPrice))
            // Step 3 solution
            // .window(WindowDefinition.sliding(3000,1000))
            // .aggregate(AggregateOperations.summingLong(Trade::getPrice))
            // Step 4 solution
            // .groupingKey(Trade::getSymbol)
            // .window(WindowDefinition.sliding(3000,1000))
            // .aggregate(AggregateOperations.summingLong(Trade::getPrice))


    return p;
Example 7
Source File:    From hazelcast-jet-contrib with Apache License 2.0 5 votes vote down vote up
/**
 * Calls {@code fillStream} for "stream-0" and "stream-1" with a count of 500 each, reads
 * them from offset "0" through a pipeline that maps each message to
 * {@code stream + " - " + id}, and waits until the Hazelcast set "set" holds
 * {@code addCount * streamCount} entries.
 *
 * NOTE(review): loop closing braces, the rest of the sink builder chain, the source
 * factory call and the sink attachment are missing from this listing.
 */
public void stream() {
    int addCount = 500;
    int streamCount = 2;

    for (int i = 0; i < streamCount; i++) {
        fillStream("stream-" + i, addCount);

    // Every stream is mapped to the offset string "0".
    Map<String, String> streamOffsets = new HashMap<>();
    for (int i = 0; i < streamCount; i++) {
        streamOffsets.put("stream-" + i, "0");

    // Sink named "set" whose context object is the Hazelcast set "set".
    // NOTE(review): the remaining builder calls are cut off.
    Sink<Object> sink = SinkBuilder
            .sinkBuilder("set", c -> c.jetInstance().getHazelcastInstance().getSet("set"))

    Pipeline p = Pipeline.create();
    // NOTE(review): the source factory wrapping these arguments is missing from the call.
    p.readFrom("source", uri, streamOffsets,
            mes -> mes.getStream() + " - " + mes.getId()))

    Job job = instance.newJob(p);

    Collection<Object> set = instance.getHazelcastInstance().getSet("set");
    assertTrueEventually(() -> assertEquals(addCount * streamCount, set.size()));

Example 8
Source File:    From hazelcast-jet-contrib with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a pipeline over {@code TwitterSources.timestampedStream} filtered by four
 * place-name terms, extracts the {@code "text"} field from each raw JSON item, submits
 * the job and checks that a {@code CompletionException} is caught whose cause message
 * contains the {@code AssertionCompletedException} class name.
 *
 * NOTE(review): the stage's source and sink calls and the closing braces are missing
 * from this listing - confirm against the upstream test.
 */
public void testTimestampedStream_termFilter() {
    Pipeline pipeline = Pipeline.create();
    List<String> terms = new ArrayList<String>(Arrays.asList("San Mateo", "Brno", "London", "Istanbul"));

    final StreamSource<String> twitterTestStream = TwitterSources.timestampedStream(
            credentials, () -> new StatusesFilterEndpoint().trackTerms(terms));
    StreamStage<String> tweets = pipeline
            // Parses each raw JSON string and reads "text", passing null as the default argument.
            .map(rawJson -> Json.parse(rawJson)
                                .getString("text", null));
    // Callback over the collected list; per its message it expects at least 20 items.
            list -> assertGreaterOrEquals("Emits at least 20 tweets in 1 min.",
                    list.size(), 20)));
    Job job = jet.newJob(pipeline);
    try {
        // NOTE(review): a call that waits on the job presumably preceded this line;
        // as shown, fail() is the first statement executed inside the try block.
        fail("Job should have completed with an AssertionCompletedException, but completed normally");
    } catch (CompletionException e) {
        // The check is done on the cause's message text, not on the cause's type.
        String errorMsg = e.getCause().getMessage();
        assertTrue("Job was expected to complete with AssertionCompletedException, but completed with: "
                + e.getCause(), errorMsg.contains(AssertionCompletedException.class.getName()));
Example 9
Source File:    From hazelcast-jet-training with Apache License 2.0 5 votes vote down vote up
/**
 * Lab skeleton: creates an empty pipeline and declares a test stream source.
 *
 * NOTE(review): no stage is attached to the pipeline in the lines visible here, so
 * "source" is declared but unused - confirm against the upstream lab.
 *
 * @return the created pipeline
 */
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    // Item stream created with first argument 1; each emitted item is the sequence number.
    StreamSource<Long> source = TestSources.itemStream(1, (ts, seq) -> seq);


    // Run the code to see the results in the console
    // Stop it before leaving the lab

    return p;
Example 10
Source File:    From hazelcast-jet-training with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a pipeline whose visible stage keeps only even items.
 *
 * NOTE(review): the head of the stage chain (reading from "source") and its sink are
 * missing from this listing - confirm against the upstream lab.
 *
 * @return the created pipeline
 */
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    // Item stream created with first argument 1; each emitted item is the sequence number.
    StreamSource<Long> source = TestSources.itemStream(1, (ts, seq) -> seq);
    // StreamSource<String> source = Sources.fileWatcher(DIRECTORY);

     // .map( line-> Long.valueOf(line))
     // Keeps items that are divisible by 2.
     .filter(item -> (item % 2) == 0)

    return p;
Example 11
Source File:    From hazelcast-jet-training with Apache License 2.0 5 votes vote down vote up
/**
 * Lab skeleton for trade enrichment: creates an empty pipeline and returns it; the
 * enrichment steps are described in comments only.
 *
 * @param lookupTable map of String to String; not used in the lines visible here
 * @return the created pipeline
 */
private static Pipeline buildPipeline(IMap<String, String> lookupTable) {
    Pipeline p = Pipeline.create();


    // Convert Trade stream to EnrichedTrade stream
    // - Trade (dto.Trade) has a symbol field
    // - Use LOOKUP_TABLE to look up full company name based on the symbol
    // - Create new Enriched Trade (dto.EnrichedTrade) using Trade and company name


    return p;
Example 12
Source File:    From hazelcast-jet-contrib with Apache License 2.0 5 votes vote down vote up
/**
 * Maps indexes to "mem_usage" points, writes them to the InfluxDB sink and then queries
 * the database, asserting one result with one series named {@code SERIES} that holds
 * {@code VALUE_COUNT} values.
 *
 * NOTE(review): the loop body, the head of the pipeline chain, the end of the map
 * lambda and the job submission are missing from this listing.
 */
public void test_influxDbSink() {
    IList<Integer> measurements = jet.getList("mem_usage");
    // NOTE(review): the body of this loop is missing; presumably it fills "measurements".
    for (int i = 0; i < VALUE_COUNT; i++) {

    InfluxDB db = influxdbContainer.getNewInfluxDB();
    // Issues a DROP SERIES query for mem_usage before the pipeline is built.
    db.query(new Query("DROP SERIES FROM mem_usage"));

    Pipeline p = Pipeline.create();

    int startTime = 0;
     // Each index becomes a point timestamped startTime + index (milliseconds)
     // carrying the index itself in the "value" field.
     .map(index -> Point.measurement("mem_usage")
                        .time(startTime + index, TimeUnit.MILLISECONDS)
                        .addField("value", index)
     .writeTo(InfluxDbSinks.influxDb(influxdbContainer.getUrl(), DATABASE_NAME, USERNAME, PASSWORD));


    List<Result> results = db.query(new Query("SELECT * FROM mem_usage")).getResults();
    assertEquals(1, results.size());
    List<Series> seriesList = results.get(0).getSeries();
    assertEquals(1, seriesList.size());
    Series series = seriesList.get(0);
    assertEquals(SERIES, series.getName());
    assertEquals(VALUE_COUNT, series.getValues().size());
Example 13
Source File:    From hazelcast-jet-training with Apache License 2.0 5 votes vote down vote up
/**
 * Lab skeleton for windowed aggregation: creates an empty pipeline and returns it; the
 * four exercise steps are described in comments only.
 *
 * @return the created pipeline
 */
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();


     // STEP 1 - Compute sum of trades for 3-second intervals
     // - Use 3 sec tumbling windows (defined via WindowDefinition.tumbling with size 3000)
     // - Sum trade prices
     // Run the job and inspect the results. Stop the Job before moving to STEP 2.

     // STEP 2 - Compute sum of trades for 3-second intervals with speculative results every second
     // - Use early results when defining the window
     // - Watch the early result flag in the console output
     // Run the job and inspect the results. Stop the Job before moving to STEP 3.

     // STEP 3 - Compute sum of trades in the last 3 seconds, updated each second
     // - Use 3 sec sliding windows with 1 sec step
     // Run the job and inspect the results. Stop the Job before moving to STEP 4.

     // STEP 4 - Compute sum of trades in the last 3 seconds for each trading symbol
     // - Group the stream on the trading symbol
     // - Use 3 sec sliding windows with 1 sec step
     // Run the job and inspect the results. Stop the Job before leaving the lab.


    return p;
Example 14
Source File:    From hazelcast-jet-demos with Apache License 2.0 5 votes vote down vote up
/**
 * Helper method to construct the pipeline for the job.
 *
 * @return the pipeline for the job
 */
public static Pipeline buildPipeline() {
    // Reads Horse items, groups them by the whole item and keeps entries whose value exceeds 1.
    // NOTE(review): the first readFrom argument, the aggregation between groupingKey and
    // filter, and the sink are missing from this listing - confirm against the upstream source.
    final Pipeline p = Pipeline.create();

    // Compute map server side
    // The predicate t -> true accepts every item.
    final BatchStage<Horse> c = p.readFrom(, t -> true, HORSE_FROM_EVENT));

    // Key is the item itself (wholeItem()); only entries with a value greater than 1 pass.
    final BatchStage<Entry<Horse, Long>> c2 = c.groupingKey(wholeItem())
                                               .filter(ent -> ent.getValue() > 1);


    return p;
Example 15
Source File:    From hazelcast-jet-demos with Apache License 2.0 5 votes vote down vote up
/**
 * Builds and returns the Pipeline which represents the actual computation.
 */
private static Pipeline buildPipeline(Path sourceFile) {
    // Reads BreastCancerDiagnostic rows from files in sourceFile's parent directory and
    // ends in a logger sink.
    // NOTE(review): the stages between the file source and the sink are missing from this listing.
    Pipeline pipeline = Pipeline.create();

    // The file source is built on the parent directory of sourceFile; for each path the
    // first line is skipped and every remaining line is passed to the
    // BreastCancerDiagnostic constructor.
    BatchStage<BreastCancerDiagnostic> fileSource = pipeline.readFrom(filesBuilder(sourceFile.getParent().toString())
            .build(path -> Files.lines(path).skip(1).map(BreastCancerDiagnostic::new)))
                                                            .setName("Read from CSV input file");

              // NOTE(review): the receiver of this call is missing.
              .writeTo(Sinks.logger()).setName("Write to standard out");
    return pipeline;
Example 16
Source File:    From hazelcast-jet-training with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a pipeline whose visible stage turns each Trade into an EnrichedTrade by
 * looking up the trade's symbol in {@code lookupTable}.
 *
 * NOTE(review): the head of the stage chain and its sink are missing from this listing.
 *
 * @param lookupTable map passed to {@code mapUsingIMap}; looked up with the trade's symbol
 * @return the created pipeline
 */
private static Pipeline buildPipeline(IMap<String, String> lookupTable) {
    Pipeline p = Pipeline.create();

            // Lookup key is Trade::getSymbol; the looked-up value is passed as companyName.
            .mapUsingIMap(lookupTable, Trade::getSymbol,
                    (trade, companyName) -> new EnrichedTrade(trade, companyName) )

    return p;
Example 17
Source File:    From hazelcast-jet-contrib with Apache License 2.0 5 votes vote down vote up
/**
 * Creates two Jet instances, submits a pipeline on the first one, waits for the job to
 * reach RUNNING, produces {@code ITEM_COUNT} messages to a randomly named topic and
 * checks that a {@code CompletionException} is caught whose cause message contains the
 * {@code AssertionCompletedException} class name.
 *
 * NOTE(review): the pipeline's source and sink calls, the job wait, the body of the
 * final loop and the closing braces are missing from this listing.
 */
public void when_readFromPulsarConsumer_then_jobGetsAllPublishedMessages() {
    JetInstance[] instances = new JetInstance[2];
    // Fills every slot by calling createJetMember().
    Arrays.setAll(instances, i -> createJetMember());

    String topicName = randomName();
    // Projection decodes each message's data bytes as a UTF-8 string.
    StreamSource<String> pulsarConsumerSrc = setupConsumerSource(topicName,
            x -> new String(x.getData(), StandardCharsets.UTF_8));

    Pipeline pipeline = Pipeline.create();
                    // Callback over the collected list: the size must equal ITEM_COUNT and
                    // every "hello-pulsar-<i>" for i in [0, ITEM_COUNT) must be present.
                    list -> {
                        assertEquals("# of Emitted items should be equal to # of published items",
                                ITEM_COUNT, list.size());
                        for (int i = 0; i < ITEM_COUNT; i++) {
                            String message = "hello-pulsar-" + i;
                            Assert.assertTrue("missing entry: " + message, list.contains(message));
    Job job = instances[0].newJob(pipeline);
    // Messages are produced only after the job is observed as RUNNING.
    assertJobStatusEventually(job, JobStatus.RUNNING);

    produceMessages("hello-pulsar", topicName, ITEM_COUNT);

    try {
        // NOTE(review): a call that waits on the job presumably preceded this line;
        // as shown, fail() is the first statement executed inside the try block.
        fail("Job should have completed with an AssertionCompletedException, but completed normally");
    } catch (CompletionException e) {
        // The check is done on the cause's message text, not on the cause's type.
        String errorMsg = e.getCause().getMessage();
        assertTrue("Job was expected to complete with AssertionCompletedException, but completed with: "
                + e.getCause(), errorMsg.contains(AssertionCompletedException.class.getName()));
    // NOTE(review): the loop body is missing; presumably each instance is shut down here.
    for (JetInstance instance:instances) {
Example 18
Source File:    From hazelcast-jet-demos with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a pipeline that, for each review, builds a prediction request and calls the
 * prediction service asynchronously, emitting {@code tuple2(review, classification)}.
 *
 * NOTE(review): many right-hand sides, closing braces, the pipeline source and the sink
 * are missing from this listing - confirm against the upstream source before reuse.
 *
 * @param serverAddress target passed to {@code ManagedChannelBuilder.forTarget}
 * @param reviewsMap    map of Long to String; not used in the lines visible here
 * @return the created pipeline
 */
private static Pipeline buildPipeline(String serverAddress, IMap<Long, String> reviewsMap) {
    // The shared context is a tuple of (gRPC future stub, WordIndex); the per-processor
    // service is that same tuple, returned unchanged by the create-service function.
    ServiceFactory<Tuple2<PredictionServiceFutureStub, WordIndex>, Tuple2<PredictionServiceFutureStub, WordIndex>>
            tfServingContext = ServiceFactory
            .withCreateContextFn(context -> {
                // The word index is loaded from the directory attached under the id "data".
                WordIndex wordIndex = new WordIndex(context.attachedDirectory("data"));
                // NOTE(review): the rest of the channel builder chain is cut off.
                ManagedChannel channel = ManagedChannelBuilder.forTarget(serverAddress)
                return Tuple2.tuple2(PredictionServiceGrpc.newFutureStub(channel), wordIndex);
            // On destroy, the stub's channel is shut down immediately.
            .withDestroyContextFn(t -> ((ManagedChannel) t.f0().getChannel()).shutdownNow())
            .withCreateServiceFn((context, tuple2) -> tuple2);

    Pipeline p = Pipeline.create();
     // NOTE(review): the receiver of this call (the stage reading the reviews) is missing.
     .mapUsingServiceAsync(tfServingContext, 16, true, (t, review) -> {
         // t.f1() is the WordIndex; it converts the review into a 2-D float array.
         float[][] featuresTensorData = t.f1().createTensorInput(review);
         TensorProto.Builder featuresTensorBuilder = TensorProto.newBuilder();
         // NOTE(review): the innermost loop body is missing from the listing.
         for (float[] featuresTensorDatum : featuresTensorData) {
             for (float v : featuresTensorDatum) {
         // NOTE(review): the right-hand sides of the next four declarations are missing.
         TensorShapeProto.Dim featuresDim1 =
         TensorShapeProto.Dim featuresDim2 =
         TensorShapeProto featuresShape =
         TensorProto featuresTensorProto =;

         // Generate gRPC request
         Int64Value version = Int64Value.newBuilder().setValue(1).build();
         Model.ModelSpec modelSpec =
         // The features tensor is registered under the input name "input_review".
         Predict.PredictRequest request = Predict.PredictRequest.newBuilder()
                                                                .putInputs("input_review", featuresTensorProto)

         // t.f0() is the gRPC future stub; its result is adapted to a CompletableFuture.
         return toCompletableFuture(t.f0().predict(request))
                 .thenApply(response -> {
                     float classification = response
                     // emit the review along with the classification
                     return tuple2(review, classification);
     .setLocalParallelism(1) // one worker is enough to drive the async calls
    return p;
Example 19
Source File:    From hazelcast-jet-training with Apache License 2.0 4 votes vote down vote up
/**
 * Lab skeleton for price-drop detection: the stateful mapping step is described in
 * comments only, and the visible chain ends in a logger sink that prefixes each item
 * with {@code "Price drop: "}.
 *
 * NOTE(review): the head of the stage chain (the source being read) is missing from
 * this listing - confirm against the upstream lab.
 *
 * @return the created pipeline
 */
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

      .withNativeTimestamps(0 )

     // Detect if price between two consecutive trades drops by more than 200

     // Use the mapStateful to keep price of previous Trade
     // - Consider using com.hazelcast.jet.accumulator.LongAccumulator as a mutable container for long values
     // - Return the price difference if drop is detected, nothing otherwise

     .writeTo(Sinks.logger( m -> "Price drop: " + m));

    return p;
Example 20
Source File:    From hazelcast-jet-contrib with Apache License 2.0 votes vote down vote up
/**
 * Builds a database-level MongoDB stream source restricted to inserts whose
 * {@code val} is at least 10, submits a job, inserts documents into "col1" and "col2"
 * in two rounds and asserts after each round that the "list" IList holds only the
 * matching documents, with no {@code "foo"} field.
 *
 * NOTE(review): the end of the search function, the end of the source builder chain,
 * the pipeline stages and the closing of both assertTrueEventually lambdas are missing
 * from this listing - confirm against the upstream test.
 */
public void testStream_whenWatchDatabase() {
    IList<Document> list = jet.getList("list");

    String connectionString = mongoContainer.connectionString();
    long value = startAtOperationTime.getValue();

    StreamSource<? extends Document> source = MongoDBSourceBuilder
            .streamDatabase(SOURCE_NAME, () -> MongoClients.create(connectionString))
            .databaseFn(client -> client.getDatabase(DB_NAME))
            .searchFn(db -> {
                List<Bson> aggregates = new ArrayList<>();
                // Match stage: fullDocument.val >= 10 and operationType "insert".
                aggregates.add(Aggregates.match(new Document("fullDocument.val", new Document("$gte", 10))
                        .append("operationType", "insert")));

                // Projection stage lists only fullDocument.val and _id.
                aggregates.add(Aggregates.project(new Document("fullDocument.val", 1).append("_id", 1)));
            // The start timestamp is built from the value captured above.
            .startAtOperationTimeFn(client -> new BsonTimestamp(value))

    // NOTE(review): no stages are attached to the pipeline in the lines visible here.
    Pipeline p = Pipeline.create();

    Job job = jet.newJob(p);

    MongoCollection<Document> col1 = collection("col1");
    MongoCollection<Document> col2 = collection("col2");

    // First round: one document below the threshold and one at or above it per collection.
    col1.insertOne(new Document("val", 1));
    col1.insertOne(new Document("val", 10).append("foo", "bar"));

    col2.insertOne(new Document("val", 2));
    col2.insertOne(new Document("val", 11).append("foo", "bar"));

    assertTrueEventually(() -> {
        assertEquals(2, list.size());
        list.forEach(document -> assertNull(document.get("foo")));

        assertEquals(10, list.get(0).get("val"));
        assertEquals(11, list.get(1).get("val"));


    // Second round: same pattern, which should bring the list to four entries.
    col1.insertOne(new Document("val", 3));
    col1.insertOne(new Document("val", 12).append("foo", "bar"));

    col2.insertOne(new Document("val", 4));
    col2.insertOne(new Document("val", 13).append("foo", "bar"));

    assertTrueEventually(() -> {
        assertEquals(4, list.size());
        list.forEach(document -> assertNull(document.get("foo")));

        assertEquals(12, list.get(2).get("val"));
        assertEquals(13, list.get(3).get("val"));

