Java Code Examples for com.hazelcast.jet.pipeline.Pipeline#create()

The following examples show how to use com.hazelcast.jet.pipeline.Pipeline#create(). Each example's header notes the project it comes from and its license.
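Every example below starts from the same call: Pipeline.create() returns an empty pipeline, which is then built up by chaining stages from a source to a sink and submitted to a Jet instance as a job. A minimal, self-contained sketch of that shape (the test source, logger sink, and embedded Jet member are illustrative choices, not part of Pipeline#create() itself; the API level is Jet 4.x, matching the examples below):

import com.hazelcast.jet.Jet;
import com.hazelcast.jet.JetInstance;
import com.hazelcast.jet.pipeline.Pipeline;
import com.hazelcast.jet.pipeline.Sinks;
import com.hazelcast.jet.pipeline.test.TestSources;

public class MinimalPipeline {
    public static void main(String[] args) {
        // An empty pipeline; it does nothing until a source and a sink are attached
        Pipeline p = Pipeline.create();
        p.readFrom(TestSources.items(1, 2, 3))
         .writeTo(Sinks.logger());

        JetInstance jet = Jet.newJetInstance();
        try {
            // Submit the pipeline as a job and wait for the batch to complete
            jet.newJob(p).join();
        } finally {
            jet.shutdown();
        }
    }
}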
Example 1
Source File: TwitterSourceTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void testBatch() {
    Pipeline pipeline = Pipeline.create();
    String query = "Jet flies";
    BatchSource<Status> twitterSearch = TwitterSources.search(credentials, query);
    BatchStage<String> tweets = pipeline
            .readFrom(twitterSearch)
            .map(status -> "@" + status.getUser() + " - " + status.getText());
    tweets.writeTo(AssertionSinks.assertCollectedEventually(60,
            list -> assertGreaterOrEquals("Emits at least 10 tweets in 1 minute.",
                    list.size(), 10)));
    Job job = jet.newJob(pipeline);
    sleepAtLeastSeconds(5);
    try {
        job.join();
        fail("Job should have completed with an AssertionCompletedException, but completed normally");
    } catch (CompletionException e) {
        String errorMsg = e.getCause().getMessage();
        assertTrue("Job was expected to complete with AssertionCompletedException, but completed with: "
                + e.getCause(), errorMsg.contains(AssertionCompletedException.class.getName()));
    }
}
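The join-then-catch block at the end of this test recurs verbatim in Examples 4, 8, and 17 below: AssertionSinks.assertCollectedEventually completes the job with an AssertionCompletedException on success, so the test expects join() to throw. When adapting these tests, the pattern factors naturally into a helper; a sketch built from the same assertions as above (the helper name is ours, not from the original source):

private static void expectAssertionCompleted(Job job) {
    try {
        job.join();
        fail("Job should have completed with an AssertionCompletedException, but completed normally");
    } catch (CompletionException e) {
        // The assertion sink signals success by failing the job with AssertionCompletedException
        assertTrue("Job was expected to complete with AssertionCompletedException, but completed with: "
                + e.getCause(), e.getCause().getMessage().contains(AssertionCompletedException.class.getName()));
    }
}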
 
Example 2
Source File: ReadKafkaIntoHazelcast.java    From hazelcast-jet-demos with Apache License 2.0
public static Pipeline build(String bootstrapServers) {
    Properties properties = new Properties();
    properties.put(ConsumerConfig.GROUP_ID_CONFIG, UUID.randomUUID().toString());
    properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getCanonicalName());
    properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getCanonicalName());
    properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

    Pipeline pipeline = Pipeline.create();

    pipeline
            .readFrom(KafkaSources.kafka(properties, Constants.TOPIC_NAME_PRECIOUS))
            .withoutTimestamps()
            .writeTo(Sinks.map(Constants.IMAP_NAME_PRECIOUS));

    return pipeline;
}
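A hedged usage sketch for the builder above (the broker address and the embedded Jet member are assumptions, not taken from the demo):

// Illustrative only: submit the Kafka-to-IMap pipeline on a local Jet member
JetInstance jet = Jet.newJetInstance();
jet.newJob(ReadKafkaIntoHazelcast.build("localhost:9092")); // assumed broker address
// This is a streaming job: it keeps copying Kafka records into the IMap until cancelled.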
 
Example 3
Source File: MongoDBSinkTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test() {
    IList<Integer> list = jet.getList("list");
    for (int i = 0; i < 100; i++) {
        list.add(i);
    }

    String connectionString = mongoContainer.connectionString();

    Pipeline p = Pipeline.create();
    p.readFrom(Sources.list(list))
     .map(i -> new Document("key", i))
     .writeTo(MongoDBSinks.mongodb(SINK_NAME, connectionString, DB_NAME, COL_NAME));

    jet.newJob(p).join();

    MongoCollection<Document> collection = collection();
    assertEquals(100, collection.countDocuments());
}
 
Example 4
Source File: TwitterSourceTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void testStream_withTermFilter() {
    Pipeline pipeline = Pipeline.create();
    List<String> terms = new ArrayList<String>(Arrays.asList("BTC", "ETH"));
    final StreamSource<String> twitterTestStream = TwitterSources.stream(
            credentials, () -> new StatusesFilterEndpoint().trackTerms(terms));
    StreamStage<String> tweets = pipeline
            .readFrom(twitterTestStream)
            .withoutTimestamps()
            .map(rawJson -> Json.parse(rawJson)
                                .asObject()
                                .getString("text", null));

    tweets.writeTo(AssertionSinks.assertCollectedEventually(60,
            list -> assertGreaterOrEquals("Emits at least 20 tweets in 1 min.", list.size(), 20)));
    Job job = jet.newJob(pipeline);
    sleepAtLeastSeconds(5);
    try {
        job.join();
        fail("Job should have completed with an AssertionCompletedException, but completed normally");
    } catch (CompletionException e) {
        String errorMsg = e.getCause().getMessage();
        assertTrue("Job was expected to complete with AssertionCompletedException, but completed with: "
                + e.getCause(), errorMsg.contains(AssertionCompletedException.class.getName()));
    }
}
 
Example 5
Source File: InfluxDbSourceTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test_stream_influxDbSource_withPojoResultMapper() {
    InfluxDB db = influxdbContainer.getNewInfluxDB();
    fillCpuData(db);

    Pipeline p = Pipeline.create();

    p.readFrom(
            InfluxDbSources.influxDb("SELECT * FROM test_db..cpu",
                    DATABASE_NAME,
                    influxdbContainer.getUrl(),
                    USERNAME,
                    PASSWORD,
                    Cpu.class))
     .addTimestamps(cpu -> cpu.time.toEpochMilli(), 0)
     .writeTo(Sinks.list("results"));

    jet.newJob(p).join();

    assertEquals(VALUE_COUNT, jet.getList("results").size());
}
 
Example 6
Source File: Solution5.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    SinkStage sinkStage = p.readFrom(TradeSource.tradeSource(1000))
            .withNativeTimestamps(0)
            // Step 1 solution
            // .window(WindowDefinition.tumbling(3000))
            // .aggregate(AggregateOperations.summingLong(Trade::getPrice))
            //
            // Step 2 solution
            // .window(WindowDefinition.tumbling(3000).setEarlyResultsPeriod(1000))
            // .aggregate(AggregateOperations.summingLong(Trade::getPrice))
            //
            // Step 3 solution
            // .window(WindowDefinition.sliding(3000,1000))
            // .aggregate(AggregateOperations.summingLong(Trade::getPrice))
            //
            // Step 4 solution
            // .groupingKey(Trade::getSymbol)
            // .window(WindowDefinition.sliding(3000,1000))
            // .aggregate(AggregateOperations.summingLong(Trade::getPrice))

            .writeTo(Sinks.logger());

    return p;
}
 
Example 7
Source File: RedisSourceTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void stream() {
    int addCount = 500;
    int streamCount = 2;

    for (int i = 0; i < streamCount; i++) {
        fillStream("stream-" + i, addCount);
    }

    Map<String, String> streamOffsets = new HashMap<>();
    for (int i = 0; i < streamCount; i++) {
        streamOffsets.put("stream-" + i, "0");
    }

    Sink<Object> sink = SinkBuilder
            .sinkBuilder("set", c -> c.jetInstance().getHazelcastInstance().getSet("set"))
            .receiveFn(Set::add)
            .build();

    Pipeline p = Pipeline.create();
    p.readFrom(RedisSources.stream("source", uri, streamOffsets,
            mes -> mes.getStream() + " - " + mes.getId()))
            .withoutTimestamps()
            .writeTo(sink);

    Job job = instance.newJob(p);

    Collection<Object> set = instance.getHazelcastInstance().getSet("set");
    assertTrueEventually(() -> assertEquals(addCount * streamCount, set.size()));

    job.cancel();
}
 
Example 8
Source File: TwitterSourceTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void testTimestampedStream_termFilter() {
    Pipeline pipeline = Pipeline.create();
    List<String> terms = new ArrayList<String>(Arrays.asList("San Mateo", "Brno", "London", "Istanbul"));

    final StreamSource<String> twitterTestStream = TwitterSources.timestampedStream(
            credentials, () -> new StatusesFilterEndpoint().trackTerms(terms));
    StreamStage<String> tweets = pipeline
            .readFrom(twitterTestStream)
            .withNativeTimestamps(0)
            .map(rawJson -> Json.parse(rawJson)
                                .asObject()
                                .getString("text", null));
    tweets.writeTo(AssertionSinks.assertCollectedEventually(60,
            list -> assertGreaterOrEquals("Emits at least 20 tweets in 1 min.",
                    list.size(), 20)));
    Job job = jet.newJob(pipeline);
    sleepAtLeastSeconds(5);
    try {
        job.join();
        fail("Job should have completed with an AssertionCompletedException, but completed normally");
    } catch (CompletionException e) {
        String errorMsg = e.getCause().getMessage();
        assertTrue("Job was expected to complete with AssertionCompletedException, but completed with: "
                + e.getCause(), errorMsg.contains(AssertionCompletedException.class.getName()));
    }
}
 
Example 9
Source File: Lab1.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    StreamSource<Long> source = TestSources.itemStream(1, (ts, seq) -> seq);

    p.readFrom(source)
     .withoutTimestamps()
     .writeTo(Sinks.logger());

    // Run the code to see the results in the console
    // Stop it before leaving the lab

    return p;
}
 
Example 10
Source File: Solution2.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    StreamSource<Long> source = TestSources.itemStream(1, (ts, seq) -> seq);
    // StreamSource<String> source = Sources.fileWatcher(DIRECTORY);

    p.readFrom(source)
     .withoutTimestamps()
     // .map( line-> Long.valueOf(line))
     .filter(item -> (item % 2) == 0)
     .writeTo(Sinks.logger());

    return p;
}
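The commented-out lines show the file-watcher variant of the same pipeline; enabling both gives the following, where DIRECTORY is the lab's own constant:

StreamSource<String> source = Sources.fileWatcher(DIRECTORY);

p.readFrom(source)
 .withoutTimestamps()
 .map(line -> Long.valueOf(line))   // parse each new line as a number
 .filter(item -> (item % 2) == 0)   // keep only even values
 .writeTo(Sinks.logger());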
 
Example 11
Source File: Lab4.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline(IMap<String, String> lookupTable) {
    Pipeline p = Pipeline.create();

    p.readFrom(TradeSource.tradeSource())
     .withoutTimestamps()

    // Convert Trade stream to EnrichedTrade stream
    // - Trade (dto.Trade) has a symbol field
    // - Use LOOKUP_TABLE to look up full company name based on the symbol
    // - Create new Enriched Trade (dto.EnrichedTrade) using Trade and company name

     .writeTo(Sinks.logger());

    return p;
}
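One way to fill the gap in this lab is mapUsingIMap, which is exactly what Solution4 (Example 16 below) uses; a sketch of the completed method, relying on the lab's own TradeSource, Trade, and EnrichedTrade types:

private static Pipeline buildPipeline(IMap<String, String> lookupTable) {
    Pipeline p = Pipeline.create();

    p.readFrom(TradeSource.tradeSource())
     .withoutTimestamps()
     // Look up the full company name for each trade's symbol in the IMap,
     // then combine the trade and the name into an EnrichedTrade
     .mapUsingIMap(lookupTable, Trade::getSymbol,
             (trade, companyName) -> new EnrichedTrade(trade, companyName))
     .writeTo(Sinks.logger());

    return p;
}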
 
Example 12
Source File: InfluxDbSinkTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test_influxDbSink() {
    IList<Integer> measurements = jet.getList("mem_usage");
    for (int i = 0; i < VALUE_COUNT; i++) {
        measurements.add(i);
    }

    InfluxDB db = influxdbContainer.getNewInfluxDB();
    db.query(new Query("DROP SERIES FROM mem_usage"));

    Pipeline p = Pipeline.create();

    int startTime = 0;
    p.readFrom(Sources.list(measurements))
     .map(index -> Point.measurement("mem_usage")
                        .time(startTime + index, TimeUnit.MILLISECONDS)
                        .addField("value", index)
                        .build())
     .writeTo(InfluxDbSinks.influxDb(influxdbContainer.getUrl(), DATABASE_NAME, USERNAME, PASSWORD));

    jet.newJob(p).join();

    List<Result> results = db.query(new Query("SELECT * FROM mem_usage")).getResults();
    assertEquals(1, results.size());
    List<Series> seriesList = results.get(0).getSeries();
    assertEquals(1, seriesList.size());
    Series series = seriesList.get(0);
    assertEquals(SERIES, series.getName());
    assertEquals(VALUE_COUNT, series.getValues().size());
}
 
Example 13
Source File: Lab5.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    p.readFrom(TradeSource.tradeSource(1000))
     .withNativeTimestamps(0)

     // STEP 1 - Compute the sum of trades for 3-second intervals
     // - Use 3 sec tumbling windows (defined by WindowDefinition.tumbling with size 3000)
     // - Sum trade prices
     // Run the job and inspect the results. Stop the job before moving to STEP 2.

     // STEP 2 - Compute the sum of trades for 3-second intervals with speculative results every second
     // - Use early results when defining the window
     // - Watch the early-result flag in the console output
     // Run the job and inspect the results. Stop the job before moving to STEP 3.

     // STEP 3 - Compute the sum of trades over the last 3 seconds, updated each second
     // - Use 3 sec sliding windows with a 1 sec step
     // Run the job and inspect the results. Stop the job before moving to STEP 4.

     // STEP 4 - Compute the sum of trades over the last 3 seconds for each trading symbol
     // - Group the stream by trading symbol
     // - Use 3 sec sliding windows with a 1 sec step
     // Run the job and inspect the results. Stop the job before leaving the lab.

     .writeTo(Sinks.logger());

    return p;
}
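The commented-out steps in Solution5 (Example 6 above) are the answers to this lab; as one concrete instance, STEP 1 with a 3-second tumbling window looks roughly like this:

p.readFrom(TradeSource.tradeSource(1000))
 .withNativeTimestamps(0)
 // STEP 1: sum trade prices over 3-second tumbling windows
 .window(WindowDefinition.tumbling(3000))
 .aggregate(AggregateOperations.summingLong(Trade::getPrice))
 .writeTo(Sinks.logger());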
 
Example 14
Source File: AnalysisJet.java    From hazelcast-jet-demos with Apache License 2.0
/**
 * Helper method to construct the pipeline for the job
 *
 * @return the pipeline for the job
 */
public static Pipeline buildPipeline() {
    final Pipeline p = Pipeline.create();

    // Compute map server side
    final BatchStage<Horse> c = p.readFrom(Sources.map(EVENTS_BY_NAME, t -> true, HORSE_FROM_EVENT));

    final BatchStage<Entry<Horse, Long>> c2 = c.groupingKey(wholeItem())
                                               .aggregate(counting())
                                               .filter(ent -> ent.getValue() > 1);

    c2.writeTo(Sinks.map(MULTIPLE));

    return p;
}
 
Example 15
Source File: BreastCancerClassification.java    From hazelcast-jet-demos with Apache License 2.0
/**
 * Builds and returns the Pipeline which represents the actual computation.
 */
private static Pipeline buildPipeline(Path sourceFile) {
    Pipeline pipeline = Pipeline.create();

    BatchStage<BreastCancerDiagnostic> fileSource = pipeline.readFrom(filesBuilder(sourceFile.getParent().toString())
            .glob(sourceFile.getFileName().toString())
            .build(path -> Files.lines(path).skip(1).map(BreastCancerDiagnostic::new)))
                                                            .setName("Read from CSV input file");

    fileSource.apply(applyPredictionFromModelFile())
              .writeTo(Sinks.logger()).setName("Write to standard out");
    return pipeline;
}
 
Example 16
Source File: Solution4.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline(IMap<String, String> lookupTable) {
    Pipeline p = Pipeline.create();

    p.readFrom(TradeSource.tradeSource())
            .withNativeTimestamps(0)
            .mapUsingIMap(lookupTable, Trade::getSymbol,
                    (trade, companyName) -> new EnrichedTrade(trade, companyName) )
            .writeTo(Sinks.logger());

    return p;
}
 
Example 17
Source File: PulsarSourceTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void when_readFromPulsarConsumer_then_jobGetsAllPublishedMessages() {
    JetInstance[] instances = new JetInstance[2];
    Arrays.setAll(instances, i -> createJetMember());

    String topicName = randomName();
    StreamSource<String> pulsarConsumerSrc = setupConsumerSource(topicName,
            x -> new String(x.getData(), StandardCharsets.UTF_8));

    Pipeline pipeline = Pipeline.create();
    pipeline.readFrom(pulsarConsumerSrc)
            .withoutTimestamps()
            .writeTo(AssertionSinks.assertCollectedEventually(60,
                    list -> {
                        assertEquals("# of Emitted items should be equal to # of published items",
                                ITEM_COUNT, list.size());
                        for (int i = 0; i < ITEM_COUNT; i++) {
                            String message = "hello-pulsar-" + i;
                            Assert.assertTrue("missing entry: " + message, list.contains(message));
                        }
                    })
            );
    Job job = instances[0].newJob(pipeline);
    assertJobStatusEventually(job, JobStatus.RUNNING);

    produceMessages("hello-pulsar", topicName, ITEM_COUNT);

    try {
        job.join();
        fail("Job should have completed with an AssertionCompletedException, but completed normally");
    } catch (CompletionException e) {
        String errorMsg = e.getCause().getMessage();
        assertTrue("Job was expected to complete with AssertionCompletedException, but completed with: "
                + e.getCause(), errorMsg.contains(AssertionCompletedException.class.getName()));
    }
    for (JetInstance instance : instances) {
        instance.shutdown();
    }
}
 
Example 18
Source File: ModelServerClassification.java    From hazelcast-jet-demos with Apache License 2.0
private static Pipeline buildPipeline(String serverAddress, IMap<Long, String> reviewsMap) {
    ServiceFactory<Tuple2<PredictionServiceFutureStub, WordIndex>, Tuple2<PredictionServiceFutureStub, WordIndex>>
            tfServingContext = ServiceFactory
            .withCreateContextFn(context -> {
                WordIndex wordIndex = new WordIndex(context.attachedDirectory("data"));
                ManagedChannel channel = ManagedChannelBuilder.forTarget(serverAddress)
                                                              .usePlaintext().build();
                return Tuple2.tuple2(PredictionServiceGrpc.newFutureStub(channel), wordIndex);
            })
            .withDestroyContextFn(t -> ((ManagedChannel) t.f0().getChannel()).shutdownNow())
            .withCreateServiceFn((context, tuple2) -> tuple2);

    Pipeline p = Pipeline.create();
    p.readFrom(Sources.map(reviewsMap))
     .map(Map.Entry::getValue)
     .mapUsingServiceAsync(tfServingContext, 16, true, (t, review) -> {
         float[][] featuresTensorData = t.f1().createTensorInput(review);
         TensorProto.Builder featuresTensorBuilder = TensorProto.newBuilder();
         for (float[] featuresTensorDatum : featuresTensorData) {
             for (float v : featuresTensorDatum) {
                 featuresTensorBuilder.addFloatVal(v);
             }
         }
         TensorShapeProto.Dim featuresDim1 =
                 TensorShapeProto.Dim.newBuilder().setSize(featuresTensorData.length).build();
         TensorShapeProto.Dim featuresDim2 =
                 TensorShapeProto.Dim.newBuilder().setSize(featuresTensorData[0].length).build();
         TensorShapeProto featuresShape =
                 TensorShapeProto.newBuilder().addDim(featuresDim1).addDim(featuresDim2).build();
         featuresTensorBuilder.setDtype(org.tensorflow.framework.DataType.DT_FLOAT)
                              .setTensorShape(featuresShape);
         TensorProto featuresTensorProto = featuresTensorBuilder.build();

         // Generate gRPC request
         Int64Value version = Int64Value.newBuilder().setValue(1).build();
         Model.ModelSpec modelSpec =
                 Model.ModelSpec.newBuilder().setName("reviewSentiment").setVersion(version).build();
         Predict.PredictRequest request = Predict.PredictRequest.newBuilder()
                                                                .setModelSpec(modelSpec)
                                                                .putInputs("input_review", featuresTensorProto)
                                                                .build();

         return toCompletableFuture(t.f0().predict(request))
                 .thenApply(response -> {
                     float classification = response
                             .getOutputsOrThrow("dense_1/Sigmoid:0")
                             .getFloatVal(0);
                     // emit the review along with the classification
                     return tuple2(review, classification);
                 });
     })
     .setLocalParallelism(1) // one worker is enough to drive the async calls
     .writeTo(Sinks.logger());
    return p;
}
 
Example 19
Source File: Lab6.java    From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    p.readFrom(TradeSource.tradeSource(1))
     .withNativeTimestamps(0)

     // Detect if the price between two consecutive trades drops by more than 200

     // Use mapStateful to keep the price of the previous Trade
     // - Consider using com.hazelcast.jet.accumulator.LongAccumulator as a mutable container for long values
     // - Return the price difference if a drop is detected, nothing otherwise

     .writeTo(Sinks.logger(m -> "Price drop: " + m));

    return p;
}
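This list contains no solution for this lab, so here is a hedged sketch of the missing stage, using the LongAccumulator the comments suggest; the drop threshold of 200 comes from the comment, and returning null from mapStateful emits nothing for that item:

p.readFrom(TradeSource.tradeSource(1))
 .withNativeTimestamps(0)
 // Keep the previous trade's price in a LongAccumulator and emit the
 // difference whenever the price drops by more than 200
 .mapStateful(LongAccumulator::new, (prevPrice, trade) -> {
     long diff = prevPrice.get() - trade.getPrice();
     prevPrice.set(trade.getPrice());
     return diff > 200 ? diff : null; // null means "no output for this trade"
 })
 .writeTo(Sinks.logger(m -> "Price drop: " + m));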
 
Example 20
Source File: MongoDBSourceTest.java    From hazelcast-jet-contrib with Apache License 2.0
@Test
public void testStream_whenWatchDatabase() {
    IList<Document> list = jet.getList("list");

    String connectionString = mongoContainer.connectionString();
    long value = startAtOperationTime.getValue();

    StreamSource<? extends Document> source = MongoDBSourceBuilder
            .streamDatabase(SOURCE_NAME, () -> MongoClients.create(connectionString))
            .databaseFn(client -> client.getDatabase(DB_NAME))
            .destroyFn(MongoClient::close)
            .searchFn(db -> {
                List<Bson> aggregates = new ArrayList<>();
                aggregates.add(Aggregates.match(new Document("fullDocument.val", new Document("$gte", 10))
                        .append("operationType", "insert")));

                aggregates.add(Aggregates.project(new Document("fullDocument.val", 1).append("_id", 1)));
                return db.watch(aggregates);
            })
            .mapFn(ChangeStreamDocument::getFullDocument)
            .startAtOperationTimeFn(client -> new BsonTimestamp(value))
            .build();

    Pipeline p = Pipeline.create();
    p.readFrom(source)
     .withNativeTimestamps(0)
     .writeTo(Sinks.list(list));

    Job job = jet.newJob(p);

    MongoCollection<Document> col1 = collection("col1");
    MongoCollection<Document> col2 = collection("col2");

    col1.insertOne(new Document("val", 1));
    col1.insertOne(new Document("val", 10).append("foo", "bar"));

    col2.insertOne(new Document("val", 2));
    col2.insertOne(new Document("val", 11).append("foo", "bar"));

    assertTrueEventually(() -> {
        assertEquals(2, list.size());
        list.forEach(document -> assertNull(document.get("foo")));

        assertEquals(10, list.get(0).get("val"));
        assertEquals(11, list.get(1).get("val"));
    });

    col1.insertOne(new Document("val", 3));
    col1.insertOne(new Document("val", 12).append("foo", "bar"));

    col2.insertOne(new Document("val", 4));
    col2.insertOne(new Document("val", 13).append("foo", "bar"));

    assertTrueEventually(() -> {
        assertEquals(4, list.size());
        list.forEach(document -> assertNull(document.get("foo")));

        assertEquals(12, list.get(2).get("val"));
        assertEquals(13, list.get(3).get("val"));
    });

    job.cancel();
}