com.hazelcast.jet.pipeline.Sources Java Examples
The following examples show how to use com.hazelcast.jet.pipeline.Sources.
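Before diving into the project examples, here is a minimal, self-contained sketch of the pattern they all share: a source obtained from Sources feeds a Pipeline, which transforms the items and writes them to a sink. The sketch assumes the Hazelcast Jet 4.x API; the class name, list name, and item values are hypothetical placeholders and do not come from any of the projects below. Note also that Examples #5 and #12 use the older drawFrom/drainTo method names from Jet 3.x, while the other examples use the readFrom/writeTo names that replaced them in Jet 4.0.

// Minimal sketch (hypothetical names throughout): read a batch source created
// with Sources.list(), transform each item, and log the results.
// Assumes the Hazelcast Jet 4.x API (readFrom/writeTo).
import com.hazelcast.jet.Jet;
import com.hazelcast.jet.JetInstance;
import com.hazelcast.jet.pipeline.Pipeline;
import com.hazelcast.jet.pipeline.Sinks;
import com.hazelcast.jet.pipeline.Sources;

import java.util.List;

public class SourcesSketch {

    public static void main(String[] args) {
        JetInstance jet = Jet.newJetInstance();
        try {
            // Populate the IList that Sources.list("words") will read as a batch source.
            List<String> words = jet.getList("words");
            words.add("alpha");
            words.add("beta");

            Pipeline p = Pipeline.create();
            p.readFrom(Sources.<String>list("words"))
             .map(String::toUpperCase)
             .writeTo(Sinks.logger());

            jet.newJob(p).join();
        } finally {
            Jet.shutdownAll();
        }
    }
}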
Example #1
Source File: InfluxDbSinkTest.java From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test_influxDbSink_nonExistingDb() {
    IList<Integer> measurements = jet.getList("mem_usage");
    IntStream.range(0, VALUE_COUNT).forEach(measurements::add);
    influxdbContainer.getNewInfluxDB();

    Pipeline p = Pipeline.create();
    int startTime = 0;
    p.readFrom(Sources.list(measurements))
     .map(index -> Point.measurement("mem_usage")
                        .time(startTime + index, TimeUnit.MILLISECONDS)
                        .addField("value", index)
                        .build())
     .writeTo(InfluxDbSinks.influxDb(influxdbContainer.getUrl(), "non-existing", USERNAME, PASSWORD));

    expected.expectMessage("database not found: \"non-existing\"");
    jet.newJob(p).join();
}
Example #2
Source File: RedisSinkTest.java From hazelcast-jet-contrib with Apache License 2.0
@Test
public void stream() {
    IList<String> list = instance.getList("list");
    for (int i = 0; i < 10; i++) {
        list.add("key-" + i);
    }

    Pipeline p = Pipeline.create();
    p.readFrom(Sources.list(list))
     .writeTo(RedisSinks.stream("source", uri, "stream"));

    instance.newJob(p).join();

    RedisCommands<String, String> sync = connection.sync();
    List<StreamMessage<String, String>> messages = sync.xread(XReadArgs.StreamOffset.from("stream", "0"));
    assertEquals(list.size(), messages.size());
}
Example #3
Source File: MongoDBSinkTest.java From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test() {
    IList<Integer> list = jet.getList("list");
    for (int i = 0; i < 100; i++) {
        list.add(i);
    }

    String connectionString = mongoContainer.connectionString();

    Pipeline p = Pipeline.create();
    p.readFrom(Sources.list(list))
     .map(i -> new Document("key", i))
     .writeTo(MongoDBSinks.mongodb(SINK_NAME, connectionString, DB_NAME, COL_NAME));

    jet.newJob(p).join();

    MongoCollection<Document> collection = collection();
    assertEquals(100, collection.countDocuments());
}
Example #4
Source File: InProcessClassification.java From hazelcast-jet-demos with Apache License 2.0
private static Pipeline buildPipeline(IMap<Long, String> reviewsMap) {
    // Set up the mapping context that loads the model on each member, shared
    // by all parallel processors on that member.
    ServiceFactory<Tuple2<SavedModelBundle, WordIndex>, Tuple2<SavedModelBundle, WordIndex>> modelContext = ServiceFactory
            .withCreateContextFn(context -> {
                File data = context.attachedDirectory("data");
                SavedModelBundle bundle = SavedModelBundle.load(data.toPath().resolve("model/1").toString(), "serve");
                return tuple2(bundle, new WordIndex(data));
            })
            .withDestroyContextFn(t -> t.f0().close())
            .withCreateServiceFn((context, tuple2) -> tuple2);

    Pipeline p = Pipeline.create();
    p.readFrom(Sources.map(reviewsMap))
     .map(Map.Entry::getValue)
     .mapUsingService(modelContext, (tuple, review) -> classify(review, tuple.f0(), tuple.f1()))
     // TensorFlow executes models in parallel; we'll use 2 local threads to maximize throughput.
     .setLocalParallelism(2)
     .writeTo(Sinks.logger(t -> String.format("Sentiment rating for review \"%s\" is %.2f", t.f0(), t.f1())));
    return p;
}
Example #5
Source File: HazelcastJetInterpreterUtilsTest.java From zeppelin with Apache License 2.0
@Test
public void testDisplayNetworkFromDAGUtil() {
    Pipeline p = Pipeline.create();

    p.drawFrom(Sources.<String>list("text"))
     .flatMap(word -> traverseArray(word.toLowerCase().split("\\W+"))).setName("flat traversing")
     .filter(word -> !word.isEmpty())
     .groupingKey(wholeItem())
     .aggregate(counting())
     .drainTo(Sinks.map("counts"));

    assertEquals(
        NETWORK_RESULT_1,
        HazelcastJetInterpreterUtils.displayNetworkFromDAG(p.toDag())
    );
}
Example #6
Source File: InfluxDbSinkTest.java From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test_influxDbSink() {
    IList<Integer> measurements = jet.getList("mem_usage");
    for (int i = 0; i < VALUE_COUNT; i++) {
        measurements.add(i);
    }
    InfluxDB db = influxdbContainer.getNewInfluxDB();
    db.query(new Query("DROP SERIES FROM mem_usage"));

    Pipeline p = Pipeline.create();
    int startTime = 0;
    p.readFrom(Sources.list(measurements))
     .map(index -> Point.measurement("mem_usage")
                        .time(startTime + index, TimeUnit.MILLISECONDS)
                        .addField("value", index)
                        .build())
     .writeTo(InfluxDbSinks.influxDb(influxdbContainer.getUrl(), DATABASE_NAME, USERNAME, PASSWORD));

    jet.newJob(p).join();

    List<Result> results = db.query(new Query("SELECT * FROM mem_usage")).getResults();
    assertEquals(1, results.size());
    List<Series> seriesList = results.get(0).getSeries();
    assertEquals(1, seriesList.size());
    Series series = seriesList.get(0);
    assertEquals(SERIES, series.getName());
    assertEquals(VALUE_COUNT, series.getValues().size());
}
Example #7
Source File: MongoDBSinkTest.java From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test_whenServerNotAvailable() {
    String connectionString = mongoContainer.connectionString();
    mongoContainer.close();

    IList<Integer> list = jet.getList("list");
    for (int i = 0; i < 100; i++) {
        list.add(i);
    }

    Sink<Document> sink = MongoDBSinks
            .<Document>builder(SINK_NAME, () -> mongoClient(connectionString, 3))
            .databaseFn(client -> client.getDatabase(DB_NAME))
            .collectionFn(db -> db.getCollection(COL_NAME))
            .destroyFn(MongoClient::close)
            .build();

    Pipeline p = Pipeline.create();
    p.readFrom(Sources.list(list))
     .map(i -> new Document("key", i))
     .writeTo(sink);

    try {
        jet.newJob(p).join();
        fail();
    } catch (CompletionException e) {
        assertTrue(e.getCause() instanceof JetException);
    }
}
Example #8
Source File: PreciousHistory.java From hazelcast-jet-demos with Apache License 2.0
public static Pipeline build() {
    Pipeline p = Pipeline.create();

    // Palladium and Platinum only
    p.readFrom(Sources.<String, Object>mapJournal(
                Constants.IMAP_NAME_PRECIOUS,
                JournalInitialPosition.START_FROM_OLDEST))
     .withoutTimestamps()
     .map(e -> e.getKey() + "==" + e.getValue())
     .filter(str -> str.toLowerCase().startsWith("p"))
     .writeTo(Sinks.logger());

    return p;
}
Example #9
Source File: MarkovChainGenerator.java From hazelcast-jet-demos with Apache License 2.0
/**
 * Builds and returns the Pipeline which represents the actual computation.
 * To compute the probability of finding word B after A, one has to know
 * how many pairs contain word A as a first entry and how many of them
 * contain B as a second entry. The pipeline creates pairs from consecutive
 * words and computes the probabilities of A->B.
 */
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();
    // Reads files line-by-line
    BatchStage<String> lines = p.readFrom(Sources.<String>files(INPUT_FILE));
    Pattern twoWords = Pattern.compile("(\\.|\\w+)\\s(\\.|\\w+)");
    // Calculates probabilities by flatmapping lines into two-word consecutive pairs
    // using regular expressions and aggregates them into an IMap.
    lines.flatMap(e -> traverseMatcher(twoWords.matcher(e.toLowerCase()), m -> tuple2(m.group(1), m.group(2))))
         .groupingKey(Tuple2::f0)
         .aggregate(buildAggregateOp())
         .writeTo(Sinks.map("stateTransitions"));
    return p;
}
Example #10
Source File: JetBetMain.java From hazelcast-jet-demos with Apache License 2.0
/**
 * Helper method to construct the pipeline for the job
 *
 * @return the pipeline for the real-time analysis
 */
public static Pipeline buildPipeline() {
    final Pipeline pipeline = Pipeline.create();

    // Draw users from the Hazelcast IMDG source
    BatchStage<User> users = pipeline.readFrom(Sources.<User, Long, User>map(USER_ID, e -> true, Entry::getValue));

    // All bet legs which are single
    BatchStage<Tuple3<Race, Horse, Bet>> bets = users.flatMap(user -> traverseStream(
            user.getKnownBets().stream()
                .filter(Bet::single)
                .flatMap(bet -> bet.getLegs().stream().map(leg -> tuple3(leg.getRace(), leg.getBacking(), bet))))
    );

    // Find for each race the projected loss if each horse was to win
    BatchStage<Entry<Race, Map<Horse, Double>>> betsByRace = bets.groupingKey(Tuple3::f0).aggregate(
            AggregateOperations.toMap(
                    Tuple3::f1,
                    t -> t.f2().projectedPayout(t.f1()), // payout if backed horse was to win
                    (l, r) -> l + r
            )
    );

    // Write out: (r : (h : losses))
    betsByRace.writeTo(Sinks.map(WORST_ID));
    return pipeline;
}
Example #11
Source File: AnalysisJet.java From hazelcast-jet-demos with Apache License 2.0
/**
 * Helper method to construct the pipeline for the job
 *
 * @return the pipeline for the job
 */
public static Pipeline buildPipeline() {
    final Pipeline p = Pipeline.create();

    // Compute map server side
    final BatchStage<Horse> c = p.readFrom(Sources.map(EVENTS_BY_NAME, t -> true, HORSE_FROM_EVENT));

    final BatchStage<Entry<Horse, Long>> c2 = c.groupingKey(wholeItem())
                                               .aggregate(counting())
                                               .filter(ent -> ent.getValue() > 1);

    c2.writeTo(Sinks.map(MULTIPLE));

    return p;
}
Example #12
Source File: WordCounter.java From tutorials with MIT License
private Pipeline createPipeLine() {
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.<String>list(LIST_NAME))
     .flatMap(word -> traverseArray(word.toLowerCase().split("\\W+")))
     .filter(word -> !word.isEmpty())
     .groupingKey(wholeItem())
     .aggregate(counting())
     .drainTo(Sinks.map(MAP_NAME));
    return p;
}
Example #13
Source File: ModelServerClassification.java From hazelcast-jet-demos with Apache License 2.0
private static Pipeline buildPipeline(String serverAddress, IMap<Long, String> reviewsMap) {
    ServiceFactory<Tuple2<PredictionServiceFutureStub, WordIndex>, Tuple2<PredictionServiceFutureStub, WordIndex>> tfServingContext = ServiceFactory
            .withCreateContextFn(context -> {
                WordIndex wordIndex = new WordIndex(context.attachedDirectory("data"));
                ManagedChannel channel = ManagedChannelBuilder.forTarget(serverAddress)
                                                              .usePlaintext().build();
                return Tuple2.tuple2(PredictionServiceGrpc.newFutureStub(channel), wordIndex);
            })
            .withDestroyContextFn(t -> ((ManagedChannel) t.f0().getChannel()).shutdownNow())
            .withCreateServiceFn((context, tuple2) -> tuple2);

    Pipeline p = Pipeline.create();
    p.readFrom(Sources.map(reviewsMap))
     .map(Map.Entry::getValue)
     .mapUsingServiceAsync(tfServingContext, 16, true, (t, review) -> {
         float[][] featuresTensorData = t.f1().createTensorInput(review);
         TensorProto.Builder featuresTensorBuilder = TensorProto.newBuilder();
         for (float[] featuresTensorDatum : featuresTensorData) {
             for (float v : featuresTensorDatum) {
                 featuresTensorBuilder.addFloatVal(v);
             }
         }
         TensorShapeProto.Dim featuresDim1 = TensorShapeProto.Dim.newBuilder().setSize(featuresTensorData.length).build();
         TensorShapeProto.Dim featuresDim2 = TensorShapeProto.Dim.newBuilder().setSize(featuresTensorData[0].length).build();
         TensorShapeProto featuresShape = TensorShapeProto.newBuilder().addDim(featuresDim1).addDim(featuresDim2).build();
         featuresTensorBuilder.setDtype(org.tensorflow.framework.DataType.DT_FLOAT)
                              .setTensorShape(featuresShape);
         TensorProto featuresTensorProto = featuresTensorBuilder.build();

         // Generate gRPC request
         Int64Value version = Int64Value.newBuilder().setValue(1).build();
         Model.ModelSpec modelSpec = Model.ModelSpec.newBuilder().setName("reviewSentiment").setVersion(version).build();
         Predict.PredictRequest request = Predict.PredictRequest.newBuilder()
                                                                .setModelSpec(modelSpec)
                                                                .putInputs("input_review", featuresTensorProto)
                                                                .build();

         return toCompletableFuture(t.f0().predict(request))
                 .thenApply(response -> {
                     float classification = response
                             .getOutputsOrThrow("dense_1/Sigmoid:0")
                             .getFloatVal(0);
                     // emit the review along with the classification
                     return tuple2(review, classification);
                 });
     })
     .setLocalParallelism(1) // one worker is enough to drive the async calls
     .writeTo(Sinks.logger());
    return p;
}
Example #14
Source File: MovingAverage.java From hazelcast-jet-demos with Apache License 2.0
/**
 * <p>{@link com.hazelcast.jet.demos.bitcoin.Task4PriceFeed Task4PriceFeed} writes
 * the current price of Bitcoin into an
 * {@link com.hazelcast.map.IMap IMap}. This
 * {@link com.hazelcast.map.IMap IMap} is defined with a
 * {@link com.hazelcast.map.impl.journal.MapEventJournal MapEventJournal}
 * that allows Jet to track the history of changes. Use this as a
 * source to stream in.
 * </p>
 * <p>Don't bother yet with timestamps; they are added later
 * in the pipeline.
 * </p>
 * <p>Group (route) all events based on the key, which will be
 * "{@code BTCUSD}". However many Jet nodes are running, only
 * one will handle "{@code BTCUSD}".
 * </p>
 *
 * @param pipeline Will be empty
 * @return The first stage of the pipeline
 */
protected static StreamStageWithKey<Entry<String, Price>, String> buildPriceFeed(Pipeline pipeline) {
    return pipeline.readFrom(
                Sources.<String, Price>mapJournal(
                        MyConstants.IMAP_NAME_PRICES_IN,
                        JournalInitialPosition.START_FROM_OLDEST))
                   .withoutTimestamps()
                   .setName("priceFeed")
                   .groupingKey(Functions.entryKey());
}
Example #15
Source File: FileWatcher.java From hazelcast-jet-demos with Apache License 2.0
/**
 * <p>A custom source, a file watcher that produces a continuous stream
 * of lines in files in the "{@code beam-output}" directory.
 * </p>
 * <p>As the Beam job writes the lines, this job reads the lines.
 * </p>
 *
 * @return
 */
protected static StreamSource<String> buildFileWatcherSource() {
    return Sources.filesBuilder(".").glob("beam-output-*").buildWatcher();
}