com.hazelcast.jet.pipeline.Sources Java Examples
The following examples show how to use com.hazelcast.jet.pipeline.Sources.
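Before diving into the project examples, here is a minimal, self-contained sketch of the pattern they all share: a source obtained from Sources feeds a Pipeline, which transforms the items and writes them to a sink. The sketch assumes the Hazelcast Jet 4.x API; the class name, list name, and item values are hypothetical placeholders and do not come from any of the projects below. Note also that Examples #5 and #12 use the older drawFrom/drainTo method names from Jet 3.x, while the other examples use the readFrom/writeTo names that replaced them in Jet 4.0.

// Minimal sketch (hypothetical names throughout): read a batch source created
// with Sources.list(), transform each item, and log the results.
// Assumes the Hazelcast Jet 4.x API (readFrom/writeTo).
import com.hazelcast.jet.Jet;
import com.hazelcast.jet.JetInstance;
import com.hazelcast.jet.pipeline.Pipeline;
import com.hazelcast.jet.pipeline.Sinks;
import com.hazelcast.jet.pipeline.Sources;

import java.util.List;

public class SourcesSketch {

    public static void main(String[] args) {
        JetInstance jet = Jet.newJetInstance();
        try {
            // Populate the IList that Sources.list("words") will read as a batch source.
            List<String> words = jet.getList("words");
            words.add("alpha");
            words.add("beta");

            Pipeline p = Pipeline.create();
            p.readFrom(Sources.<String>list("words"))
             .map(String::toUpperCase)
             .writeTo(Sinks.logger());

            jet.newJob(p).join();
        } finally {
            Jet.shutdownAll();
        }
    }
}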
Example #1
Source File: InfluxDbSinkTest.java From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test_influxDbSink_nonExistingDb() {
    IList<Integer> measurements = jet.getList("mem_usage");
    IntStream.range(0, VALUE_COUNT).forEach(measurements::add);
    influxdbContainer.getNewInfluxDB();

    Pipeline p = Pipeline.create();
    int startTime = 0;
    p.readFrom(Sources.list(measurements))
     .map(index -> Point.measurement("mem_usage")
                        .time(startTime + index, TimeUnit.MILLISECONDS)
                        .addField("value", index)
                        .build())
     .writeTo(InfluxDbSinks.influxDb(influxdbContainer.getUrl(), "non-existing", USERNAME, PASSWORD));

    expected.expectMessage("database not found: \"non-existing\"");
    jet.newJob(p).join();
}
Example #2
Source File: RedisSinkTest.java From hazelcast-jet-contrib with Apache License 2.0
@Test
public void stream() {
    IList<String> list = instance.getList("list");
    for (int i = 0; i < 10; i++) {
        list.add("key-" + i);
    }

    Pipeline p = Pipeline.create();
    p.readFrom(Sources.list(list))
     .writeTo(RedisSinks.stream("source", uri, "stream"));

    instance.newJob(p).join();

    RedisCommands<String, String> sync = connection.sync();
    List<StreamMessage<String, String>> messages = sync.xread(XReadArgs.StreamOffset.from("stream", "0"));
    assertEquals(list.size(), messages.size());
}
Example #3
Source File: MongoDBSinkTest.java From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test() {
    IList<Integer> list = jet.getList("list");
    for (int i = 0; i < 100; i++) {
        list.add(i);
    }

    String connectionString = mongoContainer.connectionString();

    Pipeline p = Pipeline.create();
    p.readFrom(Sources.list(list))
     .map(i -> new Document("key", i))
     .writeTo(MongoDBSinks.mongodb(SINK_NAME, connectionString, DB_NAME, COL_NAME));

    jet.newJob(p).join();

    MongoCollection<Document> collection = collection();
    assertEquals(100, collection.countDocuments());
}
Example #4
Source File: InProcessClassification.java From hazelcast-jet-demos with Apache License 2.0
private static Pipeline buildPipeline(IMap<Long, String> reviewsMap) {
    // Set up the mapping context that loads the model on each member, shared
    // by all parallel processors on that member.
    ServiceFactory<Tuple2<SavedModelBundle, WordIndex>, Tuple2<SavedModelBundle, WordIndex>> modelContext = ServiceFactory
            .withCreateContextFn(context -> {
                File data = context.attachedDirectory("data");
                SavedModelBundle bundle = SavedModelBundle.load(data.toPath().resolve("model/1").toString(), "serve");
                return tuple2(bundle, new WordIndex(data));
            })
            .withDestroyContextFn(t -> t.f0().close())
            .withCreateServiceFn((context, tuple2) -> tuple2);

    Pipeline p = Pipeline.create();
    p.readFrom(Sources.map(reviewsMap))
     .map(Map.Entry::getValue)
     .mapUsingService(modelContext, (tuple, review) -> classify(review, tuple.f0(), tuple.f1()))
     // TensorFlow executes models in parallel; we'll use 2 local threads to maximize throughput.
     .setLocalParallelism(2)
     .writeTo(Sinks.logger(t -> String.format("Sentiment rating for review \"%s\" is %.2f", t.f0(), t.f1())));
    return p;
}
Example #5
Source File: HazelcastJetInterpreterUtilsTest.java From zeppelin with Apache License 2.0
@Test
public void testDisplayNetworkFromDAGUtil() {
    Pipeline p = Pipeline.create();

    p.drawFrom(Sources.<String>list("text"))
     .flatMap(word -> traverseArray(word.toLowerCase().split("\\W+"))).setName("flat traversing")
     .filter(word -> !word.isEmpty())
     .groupingKey(wholeItem())
     .aggregate(counting())
     .drainTo(Sinks.map("counts"));

    assertEquals(
        NETWORK_RESULT_1,
        HazelcastJetInterpreterUtils.displayNetworkFromDAG(p.toDag())
    );
}
Example #6
Source File: InfluxDbSinkTest.java From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test_influxDbSink() {
    IList<Integer> measurements = jet.getList("mem_usage");
    for (int i = 0; i < VALUE_COUNT; i++) {
        measurements.add(i);
    }
    InfluxDB db = influxdbContainer.getNewInfluxDB();
    db.query(new Query("DROP SERIES FROM mem_usage"));

    Pipeline p = Pipeline.create();
    int startTime = 0;
    p.readFrom(Sources.list(measurements))
     .map(index -> Point.measurement("mem_usage")
                        .time(startTime + index, TimeUnit.MILLISECONDS)
                        .addField("value", index)
                        .build())
     .writeTo(InfluxDbSinks.influxDb(influxdbContainer.getUrl(), DATABASE_NAME, USERNAME, PASSWORD));

    jet.newJob(p).join();

    List<Result> results = db.query(new Query("SELECT * FROM mem_usage")).getResults();
    assertEquals(1, results.size());
    List<Series> seriesList = results.get(0).getSeries();
    assertEquals(1, seriesList.size());
    Series series = seriesList.get(0);
    assertEquals(SERIES, series.getName());
    assertEquals(VALUE_COUNT, series.getValues().size());
}
Example #7
Source File: MongoDBSinkTest.java From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test_whenServerNotAvailable() {
    String connectionString = mongoContainer.connectionString();
    mongoContainer.close();

    IList<Integer> list = jet.getList("list");
    for (int i = 0; i < 100; i++) {
        list.add(i);
    }

    Sink<Document> sink = MongoDBSinks
            .<Document>builder(SINK_NAME, () -> mongoClient(connectionString, 3))
            .databaseFn(client -> client.getDatabase(DB_NAME))
            .collectionFn(db -> db.getCollection(COL_NAME))
            .destroyFn(MongoClient::close)
            .build();

    Pipeline p = Pipeline.create();
    p.readFrom(Sources.list(list))
     .map(i -> new Document("key", i))
     .writeTo(sink);

    try {
        jet.newJob(p).join();
        fail();
    } catch (CompletionException e) {
        assertTrue(e.getCause() instanceof JetException);
    }
}
Example #8
Source File: PreciousHistory.java From hazelcast-jet-demos with Apache License 2.0
public static Pipeline build() {
    Pipeline p = Pipeline.create();

    // Palladium and Platinum only
    p.readFrom(Sources.<String, Object>mapJournal(
                Constants.IMAP_NAME_PRECIOUS,
                JournalInitialPosition.START_FROM_OLDEST))
     .withoutTimestamps()
     .map(e -> e.getKey() + "==" + e.getValue())
     .filter(str -> str.toLowerCase().startsWith("p"))
     .writeTo(Sinks.logger());

    return p;
}
Example #9
Source File: MarkovChainGenerator.java From hazelcast-jet-demos with Apache License 2.0
/**
 * Builds and returns the Pipeline which represents the actual computation.
 * To compute the probability of finding word B after A, one has to know
 * how many pairs contain word A as a first entry and how many of them
 * contain B as a second entry. The pipeline creates pairs from consecutive
 * words and computes the probabilities of A->B.
 */
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();
    // Reads files line-by-line
    BatchStage<String> lines = p.readFrom(Sources.<String>files(INPUT_FILE));
    Pattern twoWords = Pattern.compile("(\\.|\\w+)\\s(\\.|\\w+)");
    // Calculates probabilities by flatmapping lines into two-word consecutive pairs
    // using regular expressions and aggregates them into an IMap.
    lines.flatMap(e -> traverseMatcher(twoWords.matcher(e.toLowerCase()), m -> tuple2(m.group(1), m.group(2))))
         .groupingKey(Tuple2::f0)
         .aggregate(buildAggregateOp())
         .writeTo(Sinks.map("stateTransitions"));
    return p;
}
Example #10
Source File: JetBetMain.java From hazelcast-jet-demos with Apache License 2.0
/**
 * Helper method to construct the pipeline for the job
 *
 * @return the pipeline for the real-time analysis
 */
public static Pipeline buildPipeline() {
    final Pipeline pipeline = Pipeline.create();

    // Draw users from the Hazelcast IMDG source
    BatchStage<User> users = pipeline.readFrom(Sources.<User, Long, User>map(USER_ID, e -> true, Entry::getValue));

    // All bet legs which are single
    BatchStage<Tuple3<Race, Horse, Bet>> bets = users.flatMap(user -> traverseStream(
            user.getKnownBets().stream()
                .filter(Bet::single)
                .flatMap(bet -> bet.getLegs().stream().map(leg -> tuple3(leg.getRace(), leg.getBacking(), bet))))
    );

    // Find for each race the projected loss if each horse was to win
    BatchStage<Entry<Race, Map<Horse, Double>>> betsByRace = bets.groupingKey(Tuple3::f0).aggregate(
            AggregateOperations.toMap(
                    Tuple3::f1,
                    t -> t.f2().projectedPayout(t.f1()), // payout if backed horse was to win
                    (l, r) -> l + r
            )
    );

    // Write out: (r : (h : losses))
    betsByRace.writeTo(Sinks.map(WORST_ID));
    return pipeline;
}
Example #11
Source File: AnalysisJet.java From hazelcast-jet-demos with Apache License 2.0
/**
 * Helper method to construct the pipeline for the job
 *
 * @return the pipeline for the job
 */
public static Pipeline buildPipeline() {
    final Pipeline p = Pipeline.create();

    // Compute map server side
    final BatchStage<Horse> c = p.readFrom(Sources.map(EVENTS_BY_NAME, t -> true, HORSE_FROM_EVENT));

    final BatchStage<Entry<Horse, Long>> c2 = c.groupingKey(wholeItem())
                                               .aggregate(counting())
                                               .filter(ent -> ent.getValue() > 1);

    c2.writeTo(Sinks.map(MULTIPLE));

    return p;
}
Example #12
Source File: WordCounter.java From tutorials with MIT License
private Pipeline createPipeLine() {
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.<String>list(LIST_NAME))
     .flatMap(word -> traverseArray(word.toLowerCase().split("\\W+")))
     .filter(word -> !word.isEmpty())
     .groupingKey(wholeItem())
     .aggregate(counting())
     .drainTo(Sinks.map(MAP_NAME));
    return p;
}
Example #13
Source File: ModelServerClassification.java From hazelcast-jet-demos with Apache License 2.0
private static Pipeline buildPipeline(String serverAddress, IMap<Long, String> reviewsMap) {
    ServiceFactory<Tuple2<PredictionServiceFutureStub, WordIndex>, Tuple2<PredictionServiceFutureStub, WordIndex>> tfServingContext = ServiceFactory
            .withCreateContextFn(context -> {
                WordIndex wordIndex = new WordIndex(context.attachedDirectory("data"));
                ManagedChannel channel = ManagedChannelBuilder.forTarget(serverAddress)
                                                              .usePlaintext().build();
                return Tuple2.tuple2(PredictionServiceGrpc.newFutureStub(channel), wordIndex);
            })
            .withDestroyContextFn(t -> ((ManagedChannel) t.f0().getChannel()).shutdownNow())
            .withCreateServiceFn((context, tuple2) -> tuple2);

    Pipeline p = Pipeline.create();
    p.readFrom(Sources.map(reviewsMap))
     .map(Map.Entry::getValue)
     .mapUsingServiceAsync(tfServingContext, 16, true, (t, review) -> {
         float[][] featuresTensorData = t.f1().createTensorInput(review);
         TensorProto.Builder featuresTensorBuilder = TensorProto.newBuilder();
         for (float[] featuresTensorDatum : featuresTensorData) {
             for (float v : featuresTensorDatum) {
                 featuresTensorBuilder.addFloatVal(v);
             }
         }
         TensorShapeProto.Dim featuresDim1 = TensorShapeProto.Dim.newBuilder().setSize(featuresTensorData.length).build();
         TensorShapeProto.Dim featuresDim2 = TensorShapeProto.Dim.newBuilder().setSize(featuresTensorData[0].length).build();
         TensorShapeProto featuresShape = TensorShapeProto.newBuilder().addDim(featuresDim1).addDim(featuresDim2).build();
         featuresTensorBuilder.setDtype(org.tensorflow.framework.DataType.DT_FLOAT)
                              .setTensorShape(featuresShape);
         TensorProto featuresTensorProto = featuresTensorBuilder.build();

         // Generate gRPC request
         Int64Value version = Int64Value.newBuilder().setValue(1).build();
         Model.ModelSpec modelSpec = Model.ModelSpec.newBuilder().setName("reviewSentiment").setVersion(version).build();
         Predict.PredictRequest request = Predict.PredictRequest.newBuilder()
                                                                .setModelSpec(modelSpec)
                                                                .putInputs("input_review", featuresTensorProto)
                                                                .build();

         return toCompletableFuture(t.f0().predict(request))
                 .thenApply(response -> {
                     float classification = response
                             .getOutputsOrThrow("dense_1/Sigmoid:0")
                             .getFloatVal(0);
                     // emit the review along with the classification
                     return tuple2(review, classification);
                 });
     })
     .setLocalParallelism(1) // one worker is enough to drive the async calls
     .writeTo(Sinks.logger());
    return p;
}
Example #14
Source File: MovingAverage.java From hazelcast-jet-demos with Apache License 2.0
/**
 * <p>{@link com.hazelcast.jet.demos.bitcoin.Task4PriceFeed Task4PriceFeed} writes
 * the current price of Bitcoin into an
 * {@link com.hazelcast.map.IMap IMap}. This
 * {@link com.hazelcast.map.IMap IMap} is defined with a
 * {@link com.hazelcast.map.impl.journal.MapEventJournal MapEventJournal}
 * that allows Jet to track the history of changes. Use this as a
 * source to stream in.
 * </p>
 * <p>Don't bother yet with timestamps; they are added later
 * in the pipeline.
 * </p>
 * <p>Group (route) all events based on the key, which will be
 * "{@code BTCUSD}". However many Jet nodes are running, only
 * one will handle "{@code BTCUSD}".
 * </p>
 *
 * @param pipeline Will be empty
 * @return The first stage of the pipeline
 */
protected static StreamStageWithKey<Entry<String, Price>, String> buildPriceFeed(Pipeline pipeline) {
    return pipeline.readFrom(
                Sources.<String, Price>mapJournal(
                        MyConstants.IMAP_NAME_PRICES_IN,
                        JournalInitialPosition.START_FROM_OLDEST))
                   .withoutTimestamps()
                   .setName("priceFeed")
                   .groupingKey(Functions.entryKey());
}
Example #15
Source File: FileWatcher.java From hazelcast-jet-demos with Apache License 2.0
/**
 * <p>A custom source, a file watcher that produces a continuous stream
 * of lines in files in the "{@code beam-output}" directory.
 * </p>
 * <p>As the Beam job writes the lines, this job reads the lines.
 * </p>
 *
 * @return
 */
protected static StreamSource<String> buildFileWatcherSource() {
    return Sources.filesBuilder(".").glob("beam-output-*").buildWatcher();
}