com.hazelcast.jet.pipeline.Pipeline Java Examples
The following examples show how to use com.hazelcast.jet.pipeline.Pipeline.
Each example notes the original project and source file it was taken from.
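Before the examples, here is a minimal self-contained sketch of the basic shape of the API (assuming Hazelcast Jet 4.x on the classpath; the class name and items are illustrative): create a Pipeline, attach a source, transform the items, attach a sink, and submit the pipeline as a job.

import com.hazelcast.jet.Jet;
import com.hazelcast.jet.JetInstance;
import com.hazelcast.jet.pipeline.Pipeline;
import com.hazelcast.jet.pipeline.Sinks;
import com.hazelcast.jet.pipeline.test.TestSources;

public class MinimalPipeline {
    public static void main(String[] args) {
        // Declare the computation: source -> transform -> sink
        Pipeline p = Pipeline.create();
        p.readFrom(TestSources.items(1, 2, 3, 4))
         .map(i -> i * 10)
         .writeTo(Sinks.logger());

        // Submit it to an embedded Jet member and wait for completion
        JetInstance jet = Jet.newJetInstance();
        try {
            jet.newJob(p).join();
        } finally {
            jet.shutdown();
        }
    }
}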
Example #1
Source File: ModelServerClassification.java From hazelcast-jet-demos with Apache License 2.0
public static void main(String[] args) {
    System.setProperty("hazelcast.logging.type", "log4j");

    if (args.length != 2) {
        System.out.println("Usage: ModelServerClassification <data path> <model server address>");
        System.exit(1);
    }
    String dataPath = args[0];
    String serverAddress = args[1];
    JobConfig jobConfig = new JobConfig();
    jobConfig.attachDirectory(dataPath, "data");

    JetInstance instance = Jet.newJetInstance();
    try {
        IMap<Long, String> reviewsMap = instance.getMap("reviewsMap");
        SampleReviews.populateReviewsMap(reviewsMap);

        Pipeline p = buildPipeline(serverAddress, reviewsMap);

        instance.newJob(p, jobConfig).join();
    } finally {
        instance.shutdown();
    }
}
Example #2
Source File: PulsarSinkTest.java From hazelcast-jet-contrib with Apache License 2.0
@Test
public void testPulsarSink() throws PulsarClientException {
    String topicName = randomName();
    Sink<Integer> pulsarSink = setupSink(topicName); // the sink's projection function is Integer::doubleValue

    Pipeline p = Pipeline.create();
    List<Integer> numbers = IntStream.range(0, ITEM_COUNT).boxed().collect(Collectors.toList());
    p.readFrom(TestSources.items(numbers))
     .writeTo(pulsarSink);

    createJetMember().newJob(p).join();
    List<Double> list = consumeMessages(topicName, ITEM_COUNT);

    assertTrueEventually(() -> {
        Assert.assertEquals(ITEM_COUNT, list.size());
        for (double i = 0; i < ITEM_COUNT; i++) {
            assertTrue("missing entry: " + i, list.contains(i));
        }
    }, 10);
}
Example #3
Source File: TwitterSourceTest.java From hazelcast-jet-contrib with Apache License 2.0
@Test
public void testStream_withTermFilter() {
    Pipeline pipeline = Pipeline.create();
    List<String> terms = new ArrayList<String>(Arrays.asList("BTC", "ETH"));
    final StreamSource<String> twitterTestStream = TwitterSources.stream(
            credentials, () -> new StatusesFilterEndpoint().trackTerms(terms));
    StreamStage<String> tweets = pipeline
            .readFrom(twitterTestStream)
            .withoutTimestamps()
            .map(rawJson -> Json.parse(rawJson)
                                .asObject()
                                .getString("text", null));
    tweets.writeTo(AssertionSinks.assertCollectedEventually(60,
            list -> assertGreaterOrEquals("Emits at least 20 tweets in 1 min.", list.size(), 20)));
    Job job = jet.newJob(pipeline);
    sleepAtLeastSeconds(5);
    try {
        job.join();
        fail("Job should have completed with an AssertionCompletedException, but completed normally");
    } catch (CompletionException e) {
        String errorMsg = e.getCause().getMessage();
        assertTrue("Job was expected to complete with AssertionCompletedException, but completed with: "
                + e.getCause(), errorMsg.contains(AssertionCompletedException.class.getName()));
    }
}
Example #4
Source File: Solution5.java From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    SinkStage sinkStage = p.readFrom(TradeSource.tradeSource(1000))
            .withNativeTimestamps(0)

            // Step 1 solution
            // .window(WindowDefinition.tumbling(3000))
            // .aggregate(AggregateOperations.summingLong(Trade::getPrice))

            // Step 2 solution
            // .window(WindowDefinition.tumbling(3000).setEarlyResultsPeriod(1000))
            // .aggregate(AggregateOperations.summingLong(Trade::getPrice))

            // Step 3 solution
            // .window(WindowDefinition.sliding(3000, 1000))
            // .aggregate(AggregateOperations.summingLong(Trade::getPrice))

            // Step 4 solution
            // .groupingKey(Trade::getSymbol)
            // .window(WindowDefinition.sliding(3000, 1000))
            // .aggregate(AggregateOperations.summingLong(Trade::getPrice))

            .writeTo(Sinks.logger());

    return p;
}
Example #5
Source File: HazelcastJetInterpreterUtilsTest.java From zeppelin with Apache License 2.0
@Test
public void testDisplayNetworkFromDAGUtil() {
    Pipeline p = Pipeline.create();

    p.drawFrom(Sources.<String>list("text"))
     .flatMap(word -> traverseArray(word.toLowerCase().split("\\W+"))).setName("flat traversing")
     .filter(word -> !word.isEmpty())
     .groupingKey(wholeItem())
     .aggregate(counting())
     .drainTo(Sinks.map("counts"));

    assertEquals(
        NETWORK_RESULT_1,
        HazelcastJetInterpreterUtils.displayNetworkFromDAG(p.toDag())
    );
}
Example #6
Source File: InfluxDbSinkTest.java From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test_influxDbSink_nonExistingDb() {
    IList<Integer> measurements = jet.getList("mem_usage");
    IntStream.range(0, VALUE_COUNT).forEach(measurements::add);
    influxdbContainer.getNewInfluxDB();

    Pipeline p = Pipeline.create();
    int startTime = 0;
    p.readFrom(Sources.list(measurements))
     .map(index -> Point.measurement("mem_usage")
                        .time(startTime + index, TimeUnit.MILLISECONDS)
                        .addField("value", index)
                        .build())
     .writeTo(InfluxDbSinks.influxDb(influxdbContainer.getUrl(), "non-existing", USERNAME, PASSWORD));

    expected.expectMessage("database not found: \"non-existing\"");
    jet.newJob(p).join();
}
Example #7
Source File: InfluxDbSourceTest.java From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test_stream_influxDbSource_withPojoResultMapper() {
    InfluxDB db = influxdbContainer.getNewInfluxDB();
    fillCpuData(db);

    Pipeline p = Pipeline.create();
    p.readFrom(
            InfluxDbSources.influxDb("SELECT * FROM test_db..cpu",
                    DATABASE_NAME,
                    influxdbContainer.getUrl(),
                    USERNAME,
                    PASSWORD,
                    Cpu.class))
     .addTimestamps(cpu -> cpu.time.toEpochMilli(), 0)
     .writeTo(Sinks.list("results"));

    jet.newJob(p).join();

    assertEquals(VALUE_COUNT, jet.getList("results").size());
}
Example #8
Source File: InfluxDbSourceTest.java From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test_stream_influxDbSource_withMeasurementMapper() {
    InfluxDB db = influxdbContainer.getNewInfluxDB();
    fillData(db);

    Pipeline p = Pipeline.create();
    p.readFrom(
            InfluxDbSources.influxDb("SELECT * FROM test_db..test",
                    DATABASE_NAME,
                    influxdbContainer.getUrl(),
                    USERNAME,
                    PASSWORD,
                    (name, tags, columns, row) -> tuple2(row.get(0), row.get(1))))
     .writeTo(Sinks.list("results"));

    jet.newJob(p).join();

    assertEquals(VALUE_COUNT, jet.getList("results").size());
}
Example #9
Source File: Solution6.java From hazelcast-jet-training with Apache License 2.0
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();

    p.readFrom(TradeSource.tradeSource(1))
     .withNativeTimestamps(0)
     .mapStateful(
             LongAccumulator::new,
             (previousPrice, currentTrade) -> {
                 Long difference = previousPrice.get() - currentTrade.getPrice();
                 previousPrice.set(currentTrade.getPrice());
                 return (difference > PRICE_DROP_TRESHOLD) ? difference : null;
             })
     .writeTo(Sinks.logger(m -> "Price drop: " + m));

    return p;
}
Example #10
Source File: TwitterSourceTest.java From hazelcast-jet-contrib with Apache License 2.0
@Test
public void testBatch() {
    Pipeline pipeline = Pipeline.create();

    String query = "Jet flies";
    BatchSource<Status> twitterSearch = TwitterSources.search(credentials, query);
    BatchStage<String> tweets = pipeline
            .readFrom(twitterSearch)
            .map(status -> "@" + status.getUser() + " - " + status.getText());
    tweets.writeTo(AssertionSinks.assertCollectedEventually(60,
            list -> assertGreaterOrEquals("Emits at least 10 tweets in 1 minute.", list.size(), 10)));
    Job job = jet.newJob(pipeline);
    sleepAtLeastSeconds(5);
    try {
        job.join();
        fail("Job should have completed with an AssertionCompletedException, but completed normally");
    } catch (CompletionException e) {
        String errorMsg = e.getCause().getMessage();
        assertTrue("Job was expected to complete with AssertionCompletedException, but completed with: "
                + e.getCause(), errorMsg.contains(AssertionCompletedException.class.getName()));
    }
}
Example #11
Source File: RedisSinkTest.java From hazelcast-jet-contrib with Apache License 2.0
@Test
public void stream() {
    IList<String> list = instance.getList("list");
    for (int i = 0; i < 10; i++) {
        list.add("key-" + i);
    }

    Pipeline p = Pipeline.create();
    p.readFrom(Sources.list(list))
     .writeTo(RedisSinks.stream("source", uri, "stream"));

    instance.newJob(p).join();

    RedisCommands<String, String> sync = connection.sync();
    List<StreamMessage<String, String>> messages = sync.xread(XReadArgs.StreamOffset.from("stream", "0"));
    assertEquals(list.size(), messages.size());
}
Example #12
Source File: MongoDBSinkTest.java From hazelcast-jet-contrib with Apache License 2.0
@Test
public void test() {
    IList<Integer> list = jet.getList("list");
    for (int i = 0; i < 100; i++) {
        list.add(i);
    }

    String connectionString = mongoContainer.connectionString();

    Pipeline p = Pipeline.create();
    p.readFrom(Sources.list(list))
     .map(i -> new Document("key", i))
     .writeTo(MongoDBSinks.mongodb(SINK_NAME, connectionString, DB_NAME, COL_NAME));

    jet.newJob(p).join();

    MongoCollection<Document> collection = collection();
    assertEquals(100, collection.countDocuments());
}
Example #13
Source File: ReadKafkaIntoHazelcast.java From hazelcast-jet-demos with Apache License 2.0
public static Pipeline build(String bootstrapServers) {
    Properties properties = new Properties();
    properties.put(ConsumerConfig.GROUP_ID_CONFIG, UUID.randomUUID().toString());
    properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getCanonicalName());
    properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getCanonicalName());
    properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

    Pipeline pipeline = Pipeline.create();

    pipeline
        .readFrom(KafkaSources.kafka(properties, Constants.TOPIC_NAME_PRECIOUS))
        .withoutTimestamps()
        .writeTo(Sinks.map(Constants.IMAP_NAME_PRECIOUS));

    return pipeline;
}
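Since build() returns the Pipeline without submitting it, a hypothetical caller might look like this (the broker address is illustrative; Jet.bootstrappedInstance() assumes Jet 4.x):

// Submit the Kafka-to-IMap pipeline as a job and wait for it
JetInstance jet = Jet.bootstrappedInstance();
Pipeline pipeline = ReadKafkaIntoHazelcast.build("localhost:9092");
jet.newJob(pipeline).join();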
Example #14
Source File: DebeziumCDCWithKafkaAndJet.java From hazelcast-jet-demos with Apache License 2.0
public static void main(String[] args) {
    JetInstance jet = JetBootstrap.getInstance();

    Properties properties = new Properties();
    properties.setProperty("group.id", "cdc-demo");
    properties.setProperty("bootstrap.servers", "kafka:9092");
    properties.setProperty("key.deserializer", JsonDeserializer.class.getCanonicalName());
    properties.setProperty("value.deserializer", JsonDeserializer.class.getCanonicalName());
    properties.setProperty("auto.offset.reset", "earliest");

    Pipeline p = Pipeline.create();
    p.readFrom(KafkaSources.kafka(properties, record -> {
        HazelcastJsonValue key = new HazelcastJsonValue(record.key().toString());
        HazelcastJsonValue value = new HazelcastJsonValue(record.value().toString());
        return Util.entry(key, value);
    }, "dbserver1.inventory.customers"))
     .withoutTimestamps()
     .peek()
     .writeTo(Sinks.map("customers"));

    jet.newJob(p).join();
}
Example #15
Source File: FlightTelemetry.java From hazelcast-jet-demos with Apache License 2.0
public static void main(String[] args) {
    if (FlightDataSource.API_AUTHENTICATION_KEY.equals("YOUR_API_KEY_HERE")) {
        System.err.println("API_AUTHENTICATION_KEY not set in FlightDataSource.java");
        System.exit(1);
    }
    JetInstance jet = getJetInstance();
    Pipeline pipeline = buildPipeline();

    addListener(jet.getMap(TAKE_OFF_MAP), a -> System.out.println("New aircraft taking off: " + a));
    addListener(jet.getMap(LANDING_MAP), a -> System.out.println("New aircraft landing " + a));

    try {
        Job job = jet.newJob(pipeline, new JobConfig()
                .setName("FlightTelemetry")
                .setProcessingGuarantee(ProcessingGuarantee.EXACTLY_ONCE));
        job.join();
    } finally {
        Jet.shutdownAll();
    }
}
Example #16
Source File: Task1JetJob.java From hazelcast-jet-demos with Apache License 2.0
/**
 * <p>Run one copy of the Moving Average job
 * in this cluster.
 * </p>
 */
@Override
public void run(String... args) throws Exception {
    String prefix = this.getClass().getSimpleName() + " -";

    Pipeline pipeline = MovingAverage.build();
    JobConfig jobConfig = new JobConfig();
    jobConfig.setName(MyConstants.JOB_NAME);

    // Run job if not already present
    Job job = this.jetInstance.getJob(jobConfig.getName());
    if (job == null) {
        job = this.jetInstance.newJobIfAbsent(pipeline, jobConfig);
    }

    log.info("{} Job '{}', status '{}'.", prefix, job.getName(), job.getStatus());
}
Example #17
Source File: Lab4.java From hazelcast-jet-training with Apache License 2.0
public static void main(String[] args) {
    JetInstance jet = Jet.bootstrappedInstance();

    // symbol -> company name
    // random symbols from https://www.nasdaq.com
    IMap<String, String> lookupTable = jet.getMap(LOOKUP_TABLE);
    lookupTable.put("AAPL", "Apple Inc. - Common Stock");
    lookupTable.put("GOOGL", "Alphabet Inc.");
    lookupTable.put("MSFT", "Microsoft Corporation");

    Pipeline p = buildPipeline(lookupTable);

    try {
        jet.newJob(p).join();
    } finally {
        jet.shutdown();
    }
}
Example #18
Source File: TrafficPredictor.java From hazelcast-jet-demos with Apache License 2.0
public static void main(String[] args) {
    if (args.length != 2) {
        System.err.println("Missing command-line arguments: <input file> <output directory>");
        System.exit(1);
    }

    Path sourceFile = Paths.get(args[0]).toAbsolutePath();
    final String targetDirectory = args[1];
    if (!Files.isReadable(sourceFile)) {
        System.err.println("Source file does not exist or is not readable (" + sourceFile + ")");
        System.exit(1);
    }

    JetInstance instance = Jet.newJetInstance();
    Pipeline pipeline = buildPipeline(sourceFile, targetDirectory);
    try {
        instance.newJob(pipeline).join();
    } finally {
        Jet.shutdownAll();
    }
}
Example #19
Source File: RealTimeImageRecognition.java From hazelcast-jet-demos with Apache License 2.0
/**
 * Builds and returns the Pipeline which represents the actual computation.
 */
private static Pipeline buildPipeline() {
    Pipeline pipeline = Pipeline.create();

    pipeline.readFrom(WebcamSource.webcam(500))
            .withIngestionTimestamps()
            .mapUsingService(classifierContext(),
                    (ctx, img) -> {
                        Entry<String, Double> classification = classifyWithModel(ctx, img);
                        return tuple3(img, classification.getKey(), classification.getValue());
                    }
            )
            .window(tumbling(1000))
            .aggregate(maxBy(comparingDouble(Tuple3::f2)))
            .writeTo(buildGUISink());
    return pipeline;
}
Example #20
Source File: RealTimeImageRecognition.java From hazelcast-jet-demos with Apache License 2.0
public static void main(String[] args) {
    validateWebcam();
    if (args.length != 1) {
        System.err.println("Missing command-line argument: <model path>");
        System.exit(1);
    }
    Path modelPath = Paths.get(args[0]).toAbsolutePath();
    if (!Files.isDirectory(modelPath)) {
        System.err.println("Model path does not exist (" + modelPath + ")");
        System.exit(1);
    }

    Pipeline pipeline = buildPipeline();

    JobConfig jobConfig = new JobConfig();
    jobConfig.attachDirectory(modelPath.toString(), "model");

    JetInstance jet = Jet.newJetInstance();
    try {
        jet.newJob(pipeline, jobConfig).join();
    } finally {
        Jet.shutdownAll();
    }
}
Example #21
Source File: Solution4.java From hazelcast-jet-training with Apache License 2.0
public static void main(String[] args) {
    JetInstance jet = Jet.bootstrappedInstance();

    // symbol -> company name
    IMap<String, String> lookupTable = jet.getMap(LOOKUP_TABLE);
    lookupTable.put("AAPL", "Apple Inc. - Common Stock");
    lookupTable.put("GOOGL", "Alphabet Inc.");
    lookupTable.put("MSFT", "Microsoft Corporation");

    Pipeline p = buildPipeline(lookupTable);

    try {
        jet.newJob(p).join();
    } finally {
        jet.shutdown();
    }
}
Example #22
Source File: TradeAnalysis.java From hazelcast-jet-training with Apache License 2.0
public static void main(String[] args) {
    Pipeline p = buildPipeline();

    JetInstance jet = Jet.bootstrappedInstance();
    try {
        JobConfig jobConfig = new JobConfig()
                .setAutoScaling(true)
                .setName("TradeAnalysis")
                .setProcessingGuarantee(ProcessingGuarantee.EXACTLY_ONCE);
        jet.newJob(p, jobConfig).join();
    } finally {
        Jet.shutdownAll();
    }
}
Example #23
Source File: WordCounter.java From tutorials with MIT License
public Long countWord(List<String> sentences, String word) {
    long count = 0;
    JetInstance jet = Jet.newJetInstance();
    try {
        List<String> textList = jet.getList(LIST_NAME);
        textList.addAll(sentences);

        Pipeline p = createPipeLine();

        jet.newJob(p).join();

        Map<String, Long> counts = jet.getMap(MAP_NAME);
        count = counts.get(word);
    } finally {
        Jet.shutdownAll();
    }
    return count;
}
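A hypothetical call to this method (the sentences are illustrative; createPipeLine() is shown in Example #26 below and splits lowercased text on non-word characters):

// "hello" appears once in each sentence, so the expected count is 2
Long count = new WordCounter().countWord(
        Arrays.asList("hello world", "hello jet"),
        "hello");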
Example #24
Source File: InProcessClassification.java From hazelcast-jet-demos with Apache License 2.0
private static Pipeline buildPipeline(IMap<Long, String> reviewsMap) {
    // Set up the mapping context that loads the model on each member, shared
    // by all parallel processors on that member.
    ServiceFactory<Tuple2<SavedModelBundle, WordIndex>, Tuple2<SavedModelBundle, WordIndex>> modelContext =
            ServiceFactory
                    .withCreateContextFn(context -> {
                        File data = context.attachedDirectory("data");
                        SavedModelBundle bundle =
                                SavedModelBundle.load(data.toPath().resolve("model/1").toString(), "serve");
                        return tuple2(bundle, new WordIndex(data));
                    })
                    .withDestroyContextFn(t -> t.f0().close())
                    .withCreateServiceFn((context, tuple2) -> tuple2);

    Pipeline p = Pipeline.create();
    p.readFrom(Sources.map(reviewsMap))
     .map(Map.Entry::getValue)
     .mapUsingService(modelContext, (tuple, review) -> classify(review, tuple.f0(), tuple.f1()))
     // TensorFlow executes models in parallel, we'll use 2 local threads to maximize throughput.
     .setLocalParallelism(2)
     .writeTo(Sinks.logger(t -> String.format("Sentiment rating for review \"%s\" is %.2f", t.f0(), t.f1())));
    return p;
}
Example #25
Source File: ApplicationRunner.java From hazelcast-jet-demos with Apache License 2.0
@Override
public void run(String... args) throws Exception {
    Pipeline pipeline = FileWatcher.build();
    JobConfig jobConfig = new JobConfig();
    jobConfig.setName(FileWatcher.class.getSimpleName());

    this.jetInstance.newJobIfAbsent(pipeline, jobConfig);
}
Example #26
Source File: WordCounter.java From tutorials with MIT License
private Pipeline createPipeLine() {
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.<String>list(LIST_NAME))
     .flatMap(word -> traverseArray(word.toLowerCase().split("\\W+")))
     .filter(word -> !word.isEmpty())
     .groupingKey(wholeItem())
     .aggregate(counting())
     .drainTo(Sinks.map(MAP_NAME));
    return p;
}
Example #27
Source File: Lab6.java From hazelcast-jet-training with Apache License 2.0
public static void main(String[] args) {
    Pipeline p = buildPipeline();

    JetInstance jet = Jet.bootstrappedInstance();
    try {
        Job job = jet.newJob(p);
        job.join();
    } finally {
        jet.shutdown();
    }
}
Example #28
Source File: Lab3.java From hazelcast-jet-training with Apache License 2.0
public static void main(String[] args) {
    JetInstance jet = Jet.bootstrappedInstance();

    // Subscribe for map events
    jet.getMap(LATEST_TRADES_PER_SYMBOL).addEntryListener(new TradeListener(), true);

    Pipeline p = buildPipeline();

    try {
        jet.newJob(p).join();
    } finally {
        jet.shutdown();
    }
}
Example #29
Source File: BreastCancerClassification.java From hazelcast-jet-demos with Apache License 2.0
/**
 * Builds and returns the Pipeline which represents the actual computation.
 */
private static Pipeline buildPipeline(Path sourceFile) {
    Pipeline pipeline = Pipeline.create();

    BatchStage<BreastCancerDiagnostic> fileSource = pipeline
            .readFrom(filesBuilder(sourceFile.getParent().toString())
                    .glob(sourceFile.getFileName().toString())
                    .build(path -> Files.lines(path).skip(1).map(BreastCancerDiagnostic::new)))
            .setName("Read from CSV input file");

    fileSource.apply(applyPredictionFromModelFile())
              .writeTo(Sinks.logger()).setName("Write to standard out");
    return pipeline;
}
Example #30
Source File: MarkovChainGenerator.java From hazelcast-jet-demos with Apache License 2.0
/**
 * Builds and returns the Pipeline which represents the actual computation.
 * To compute the probability of finding word B after A, one has to know
 * how many pairs contain word A as a first entry and how many of them
 * contain B as a second entry. The pipeline creates pairs from consecutive
 * words and computes the probabilities of A->B.
 */
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();
    // Reads files line-by-line
    BatchStage<String> lines = p.readFrom(Sources.<String>files(INPUT_FILE));
    Pattern twoWords = Pattern.compile("(\\.|\\w+)\\s(\\.|\\w+)");
    // Calculates probabilities by flatmapping lines into two-word consecutive
    // pairs using regular expressions and aggregates them into an IMap.
    lines.flatMap(e -> traverseMatcher(twoWords.matcher(e.toLowerCase()), m -> tuple2(m.group(1), m.group(2))))
         .groupingKey(Tuple2::f0)
         .aggregate(buildAggregateOp())
         .writeTo(Sinks.map("stateTransitions"));
    return p;
}
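The helper buildAggregateOp() is not shown in this excerpt. A hypothetical sketch with the matching shape, not the demo's actual implementation, could count how often each second word follows the grouping key's first word and then normalize the counts into transition probabilities:

// Hypothetical sketch: aggregate word pairs grouped by their first word
// into a map of second-word -> transition probability.
private static AggregateOperation1<Tuple2<String, String>, ?, Map<String, Double>> buildAggregateOp() {
    return AggregateOperations
            .groupingBy(Tuple2::f1, AggregateOperations.counting())
            .andThen(counts -> {
                long total = counts.values().stream().mapToLong(Long::longValue).sum();
                Map<String, Double> probabilities = new HashMap<>();
                counts.forEach((word, count) -> probabilities.put(word, count / (double) total));
                return probabilities;
            });
}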