Java Code Examples for org.apache.flink.streaming.api.datastream.DataStreamSource#addSink()
The following examples show how to use org.apache.flink.streaming.api.datastream.DataStreamSource#addSink().
Each example is taken from an open-source project; the source file, project, and license are noted above the code.
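Before the project-specific snippets, here is a minimal, self-contained sketch of the pattern every example below follows: obtain a DataStreamSource from a StreamExecutionEnvironment and terminate it with addSink(). This sketch is not taken from any of the projects below; the class name AddSinkSketch is made up for illustration, and DiscardingSink is chosen only so the sketch runs without any external system.

import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.DiscardingSink;

public class AddSinkSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // fromElements() is one of several factory methods that return a DataStreamSource.
        DataStreamSource<String> source = env.fromElements("a", "b", "c");

        // addSink() terminates the stream with a SinkFunction; DiscardingSink simply drops every record.
        source.addSink(new DiscardingSink<>());

        env.execute("addSink sketch");
    }
}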
Example 1
Source File: ElasticsearchSinkTestBase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Tests whether the Elasticsearch sink fails when there is no cluster to connect to.
 */
public void runInvalidElasticsearchClusterTest() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStreamSource<Tuple2<Integer, String>> source = env.addSource(new SourceSinkDataTestKit.TestDataSourceFunction());

    Map<String, String> userConfig = new HashMap<>();
    userConfig.put(ElasticsearchSinkBase.CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS, "1");
    userConfig.put("cluster.name", "invalid-cluster-name");

    source.addSink(createElasticsearchSinkForNode(
            1,
            "invalid-cluster-name",
            new SourceSinkDataTestKit.TestElasticsearchSinkFunction("test"),
            "123.123.123.123")); // incorrect ip address

    try {
        env.execute("Elasticsearch Sink Test");
    } catch (JobExecutionException expectedException) {
        // test passes
        return;
    }

    fail();
}
Example 2
Source File: StreamExecutionEnvironmentTest.java From flink with Apache License 2.0 | 6 votes |
@Test
public void testGetStreamGraph() {
    try {
        TypeInformation<Integer> typeInfo = BasicTypeInfo.INT_TYPE_INFO;
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStreamSource<Integer> dataStream1 = env.fromCollection(new DummySplittableIterator<Integer>(), typeInfo);
        dataStream1.addSink(new DiscardingSink<Integer>());
        assertEquals(2, env.getStreamGraph().getStreamNodes().size());

        DataStreamSource<Integer> dataStream2 = env.fromCollection(new DummySplittableIterator<Integer>(), typeInfo);
        dataStream2.addSink(new DiscardingSink<Integer>());
        assertEquals(2, env.getStreamGraph().getStreamNodes().size());

        DataStreamSource<Integer> dataStream3 = env.fromCollection(new DummySplittableIterator<Integer>(), typeInfo);
        dataStream3.addSink(new DiscardingSink<Integer>());
        // Does not clear the transformations.
        env.getExecutionPlan();

        DataStreamSource<Integer> dataStream4 = env.fromCollection(new DummySplittableIterator<Integer>(), typeInfo);
        dataStream4.addSink(new DiscardingSink<Integer>());
        assertEquals(4, env.getStreamGraph("TestJob").getStreamNodes().size());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 3
Source File: KuduSinkTest.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    DataStreamSource<MetricEvent> data = KafkaConfigUtil.buildSource(env);

    List<KuduColumnInfo> kuduColumnInfos = new ArrayList<>();
    KuduColumnInfo columnInfo1 = KuduColumnInfo.Builder.createLong("zhisheng").rangeKey(true).build();
    KuduColumnInfo columnInfo2 = KuduColumnInfo.Builder.createFloat("zhisheng").rangeKey(true).build();
    KuduColumnInfo columnInfo3 = KuduColumnInfo.Builder.createString("zhisheng").rangeKey(true).build();
    kuduColumnInfos.add(columnInfo1);
    kuduColumnInfos.add(columnInfo2);
    kuduColumnInfos.add(columnInfo3);

    KuduTableInfo zhisheng = new KuduTableInfo.Builder("zhisheng")
            .replicas(1)
            .createIfNotExist(true)
            .columns(kuduColumnInfos)
            .build();

    data.addSink(new KuduSink<>("127.0.0.1", zhisheng, new PojoSerDe<>(MetricEvent.class)).withInsertWriteMode());
}
Example 4
Source File: ElasticsearchSinkTestBase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Tests whether the Elasticsearch sink fails when there is no cluster to connect to.
 */
public void runInvalidElasticsearchClusterTest() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStreamSource<Tuple2<Integer, String>> source = env.addSource(new SourceSinkDataTestKit.TestDataSourceFunction());

    source.addSink(createElasticsearchSinkForNode(
            1,
            "invalid-cluster-name",
            SourceSinkDataTestKit.getJsonSinkFunction("test"),
            "123.123.123.123")); // incorrect ip address

    try {
        env.execute("Elasticsearch Sink Test");
    } catch (JobExecutionException expectedException) {
        // every ES version throws a different exception in case of timeouts, so don't bother asserting on the exception
        // test passes
        return;
    }

    fail();
}
Example 5
Source File: ActiveMQConnectorITCase.java From bahir-flink with Apache License 2.0 | 6 votes |
private void createProducerTopology(StreamExecutionEnvironment env, AMQSinkConfig<String> config) {
    DataStreamSource<String> stream = env.addSource(new SourceFunction<String>() {
        @Override
        public void run(SourceContext<String> ctx) throws Exception {
            for (int i = 0; i < MESSAGES_NUM; i++) {
                ctx.collect("amq-" + i);
            }
        }

        @Override
        public void cancel() {}
    });

    AMQSink<String> sink = new AMQSink<>(config);
    stream.addSink(sink);
}
Example 6
Source File: Main.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    DataStreamSource<MetricEvent> data = KafkaConfigUtil.buildSource(env);

    // Please turn the fields below into constants
    InfluxDBConfig config = InfluxDBConfig.builder()
            .url(parameterTool.get("influxdb.url"))
            .username(parameterTool.get("influxdb.username"))
            .password(parameterTool.get("influxdb.password"))
            .database(parameterTool.get("influxdb.database"))
            .batchActions(parameterTool.getInt("influxdb.batchActions"))
            .flushDuration(parameterTool.getInt("influxdb.flushDuration"))
            .enableGzip(parameterTool.getBoolean("influxdb.enableGzip"))
            .createDatabase(parameterTool.getBoolean("influxdb.createDatabase"))
            .build();

    data.addSink(new InfluxDBSink(config));

    env.execute("flink InfluxDB connector");
}
Example 7
Source File: ElasticsearchSinkTestBase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Tests whether the Elasticsearch sink fails when there is no cluster to connect to.
 */
public void runInvalidElasticsearchClusterTest() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStreamSource<Tuple2<Integer, String>> source = env.addSource(new SourceSinkDataTestKit.TestDataSourceFunction());

    Map<String, String> userConfig = new HashMap<>();
    userConfig.put(ElasticsearchSinkBase.CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS, "1");
    userConfig.put("cluster.name", "invalid-cluster-name");

    source.addSink(createElasticsearchSinkForNode(
            1,
            "invalid-cluster-name",
            new SourceSinkDataTestKit.TestElasticsearchSinkFunction("test"),
            "123.123.123.123")); // incorrect ip address

    try {
        env.execute("Elasticsearch Sink Test");
    } catch (JobExecutionException expectedException) {
        // test passes
        return;
    }

    fail();
}
Example 8
Source File: RedisSinkITCase.java From bahir-flink with Apache License 2.0 | 5 votes |
@Test
public void testRedisSetDataType() throws Exception {
    DataStreamSource<Tuple2<String, String>> source = env.addSource(new TestSourceFunction());
    RedisSink<Tuple2<String, String>> redisSink = new RedisSink<>(jedisPoolConfig, new RedisCommandMapper(RedisCommand.SADD));

    source.addSink(redisSink);
    env.execute("Test Redis Set Data Type");

    assertEquals(NUM_ELEMENTS, jedis.scard(REDIS_KEY));

    jedis.del(REDIS_KEY);
}
Example 9
Source File: RedisSinkITCase.java From bahir-flink with Apache License 2.0 | 5 votes |
@Test
public void testRedisHashDataTypeWithTTLFromOpt() throws Exception {
    DataStreamSource<Tuple2<String, String>> source = env.addSource(new TestSourceFunctionHash());
    RedisSink<Tuple2<String, String>> redisSink = new RedisSink<>(jedisPoolConfig, new RedisAdditionalTTLMapperFromOpt(RedisCommand.HSET));

    source.addSink(redisSink);
    env.execute("Test Redis Hash Data Type 2");

    assertEquals(NUM_ELEMENTS, jedis.hlen(REDIS_ADDITIONAL_KEY));
    assertEquals(REDIS_TTL_IN_SECS, jedis.ttl(REDIS_ADDITIONAL_KEY));

    jedis.del(REDIS_ADDITIONAL_KEY);
}
Example 10
Source File: StreamExecutionEnvironmentTest.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testSources() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    SourceFunction<Integer> srcFun = new SourceFunction<Integer>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void run(SourceContext<Integer> ctx) throws Exception {
        }

        @Override
        public void cancel() {
        }
    };
    DataStreamSource<Integer> src1 = env.addSource(srcFun);
    src1.addSink(new DiscardingSink<Integer>());
    assertEquals(srcFun, getFunctionFromDataSource(src1));

    List<Long> list = Arrays.asList(0L, 1L, 2L);

    DataStreamSource<Long> src2 = env.generateSequence(0, 2);
    assertTrue(getFunctionFromDataSource(src2) instanceof StatefulSequenceSource);

    DataStreamSource<Long> src3 = env.fromElements(0L, 1L, 2L);
    assertTrue(getFunctionFromDataSource(src3) instanceof FromElementsFunction);

    DataStreamSource<Long> src4 = env.fromCollection(list);
    assertTrue(getFunctionFromDataSource(src4) instanceof FromElementsFunction);
}
Example 11
Source File: StreamExecutionEnvironmentTest.java From flink with Apache License 2.0 | 5 votes |
@SuppressWarnings("unchecked")
private static <T> SourceFunction<T> getFunctionFromDataSource(DataStreamSource<T> dataStreamSource) {
    dataStreamSource.addSink(new DiscardingSink<T>());
    AbstractUdfStreamOperator<?, ?> operator =
            (AbstractUdfStreamOperator<?, ?>) getOperatorFromDataStream(dataStreamSource);
    return (SourceFunction<T>) operator.getUserFunction();
}
Example 12
Source File: RedisSinkITCase.java From bahir-flink with Apache License 2.0 | 5 votes |
@Test
public void testRedisHyperLogLogDataType() throws Exception {
    DataStreamSource<Tuple2<String, String>> source = env.addSource(new TestSourceFunction());
    RedisSink<Tuple2<String, String>> redisSink = new RedisSink<>(jedisPoolConfig, new RedisCommandMapper(RedisCommand.PFADD));

    source.addSink(redisSink);
    env.execute("Test Redis Hyper Log Log Data Type");

    assertEquals(NUM_ELEMENTS, Long.valueOf(jedis.pfcount(REDIS_KEY)));

    jedis.del(REDIS_KEY);
}
Example 13
Source File: RedisSinkITCase.java From bahir-flink with Apache License 2.0 | 5 votes |
@Test
public void testRedisStringDataTypeWithTTL() throws Exception {
    DataStreamSource<Tuple2<String, String>> source = env.addSource(new TestSourceFunctionString());
    RedisSink<Tuple2<String, String>> redisSink = new RedisSink<>(jedisPoolConfig, new RedisCommandMapperWithTTL(RedisCommand.SETEX));

    source.addSink(redisSink);
    env.execute("Test Redis Set Data Type With TTL");

    assertEquals(TEST_MESSAGE_LENGTH, jedis.strlen(REDIS_KEY));
    assertEquals(REDIS_TTL_IN_SECS, jedis.ttl(REDIS_KEY));

    jedis.del(REDIS_KEY);
}
Example 14
Source File: FlinkPulsarTableITest.java From pulsar-flink with Apache License 2.0 | 5 votes |
@Test
public void testWriteThenRead() throws Exception {
    String tp = newTopic();

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.getConfig().disableSysoutLogging();
    see.setParallelism(1);

    DataStreamSource ds = see.fromCollection(fooList);
    ds.addSink(
            new FlinkPulsarSink(
                    serviceUrl, adminUrl, Optional.of(tp), getSinkProperties(), TopicKeyExtractor.NULL,
                    SchemaData.Foo.class));

    see.execute("write first");

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();
    env.setParallelism(1);

    StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
    tEnv.connect(getPulsarDescriptor(tp))
            .inAppendMode()
            .registerTableSource(tp);

    Table t = tEnv.scan(tp).select("i, f, bar");
    tEnv.toAppendStream(t, t.getSchema().toRowType())
            .map(new FailingIdentityMapper<Row>(fooList.size()))
            .addSink(new SingletonStreamSink.StringSink<>()).setParallelism(1);

    try {
        env.execute("count elements from topics");
    } catch (Exception e) {
        // the job is expected to fail (FailingIdentityMapper); output is verified below
    }
    SingletonStreamSink.compareWithList(
            fooList.subList(0, fooList.size() - 1).stream().map(Objects::toString).collect(Collectors.toList()));
}
Example 15
Source File: KafkaShortRetentionTestBase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Ensure that the consumer is properly failing if "auto.offset.reset" is set to "none".
 */
public void runFailOnAutoOffsetResetNone() throws Exception {
    final String topic = "auto-offset-reset-none-test";
    final int parallelism = 1;

    kafkaServer.createTestTopic(topic, parallelism, 1);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately
    env.getConfig().disableSysoutLogging();

    // ----------- add consumer ----------

    Properties customProps = new Properties();
    customProps.putAll(standardProps);
    customProps.putAll(secureProps);
    customProps.setProperty("auto.offset.reset", "none"); // test that "none" leads to an exception
    FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, new SimpleStringSchema(), customProps);

    DataStreamSource<String> consuming = env.addSource(source);
    consuming.addSink(new DiscardingSink<String>());

    try {
        env.execute("Test auto offset reset none");
    } catch (Throwable e) {
        // check if correct exception has been thrown
        if (!e.getCause().getCause().getMessage().contains("Unable to find previous offset")  // kafka 0.8
                && !e.getCause().getCause().getMessage().contains("Undefined offset with no reset policy for partition") // kafka 0.9
        ) {
            throw e;
        }
    }

    kafkaServer.deleteTestTopic(topic);
}
Example 16
Source File: FlinkPravegaTableITCase.java From flink-connectors with Apache License 2.0 | 5 votes |
@Test
public void testTableSourceUsingDescriptor() throws Exception {
    StreamExecutionEnvironment execEnvWrite = StreamExecutionEnvironment.getExecutionEnvironment();
    execEnvWrite.setParallelism(1);

    Stream stream = Stream.of(SETUP_UTILS.getScope(), "testJsonTableSource1");
    SETUP_UTILS.createTestStream(stream.getStreamName(), 1);

    // read data from the stream using Table reader
    TableSchema tableSchema = TableSchema.builder()
            .field("user", DataTypes.STRING())
            .field("uri", DataTypes.STRING())
            .field("accessTime", DataTypes.TIMESTAMP(3).bridgedTo(Timestamp.class))
            .build();
    TypeInformation<Row> typeInfo = (RowTypeInfo) TypeConversions.fromDataTypeToLegacyInfo(tableSchema.toRowDataType());

    PravegaConfig pravegaConfig = SETUP_UTILS.getPravegaConfig();

    // Write some data to the stream
    DataStreamSource<Row> dataStream = execEnvWrite
            .addSource(new TableEventSource(EVENT_COUNT_PER_SOURCE));

    FlinkPravegaWriter<Row> pravegaSink = FlinkPravegaWriter.<Row>builder()
            .withPravegaConfig(pravegaConfig)
            .forStream(stream)
            .withSerializationSchema(new JsonRowSerializationSchema.Builder(typeInfo).build())
            .withEventRouter((Row event) -> "fixedkey")
            .build();

    dataStream.addSink(pravegaSink);
    Assert.assertNotNull(execEnvWrite.getExecutionPlan());
    execEnvWrite.execute("PopulateRowData");

    testTableSourceStreamingDescriptor(stream, pravegaConfig);
    testTableSourceBatchDescriptor(stream, pravegaConfig);
}
Example 17
Source File: KafkaShortRetentionTestBase.java From flink with Apache License 2.0 | 4 votes |
public void runAutoOffsetResetTest() throws Exception {
    final String topic = "auto-offset-reset-test";
    final int parallelism = 1;
    final int elementsPerPartition = 50000;

    Properties tprops = new Properties();
    tprops.setProperty("retention.ms", "250");

    kafkaServer.createTestTopic(topic, parallelism, 1, tprops);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately
    env.getConfig().disableSysoutLogging();

    // ----------- add producer dataflow ----------

    DataStream<String> stream = env.addSource(new RichParallelSourceFunction<String>() {

        private boolean running = true;

        @Override
        public void run(SourceContext<String> ctx) throws InterruptedException {
            int cnt = getRuntimeContext().getIndexOfThisSubtask() * elementsPerPartition;
            int limit = cnt + elementsPerPartition;

            while (running && !stopProducer && cnt < limit) {
                ctx.collect("element-" + cnt);
                cnt++;
                Thread.sleep(10);
            }

            LOG.info("Stopping producer");
        }

        @Override
        public void cancel() {
            running = false;
        }
    });

    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);

    kafkaServer.produceIntoKafka(stream, topic, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), props, null);

    // ----------- add consumer dataflow ----------

    NonContinousOffsetsDeserializationSchema deserSchema = new NonContinousOffsetsDeserializationSchema();
    FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, deserSchema, props);

    DataStreamSource<String> consuming = env.addSource(source);
    consuming.addSink(new DiscardingSink<String>());

    tryExecute(env, "run auto offset reset test");

    kafkaServer.deleteTestTopic(topic);
}
Example 18
Source File: KafkaShortRetentionTestBase.java From flink with Apache License 2.0 | 4 votes |
public void runAutoOffsetResetTest() throws Exception {
    final String topic = "auto-offset-reset-test";
    final int parallelism = 1;
    final int elementsPerPartition = 50000;

    Properties tprops = new Properties();
    tprops.setProperty("retention.ms", "250");

    kafkaServer.createTestTopic(topic, parallelism, 1, tprops);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately

    // ----------- add producer dataflow ----------

    DataStream<String> stream = env.addSource(new RichParallelSourceFunction<String>() {

        private boolean running = true;

        @Override
        public void run(SourceContext<String> ctx) throws InterruptedException {
            int cnt = getRuntimeContext().getIndexOfThisSubtask() * elementsPerPartition;
            int limit = cnt + elementsPerPartition;

            while (running && !stopProducer && cnt < limit) {
                ctx.collect("element-" + cnt);
                cnt++;
                Thread.sleep(10);
            }

            LOG.info("Stopping producer");
        }

        @Override
        public void cancel() {
            running = false;
        }
    });

    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);

    kafkaServer.produceIntoKafka(stream, topic, new SimpleStringSchema(), props, null);

    // ----------- add consumer dataflow ----------

    NonContinousOffsetsDeserializationSchema deserSchema = new NonContinousOffsetsDeserializationSchema();
    FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, deserSchema, props);

    DataStreamSource<String> consuming = env.addSource(source);
    consuming.addSink(new DiscardingSink<String>());

    tryExecute(env, "run auto offset reset test");

    kafkaServer.deleteTestTopic(topic);
}