Java Examples
Example #1
Source File: From flink with Apache License 2.0 | 6 votes |
/**
 * Wraps a Hadoop {@code mapred} input split together with its (optional) job configuration.
 *
 * @param splitNumber number of this split, forwarded to the superclass
 * @param hInputSplit the Hadoop split to wrap; must not be {@code null}
 * @param jobconf job configuration; may be {@code null} only when
 *     {@code needsJobConf(hInputSplit)} is {@code false}
 * @throws NullPointerException if the split is {@code null}, or if the split needs a
 *     configuration and none was supplied
 */
public HadoopInputSplit(
        int splitNumber,
        org.apache.hadoop.mapred.InputSplit hInputSplit,
        @Nullable JobConf jobconf) {
    super(splitNumber, (String) null);

    if (hInputSplit == null) {
        throw new NullPointerException("Hadoop input split must not be null");
    }

    // A configuration is mandatory only for splits that report they need one.
    if (needsJobConf(hInputSplit)) {
        if (jobconf == null) {
            throw new NullPointerException(
                    "Hadoop JobConf must not be null when input split is configurable.");
        }
    }

    this.hadoopInputSplit = hInputSplit;
    this.jobConf = jobconf;
    this.splitType = hInputSplit.getClass();
}
Example #2
Source File: From alibaba-flink-connectors with Apache License 2.0 | 6 votes |
protected void createParallelReader(Configuration config) throws IOException { if (initialProgress == null) { createInitialProgress(); } long watermarkInterval = 0; if (enableWatermarkEmitter) { // only enable watermark emitter, then get the real watermark interval // the watermark interval is a toggle of watermark emitter watermarkInterval = getRuntimeContext().getExecutionConfig().getAutoWatermarkInterval(); } this.parallelReader = new ParallelReader<>(getRuntimeContext(), config, watermarkInterval, tracingMetricEnabled, tracingSampleInterval); parallelReader.setExitAfterReadFinished(exitAfterReadFinished); for (Tuple2<InputSplit, CURSOR> entry : initialProgress) {"entry of initialProgress:{}", entry); RecordReader<T, CURSOR> reader = createReader(config); parallelReader.addRecordReader(reader, entry.f0, entry.f1);"Reader {} seeking to {}", entry.f0, String.valueOf(entry.f1)); } getRuntimeContext().getMetricGroup().counter("partition").inc(initialProgress.size()); }
Example #3
Source File: From Flink-CEPplus with Apache License 2.0 | 6 votes |
private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { // read the parent fields and the final fields in.defaultReadObject(); // the job conf knows how to deserialize itself jobConf = new JobConf(); jobConf.readFields(in); try { hadoopInputSplit = (org.apache.hadoop.mapred.InputSplit) WritableFactories.newInstance(splitType); } catch (Exception e) { throw new RuntimeException("Unable to instantiate Hadoop InputSplit", e); } if (hadoopInputSplit instanceof Configurable) { ((Configurable) hadoopInputSplit).setConf(this.jobConf); } else if (hadoopInputSplit instanceof JobConfigurable) { ((JobConfigurable) hadoopInputSplit).configure(this.jobConf); } hadoopInputSplit.readFields(in); }
Example #4
Source File: From flink with Apache License 2.0 | 6 votes |
private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { // read the parent fields and the final fields in.defaultReadObject(); // the job conf knows how to deserialize itself jobConf = new JobConf(); jobConf.readFields(in); try { hadoopInputSplit = (org.apache.hadoop.mapred.InputSplit) WritableFactories.newInstance(splitType); } catch (Exception e) { throw new RuntimeException("Unable to instantiate Hadoop InputSplit", e); } if (hadoopInputSplit instanceof Configurable) { ((Configurable) hadoopInputSplit).setConf(this.jobConf); } else if (hadoopInputSplit instanceof JobConfigurable) { ((JobConfigurable) hadoopInputSplit).configure(this.jobConf); } hadoopInputSplit.readFields(in); }
Example #5
Source File: From flink with Apache License 2.0 | 6 votes |
public void resetForNewExecution(final long timestamp, final long expectedGlobalModVersion) throws GlobalModVersionMismatch { synchronized (stateMonitor) { // check and reset the sharing groups with scheduler hints for (int i = 0; i < parallelism; i++) { taskVertices[i].resetForNewExecution(timestamp, expectedGlobalModVersion); } // set up the input splits again try { if (this.inputSplits != null) { // lazy assignment @SuppressWarnings("unchecked") InputSplitSource<InputSplit> splitSource = (InputSplitSource<InputSplit>) jobVertex.getInputSplitSource(); this.splitAssigner = splitSource.getInputSplitAssigner(this.inputSplits); } } catch (Throwable t) { throw new RuntimeException("Re-creating the input split assigner failed: " + t.getMessage(), t); } } }
Example #6
Source File: From flink with Apache License 2.0 | 6 votes |
private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { // read the parent fields and the final fields in.defaultReadObject(); try { hadoopInputSplit = (org.apache.hadoop.mapred.InputSplit) WritableFactories.newInstance(splitType); } catch (Exception e) { throw new RuntimeException("Unable to instantiate Hadoop InputSplit", e); } if (needsJobConf(hadoopInputSplit)) { // the job conf knows how to deserialize itself jobConf = new JobConf(); jobConf.readFields(in); if (hadoopInputSplit instanceof Configurable) { ((Configurable) hadoopInputSplit).setConf(this.jobConf); } else if (hadoopInputSplit instanceof JobConfigurable) { ((JobConfigurable) hadoopInputSplit).configure(this.jobConf); } } hadoopInputSplit.readFields(in); }
Example #7
Source File: From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Checks that an {@code InputFormatVertex} initialized on the master exposes the input
 * splits created by its input format: exactly one split of type {@code TestSplit}.
 *
 * <p>Unexpected exceptions are propagated to the test runner so the full stack trace and
 * cause appear in the test report, instead of being printed and reduced to a message.
 *
 * @throws Exception if vertex initialization or split creation fails unexpectedly
 */
@Test
public void testInputFormatVertex() throws Exception {
    final TestInputFormat inputFormat = new TestInputFormat();
    final InputFormatVertex vertex = new InputFormatVertex("Name");
    new TaskConfig(vertex.getConfiguration())
            .setStubWrapper(new UserCodeObjectWrapper<InputFormat<?, ?>>(inputFormat));

    final ClassLoader cl = getClass().getClassLoader();
    vertex.initializeOnMaster(cl);

    InputSplit[] splits = vertex.getInputSplitSource().createInputSplits(77);

    assertNotNull(splits);
    assertEquals(1, splits.length);
    assertEquals(TestSplit.class, splits[0].getClass());
}
Example #8
Source File: From flink with Apache License 2.0 | 6 votes |
@Test public void testJDBCInputFormatWithParallelismAndGenericSplitting() throws IOException { Serializable[][] queryParameters = new String[2][1]; queryParameters[0] = new String[]{TEST_DATA[3].author}; queryParameters[1] = new String[]{TEST_DATA[0].author}; ParameterValuesProvider paramProvider = new GenericParameterValuesProvider(queryParameters); jdbcInputFormat = JDBCInputFormat.buildJDBCInputFormat() .setDrivername(DERBY_EBOOKSHOP_DB.getDriverClass()) .setDBUrl(DERBY_EBOOKSHOP_DB.getUrl()) .setQuery(SELECT_ALL_BOOKS_SPLIT_BY_AUTHOR) .setRowTypeInfo(ROW_TYPE_INFO) .setParametersProvider(paramProvider) .setResultSetType(ResultSet.TYPE_SCROLL_INSENSITIVE) .finish(); jdbcInputFormat.openInputFormat(); InputSplit[] splits = jdbcInputFormat.createInputSplits(1); //this query exploit parallelism (1 split for every queryParameters row) Assert.assertEquals(queryParameters.length, splits.length); verifySplit(splits[0], TEST_DATA[3].id); verifySplit(splits[1], TEST_DATA[0].id + TEST_DATA[1].id); jdbcInputFormat.closeInputFormat(); }
Example #9
Source File: From alibaba-flink-connectors with Apache License 2.0 | 6 votes |
@Override public List<Tuple2<InputSplit, String>> reAssignInputSplitsForCurrentSubTask( int numberOfParallelSubTasks, int indexOfThisSubTask, List<InnerProgress<String>> allSplitsInState) throws IOException { List<Tuple2<InputSplit, String>> initialProgess = new ArrayList<>(); List<Shard> subscribedPartitions = modAssign(numberOfParallelSubTasks, indexOfThisSubTask); for (Shard shard : subscribedPartitions) { boolean existBefore = false; for (InnerProgress<String> progress: allSplitsInState) { if (shard.GetShardId() == progress.getInputSplit().getSplitNumber()){ initialProgess.add(new Tuple2<>(progress.getInputSplit(), progress.getCursor())); existBefore = true; break; } } if (!existBefore) { // æ–°å¢žåŠ çš„shardId æ ‡è¯†0为shard的开头 initialProgess.add(Tuple2.of(new SlsInputSplit(shard.GetShardId()), NEW_SLS_START_FLAG)); } } return initialProgess; }
Example #10
Source File: From flink with Apache License 2.0 | 6 votes |
/**
 * Pulls serialized input splits from the supplier until it yields an empty one (or one that
 * deserializes to {@code null}) and returns the deserialized splits in order.
 *
 * @param nextInputSplit supplier of the next serialized input split
 * @return all splits received before the terminating element
 * @throws Exception if deserializing a split fails
 */
private List<InputSplit> getRemainingInputSplits(Supplier<SerializedInputSplit> nextInputSplit) throws Exception {
    final List<InputSplit> collected = new ArrayList<>(16);

    while (true) {
        final SerializedInputSplit serialized = nextInputSplit.get();
        if (serialized.isEmpty()) {
            break;
        }

        final InputSplit split = InstantiationUtil.deserializeObject(
                serialized.getInputSplitData(), ClassLoader.getSystemClassLoader());
        if (split == null) {
            break;
        }

        collected.add(split);
    }

    return collected;
}
Example #11
Source File: From flink with Apache License 2.0 | 5 votes |
/**
 * Returns the only registered input format together with its operator ID.
 *
 * <p>The state check fails unless exactly one input format is registered. The user code
 * object is loaded with {@code userCodeClassLoader} and cast, unchecked, to the requested
 * type parameters.
 *
 * @param <OT> record type produced by the input format
 * @param <T> input split type of the input format
 * @return pair of operator ID and input format
 */
@SuppressWarnings("unchecked")
public <OT, T extends InputSplit> Pair<OperatorID, InputFormat<OT, T>> getUniqueInputFormat() {
    final Map<OperatorID, UserCodeWrapper<? extends InputFormat<?, ?>>> registered =
            formats.getInputFormats();
    Preconditions.checkState(registered.size() == 1);

    final Map.Entry<OperatorID, UserCodeWrapper<? extends InputFormat<?, ?>>> only =
            registered.entrySet().iterator().next();

    final OperatorID operatorId = only.getKey();
    final InputFormat<OT, T> format =
            (InputFormat<OT, T>) only.getValue().getUserCodeObject(InputFormat.class, userCodeClassLoader);

    return new ImmutablePair<>(operatorId, format);
}
Example #12
Source File: From flink with Apache License 2.0 | 5 votes |
/**
 * Tests that input splits assigned to an Execution are handed back to the
 * InputSplitAssigner when that execution fails, using the pipelined-region restart
 * failover strategy.
 */
@Test
public void testRequestNextInputSplitWithLocalFailover() throws Exception {
    configuration.setString(
            JobManagerOptions.EXECUTION_FAILOVER_STRATEGY,
            FailoverStrategyFactoryLoader.PIPELINED_REGION_RESTART_STRATEGY_NAME);

    // Expected splits of the failed execution: those recorded for the first task.
    final Function<List<List<InputSplit>>, Collection<InputSplit>> splitsOfFirstTask =
            splitsPerTask -> splitsPerTask.get(0);

    runRequestNextInputSplitTest(splitsOfFirstTask);
}
Example #13
Source File: From alibaba-flink-connectors with Apache License 2.0 | 5 votes |
/**
 * Chooses the split assigner for the given splits.
 *
 * <p>A {@code PreAssignedInputSplitAssigner} is used only when the source initializes its
 * splits on the master and parallel read is not disabled; in every other case a
 * {@code DefaultInputSplitAssigner} is returned.
 *
 * @param inputSplits splits to distribute
 * @return the assigner for these splits
 */
@Override
public InputSplitAssigner getInputSplitAssigner(InputSplit[] inputSplits) {
    final boolean preAssigned = source.initInputSplitInMaster && !source.disableParallelRead;
    if (preAssigned) {
        return new PreAssignedInputSplitAssigner(inputSplits, taskInputSplitSize, taskInputSplitStartIndex);
    }
    return new DefaultInputSplitAssigner(inputSplits);
}
Example #14
Source File: From alibaba-flink-connectors with Apache License 2.0 | 5 votes |
/**
 * Returns a {@code LocatableInputSplitAssigner} when parallel read is disabled on the
 * source; otherwise defers to the superclass.
 *
 * <p>Every split is cast to {@code LocatableInputSplit} up front, regardless of which
 * assigner is chosen, so a non-locatable split fails with a {@code ClassCastException}
 * in both cases.
 *
 * @param inputSplits splits to distribute
 * @return the assigner for these splits
 */
@Override
public InputSplitAssigner getInputSplitAssigner(InputSplit[] inputSplits) {
    final LocatableInputSplit[] locatable = new LocatableInputSplit[inputSplits.length];
    int position = 0;
    for (InputSplit split : inputSplits) {
        locatable[position++] = (LocatableInputSplit) split;
    }

    return source.isParallelReadDisabled()
            ? new LocatableInputSplitAssigner(locatable)
            : super.getInputSplitAssigner(inputSplits);
}
Example #15
Source File: From alibaba-flink-connectors with Apache License 2.0 | 5 votes |
/**
 * Creates the shard input splits that belong to the current subtask.
 *
 * <p>The topic handle and the list of shard ids are initialized lazily on first use. The
 * shard indexes for this subtask come from {@code SourceUtils.modAssign}, keyed by
 * {@code "datahub"} plus the topic name; each index becomes one
 * {@code DatahubShardInputSplit} carrying the shard id at that index and {@code startTime}.
 *
 * @param numberOfParallelSubTasks total number of parallel subtasks
 * @param indexOfThisSubTask index of the current subtask
 * @return the splits assigned to this subtask
 * @throws IOException declared by the overridden method
 */
@Override
public InputSplit[] createInputSplitsForCurrentSubTask(
        int numberOfParallelSubTasks, int indexOfThisSubTask) throws IOException {

    // Lazily resolve the topic, creating the client provider if it does not exist yet.
    if (topic == null) {
        if (clientProvider == null) {
            clientProvider = createProvider();
        }
        this.topic = this.clientProvider.getClient().getTopic(this.projectName, this.topicName);
    }

    // Lazily load the shard list and derive the shard ids from it.
    if (shardIds == null) {
        initShardsList();
        shardIds = new ArrayList<>();
        for (ShardEntry entry : initShardsList) {
            shardIds.add(entry.getShardId());
        }
    }

    final int shardCount = initShardsList.size();
    final String assignmentKey = "datahub" + topic.getTopicName();
    final List<Integer> assignedIndexes =
            SourceUtils.modAssign(assignmentKey, numberOfParallelSubTasks, indexOfThisSubTask, shardCount);

    final DatahubShardInputSplit[] result = new DatahubShardInputSplit[assignedIndexes.size()];
    int slot = 0;
    for (Integer shardIndex : assignedIndexes) {
        result[slot] = new DatahubShardInputSplit(shardIndex, shardIds.get(shardIndex), this.startTime);
        slot++;
    }
    return result;
}
Example #16
Source File: From Flink-CEPplus with Apache License 2.0 | 5 votes |
public HadoopInputSplit(int splitNumber, org.apache.hadoop.mapreduce.InputSplit mapreduceInputSplit, JobContext jobContext) { super(splitNumber, (String) null); if (mapreduceInputSplit == null) { throw new NullPointerException("Hadoop input split must not be null"); } if (!(mapreduceInputSplit instanceof Writable)) { throw new IllegalArgumentException("InputSplit must implement Writable interface."); } this.splitType = mapreduceInputSplit.getClass(); this.mapreduceInputSplit = mapreduceInputSplit; }
Example #17
Source File: From flink with Apache License 2.0 | 5 votes |
/**
 * Creates one {@code GenericInputSplit} per row of {@code parameterValues}, or a single
 * split when no parameter values are set.
 *
 * @param minNumSplits not used by this implementation
 * @return the generated splits
 * @throws IOException declared by the overridden method
 */
@Override
public InputSplit[] createInputSplits(int minNumSplits) throws IOException {
    if (parameterValues == null) {
        // No parameters: the whole query is a single split.
        return new GenericInputSplit[]{new GenericInputSplit(0, 1)};
    }

    final int splitCount = parameterValues.length;
    final GenericInputSplit[] splits = new GenericInputSplit[splitCount];
    for (int partition = 0; partition < splitCount; partition++) {
        splits[partition] = new GenericInputSplit(partition, splitCount);
    }
    return splits;
}
Example #18
Source File: From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Builds the input splits for this format: a single split if {@code parameterValues} is
 * {@code null}, otherwise one {@code GenericInputSplit} for each parameter row.
 *
 * @param minNumSplits not used by this implementation
 * @return the generated splits
 * @throws IOException declared by the overridden method
 */
@Override
public InputSplit[] createInputSplits(int minNumSplits) throws IOException {
    if (parameterValues == null) {
        return new GenericInputSplit[]{new GenericInputSplit(0, 1)};
    }

    final int total = parameterValues.length;
    final GenericInputSplit[] generated = new GenericInputSplit[total];
    int index = 0;
    while (index < total) {
        // Each split knows its own number and the total number of splits.
        generated[index] = new GenericInputSplit(index, total);
        index++;
    }
    return generated;
}
Example #19
Source File: From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void testJDBCInputFormatWithoutParallelismAndNumericColumnSplitting() throws IOException { final long min = TEST_DATA[0].id; final long max = TEST_DATA[TEST_DATA.length - 1].id; final long fetchSize = max + 1; //generate a single split ParameterValuesProvider pramProvider = new NumericBetweenParametersProvider(fetchSize, min, max); jdbcInputFormat = JDBCInputFormat.buildJDBCInputFormat() .setDrivername(DRIVER_CLASS) .setDBUrl(DB_URL) .setQuery(JDBCTestBase.SELECT_ALL_BOOKS_SPLIT_BY_ID) .setRowTypeInfo(ROW_TYPE_INFO) .setParametersProvider(pramProvider) .setResultSetType(ResultSet.TYPE_SCROLL_INSENSITIVE) .finish(); jdbcInputFormat.openInputFormat(); InputSplit[] splits = jdbcInputFormat.createInputSplits(1); //assert that a single split was generated Assert.assertEquals(1, splits.length); int recordCount = 0; Row row = new Row(5); for (InputSplit split : splits) {; while (!jdbcInputFormat.reachedEnd()) { Row next = jdbcInputFormat.nextRecord(row); assertEquals(TEST_DATA[recordCount], next); recordCount++; } jdbcInputFormat.close(); } jdbcInputFormat.closeInputFormat(); Assert.assertEquals(TEST_DATA.length, recordCount); }
Example #20
Source File: From flink with Apache License 2.0 | 5 votes |
public HadoopInputSplit(int splitNumber, org.apache.hadoop.mapreduce.InputSplit mapreduceInputSplit, JobContext jobContext) { super(splitNumber, (String) null); if (mapreduceInputSplit == null) { throw new NullPointerException("Hadoop input split must not be null"); } if (!(mapreduceInputSplit instanceof Writable)) { throw new IllegalArgumentException("InputSplit must implement Writable interface."); } this.splitType = mapreduceInputSplit.getClass(); this.mapreduceInputSplit = mapreduceInputSplit; }
Example #21
Source File: From Flink-CEPplus with Apache License 2.0 | 5 votes |
protected List<OUT> executeOnCollections(RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception { @SuppressWarnings("unchecked") InputFormat<OUT, InputSplit> inputFormat = (InputFormat<OUT, InputSplit>) this.formatWrapper.getUserCodeObject(); //configure the input format inputFormat.configure(this.parameters); //open the input format if (inputFormat instanceof RichInputFormat) { ((RichInputFormat) inputFormat).setRuntimeContext(ctx); ((RichInputFormat) inputFormat).openInputFormat(); } List<OUT> result = new ArrayList<OUT>(); // splits InputSplit[] splits = inputFormat.createInputSplits(1); TypeSerializer<OUT> serializer = getOperatorInfo().getOutputType().createSerializer(executionConfig); for (InputSplit split : splits) {; while (!inputFormat.reachedEnd()) { OUT next = inputFormat.nextRecord(serializer.createInstance()); if (next != null) { result.add(serializer.copy(next)); } } inputFormat.close(); } //close the input format if (inputFormat instanceof RichInputFormat) { ((RichInputFormat) inputFormat).closeInputFormat(); } return result; }
Example #22
Source File: From flink with Apache License 2.0 | 5 votes |
@Override public void open(InputSplit split) { this.session = cluster.connect(); MappingManager manager = new MappingManager(session); Mapper<OUT> mapper = manager.mapper(inputClass); if (mapperOptions != null) { Mapper.Option[] optionsArray = mapperOptions.getMapperOptions(); if (optionsArray != null) { mapper.setDefaultGetOptions(optionsArray); } } this.resultSet =; }
Example #23
Source File: From flink with Apache License 2.0 | 5 votes |
@Override public SerializedInputSplit requestNextInputSplit(JobVertexID vertexID, ExecutionAttemptID executionAttempt) throws IOException { mainThreadExecutor.assertRunningInMainThread(); final Execution execution = executionGraph.getRegisteredExecutions().get(executionAttempt); if (execution == null) { // can happen when JobManager had already unregistered this execution upon on task failure, // but TaskManager get some delay to aware of that situation if (log.isDebugEnabled()) { log.debug("Can not find Execution for attempt {}.", executionAttempt); } // but we should TaskManager be aware of this throw new IllegalArgumentException("Can not find Execution for attempt " + executionAttempt); } final ExecutionJobVertex vertex = executionGraph.getJobVertex(vertexID); if (vertex == null) { throw new IllegalArgumentException("Cannot find execution vertex for vertex ID " + vertexID); } if (vertex.getSplitAssigner() == null) { throw new IllegalStateException("No InputSplitAssigner for vertex ID " + vertexID); } final InputSplit nextInputSplit = execution.getNextInputSplit(); if (log.isDebugEnabled()) { log.debug("Send next input split {}.", nextInputSplit); } try { final byte[] serializedInputSplit = InstantiationUtil.serializeObject(nextInputSplit); return new SerializedInputSplit(serializedInputSplit); } catch (Exception ex) { IOException reason = new IOException("Could not serialize the next input split of class " + nextInputSplit.getClass() + ".", ex);; throw reason; } }
Example #24
Source File: From flink with Apache License 2.0 | 5 votes |
/**
 * Puts the given splits back into {@code remainingSplits} while holding this object's
 * monitor.
 *
 * @param splits splits to return; each must be a {@code CustomInputSplit}
 * @param taskId not used by this implementation
 */
@Override
public void returnInputSplit(List<InputSplit> splits, int taskId) {
    synchronized (this) {
        splits.forEach(returned -> remainingSplits.add((CustomInputSplit) returned));
    }
}
Example #25
Source File: From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void testSerialSplitAssignmentWithNullHost() { try { final int NUM_SPLITS = 50; final String[][] hosts = new String[][] { new String[] { "localhost" }, new String[0], null }; // load some splits Set<LocatableInputSplit> splits = new HashSet<LocatableInputSplit>(); for (int i = 0; i < NUM_SPLITS; i++) { splits.add(new LocatableInputSplit(i, hosts[i%3])); } // get all available splits LocatableInputSplitAssigner ia = new LocatableInputSplitAssigner(splits); InputSplit is = null; while ((is = ia.getNextInputSplit(null, 0)) != null) { assertTrue(splits.remove(is)); } // check we had all assertTrue(splits.isEmpty()); assertNull(ia.getNextInputSplit("", 0)); assertEquals(NUM_SPLITS, ia.getNumberOfRemoteAssignments()); assertEquals(0, ia.getNumberOfLocalAssignments()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example #26
Source File: From flink with Apache License 2.0 | 5 votes |
/**
 * Requests the next input split for this subtask from the job vertex's split assigner and
 * records it in {@code inputSplits}.
 *
 * <p>The request and the bookkeeping happen while holding the {@code inputSplits} monitor.
 *
 * @param host host passed to the split assigner
 * @return the assigned split, or {@code null} if the assigner returned none
 */
public InputSplit getNextInputSplit(String host) {
    final int subtaskIndex = getParallelSubtaskIndex();

    synchronized (inputSplits) {
        final InputSplit assigned = jobVertex.getSplitAssigner().getNextInputSplit(host, subtaskIndex);
        if (assigned == null) {
            return null;
        }
        inputSplits.add(assigned);
        return assigned;
    }
}
Example #27
Source File: From flink with Apache License 2.0 | 5 votes |
@Test public void testJdbcInputFormatWithoutParallelismAndNumericColumnSplitting() throws IOException { final long min = TEST_DATA[0].id; final long max = TEST_DATA[TEST_DATA.length - 1].id; final long fetchSize = max + 1; //generate a single split JdbcParameterValuesProvider pramProvider = new JdbcNumericBetweenParametersProvider(min, max).ofBatchSize(fetchSize); jdbcInputFormat = JdbcInputFormat.buildJdbcInputFormat() .setDrivername(DERBY_EBOOKSHOP_DB.getDriverClass()) .setDBUrl(DERBY_EBOOKSHOP_DB.getUrl()) .setQuery(SELECT_ALL_BOOKS_SPLIT_BY_ID) .setRowTypeInfo(ROW_TYPE_INFO) .setParametersProvider(pramProvider) .setResultSetType(ResultSet.TYPE_SCROLL_INSENSITIVE) .finish(); jdbcInputFormat.openInputFormat(); InputSplit[] splits = jdbcInputFormat.createInputSplits(1); //assert that a single split was generated Assert.assertEquals(1, splits.length); int recordCount = 0; Row row = new Row(5); for (InputSplit split : splits) {; while (!jdbcInputFormat.reachedEnd()) { Row next = jdbcInputFormat.nextRecord(row); assertEquals(TEST_DATA[recordCount], next); recordCount++; } jdbcInputFormat.close(); } jdbcInputFormat.closeInputFormat(); Assert.assertEquals(TEST_DATA.length, recordCount); }
Example #28
Source File: From flink with Apache License 2.0 | 5 votes |
/**
 * Checks that an {@code InputOutputFormatVertex} whose input format and parameters were
 * written through an {@code InputOutputFormatContainer} exposes exactly one input split of
 * type {@code TestSplit} after being initialized on the master.
 *
 * <p>Unexpected exceptions are propagated to the test runner so the full stack trace and
 * cause appear in the test report, instead of being printed and reduced to a message.
 *
 * @throws Exception if vertex initialization or split creation fails unexpectedly
 */
@Test
public void testInputFormat() throws Exception {
    final InputOutputFormatVertex vertex = new InputOutputFormatVertex("Name");

    OperatorID operatorID = new OperatorID();
    Configuration parameters = new Configuration();
    parameters.setString("test_key", "test_value");

    // Store the format and its parameters in the vertex's task configuration.
    new InputOutputFormatContainer(Thread.currentThread().getContextClassLoader())
            .addInputFormat(operatorID, new TestInputFormat(parameters))
            .addParameters(operatorID, "test_key", "test_value")
            .write(new TaskConfig(vertex.getConfiguration()));

    final ClassLoader cl = new TestClassLoader();
    vertex.initializeOnMaster(cl);

    InputSplit[] splits = vertex.getInputSplitSource().createInputSplits(77);

    assertNotNull(splits);
    assertEquals(1, splits.length);
    assertEquals(TestSplit.class, splits[0].getClass());
}
Example #29
Source File: From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Returns the next split from {@code inputSplits} and advances the cursor, or returns
 * {@code null} once all splits have been handed out.
 *
 * @param userCodeClassLoader not used by this implementation
 * @return the next split, or {@code null} if none is left
 */
@Override
public InputSplit getNextInputSplit(ClassLoader userCodeClassLoader) {
    if (this.nextSplit >= this.inputSplits.length) {
        return null;
    }

    final InputSplit current = this.inputSplits[this.nextSplit];
    this.nextSplit++;
    return current;
}
Example #30
Source File: From flink with Apache License 2.0 | 5 votes |
/**
 * Hands previously assigned splits back by adding each one to {@code remainingSplits}.
 * The additions are performed while holding this object's monitor.
 *
 * @param splits splits to return; each must be a {@code CustomInputSplit}
 * @param taskId not used by this implementation
 */
@Override
public void returnInputSplit(List<InputSplit> splits, int taskId) {
    synchronized (this) {
        for (InputSplit returned : splits) {
            final CustomInputSplit custom = (CustomInputSplit) returned;
            remainingSplits.add(custom);
        }
    }
}