Java Code Examples for org.apache.flink.api.common.JobExecutionResult#getAccumulatorResult()
The following examples show how to use org.apache.flink.api.common.JobExecutionResult#getAccumulatorResult().
The original project and source file are noted above each example.
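Before the project examples, here is a minimal, self-contained sketch of the common pattern (the class name AccumulatorSketch and the accumulator name "num-elements" are illustrative, not taken from any project below): register an accumulator under a name inside a rich function, execute the job, then pass the same name to getAccumulatorResult().

import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.common.accumulators.IntCounter;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.configuration.Configuration;

public class AccumulatorSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        env.fromElements("a", "b", "c")
                .map(new RichMapFunction<String, String>() {
                    private final IntCounter counter = new IntCounter();

                    @Override
                    public void open(Configuration parameters) {
                        // The name used here is the key later passed to getAccumulatorResult().
                        getRuntimeContext().addAccumulator("num-elements", counter);
                    }

                    @Override
                    public String map(String value) {
                        counter.add(1);
                        return value;
                    }
                })
                .output(new DiscardingOutputFormat<>());

        JobExecutionResult result = env.execute("accumulator-sketch");

        // The explicit type parameter is in effect an unchecked cast, so it must
        // match the accumulator's result type (Integer for IntCounter); a mismatch
        // surfaces only at runtime.
        Integer numElements = result.<Integer>getAccumulatorResult("num-elements");
        System.out.println("elements seen: " + numElements);
    }
}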
Example 1
Source File: UnalignedCheckpointITCase.java From flink with Apache License 2.0
private void execute(int parallelism, int slotsPerTaskManager, boolean slotSharing) throws Exception {
    StreamExecutionEnvironment env = createEnv(parallelism, slotsPerTaskManager, slotSharing);

    long minCheckpoints = 10;
    createDAG(env, minCheckpoints, slotSharing);
    final JobExecutionResult result = env.execute();

    collector.checkThat(result.<Long>getAccumulatorResult(NUM_OUT_OF_ORDER), equalTo(0L));
    collector.checkThat(result.<Long>getAccumulatorResult(NUM_DUPLICATES), equalTo(0L));
    collector.checkThat(result.<Long>getAccumulatorResult(NUM_LOST), equalTo(0L));

    // at this point, there is no way that #input != #output, but still perform these sanity checks
    Long inputs = result.<Long>getAccumulatorResult(NUM_INPUTS);
    collector.checkThat(inputs, greaterThan(0L));
    collector.checkThat(result.<Long>getAccumulatorResult(NUM_OUTPUTS), equalTo(inputs));
}
Example 2
Source File: MaterializedCollectBatchResult.java From flink with Apache License 2.0
@Override
public void accept(JobExecutionResult jobExecutionResult) {
    try {
        final ArrayList<byte[]> accResult = jobExecutionResult.getAccumulatorResult(accumulatorName);
        if (accResult == null) {
            throw new SqlExecutionException("The accumulator could not retrieve the result.");
        }
        final List<Row> resultTable = SerializedListAccumulator.deserializeList(accResult, tableSink.getSerializer());
        // sets the result table all at once
        synchronized (resultLock) {
            MaterializedCollectBatchResult.this.resultTable = resultTable;
        }
    } catch (ClassNotFoundException | IOException e) {
        throw new SqlExecutionException("Serialization error while deserializing collected data.", e);
    }
}
Example 3
Source File: JavaCounterApp.java From 163-bigdate-note with GNU General Public License v3.0
public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSource<String> data = env.fromElements("hadoop", "spark", "flink", "strom", "pyspark");

    data.map(new RichMapFunction<String, String>() {
        LongCounter counter = new LongCounter();

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            getRuntimeContext().addAccumulator("ele_counter_java", counter);
        }

        @Override
        public String map(String value) throws Exception {
            counter.add(1);
            return value;
        }
    }).writeAsText("file:\\D:\\imooc\\新一代大数据计算引擎 Flink从入门到实战-v\\input\\sinkout\\sink-java-counter.txt", FileSystem.WriteMode.OVERWRITE).setParallelism(3);

    JobExecutionResult counterApp = env.execute("JavaCounterApp");
    Long num = counterApp.getAccumulatorResult("ele_counter_java");
    System.out.println("num:" + num);
}
Example 4
Source File: ParallelMaximumLikelihood.java From toolbox with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public double updateModel(DataFlink<DataInstance> dataUpdate) {
    try {
        Configuration config = new Configuration();
        config.setString(BN_NAME, this.dag.getName());
        config.setBytes(EFBN_NAME, Serialization.serializeObject(efBayesianNetwork));

        DataSet<DataInstance> dataset = dataUpdate.getDataSet();

        this.sumSS = dataset.map(new SufficientSatisticsMAP())
                .withParameters(config)
                .reduce(new SufficientSatisticsReduce())
                .collect().get(0);

        // Add the prior
        sumSS.sum(efBayesianNetwork.createInitSufficientStatistics());

        JobExecutionResult result = dataset.getExecutionEnvironment().getLastJobExecutionResult();

        numInstances = result.getAccumulatorResult(ParallelMaximumLikelihood.COUNTER_NAME + "_" + this.dag.getName());
        numInstances++; // Initial counts
    } catch (Exception ex) {
        throw new UndeclaredThrowableException(ex);
    }

    return this.getLogMarginalProbability();
}
Example 5
Source File: DataSetUtils.java From Flink-CEPplus with Apache License 2.0
/**
 * Convenience method to get the count (number of elements) of a DataSet
 * as well as the checksum (sum over element hashes).
 *
 * @return A ChecksumHashCode that represents the count and checksum of elements in the data set.
 * @deprecated replaced with {@code org.apache.flink.graph.asm.dataset.ChecksumHashCode} in Gelly
 */
@Deprecated
public static <T> Utils.ChecksumHashCode checksumHashCode(DataSet<T> input) throws Exception {
    final String id = new AbstractID().toString();

    input.output(new Utils.ChecksumHashCodeHelper<T>(id)).name("ChecksumHashCode");

    JobExecutionResult res = input.getExecutionEnvironment().execute();
    return res.<Utils.ChecksumHashCode>getAccumulatorResult(id);
}
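For reference, a call site for this helper might look like the following sketch (the data is illustrative; the returned Utils.ChecksumHashCode exposes the accumulated count and checksum via getCount() and getChecksum()). Note that the call triggers a full job execution:

DataSet<String> words = env.fromElements("flink", "accumulator", "checksum");
Utils.ChecksumHashCode checksum = DataSetUtils.checksumHashCode(words); // executes the job
System.out.println("count=" + checksum.getCount() + " checksum=" + checksum.getChecksum());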
Example 6
Source File: ParallelMaximumLikelihood2.java From toolbox with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public double updateModel(DataFlink<DataInstance> dataUpdate) {
    try {
        this.initLearning();

        Configuration config = new Configuration();
        config.setString(ParameterLearningAlgorithm.BN_NAME, this.dag.getName());
        config.setBytes(EFBN_NAME, Serialization.serializeObject(efBayesianNetwork));

        DataSet<DataInstance> dataset = dataUpdate.getDataSet();

        this.sumSS = dataset.mapPartition(new SufficientSatisticsMAP())
                .withParameters(config)
                .reduce(new SufficientSatisticsReduce())
                .collect().get(0);

        // Add the prior
        sumSS.sum(efBayesianNetwork.createInitSufficientStatistics());

        JobExecutionResult result = dataset.getExecutionEnvironment().getLastJobExecutionResult();

        numInstances = result.getAccumulatorResult(ParallelMaximumLikelihood2.COUNTER_NAME + "_" + this.dag.getName());
        numInstances++; // Initial counts
    } catch (Exception ex) {
        throw new UndeclaredThrowableException(ex);
    }

    return this.getLogMarginalProbability();
}
Example 7
Source File: EmptyFieldsCountAccumulator.java From flink with Apache License 2.0
public static void main(final String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // get the data set
    final DataSet<StringTriple> file = getDataSet(env, params);

    // filter lines with empty fields
    final DataSet<StringTriple> filteredLines = file.filter(new EmptyFieldFilter());

    // Here, we could do further processing with the filtered lines...
    JobExecutionResult result;

    // output the filtered lines
    if (params.has("output")) {
        filteredLines.writeAsCsv(params.get("output"));
        // execute program
        result = env.execute("Accumulator example");
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        filteredLines.print();
        result = env.getLastJobExecutionResult();
    }

    // get the accumulator result via its registration key
    final List<Integer> emptyFields = result.getAccumulatorResult(EMPTY_FIELD_ACCUMULATOR);
    System.out.format("Number of detected empty fields per column: %s\n", emptyFields);
}
Example 8
Source File: DataSet.java From flink with Apache License 2.0
/**
 * Convenience method to get the count (number of elements) of a DataSet.
 *
 * @return A long integer that represents the number of elements in the data set.
 */
public long count() throws Exception {
    final String id = new AbstractID().toString();

    output(new Utils.CountHelper<T>(id)).name("count()");

    JobExecutionResult res = getExecutionEnvironment().execute();
    return res.<Long>getAccumulatorResult(id);
}
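A call site is correspondingly simple; since count() calls execute() internally, it runs the whole program up to that point as a separate job (a sketch with illustrative data):

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
long numElements = env.fromElements("a", "b", "c").count(); // runs a job internally
System.out.println(numElements); // prints 3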
Example 9
Source File: AnalyticHelper.java From flink with Apache License 2.0
/**
 * Gets the accumulator with the given name. Returns {@code null}, if no accumulator with
 * that name was produced.
 *
 * @param accumulatorName The name of the accumulator
 * @param <A> The generic type of the accumulator value
 * @return The value of the accumulator with the given name
 */
public <A> A getAccumulator(ExecutionEnvironment env, String accumulatorName) {
    JobExecutionResult result = env.getLastJobExecutionResult();

    Preconditions.checkNotNull(result, "No result found for job, was execute() called before getting the result?");

    return result.getAccumulatorResult(id + SEPARATOR + accumulatorName);
}