Java Code Examples for org.apache.flink.api.common.JobExecutionResult#getAllAccumulatorResults()
The following examples show how to use org.apache.flink.api.common.JobExecutionResult#getAllAccumulatorResults(). Each example notes the original project and source file it was taken from.
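Before the project-specific examples, here is a minimal, self-contained sketch of the typical pattern: register an accumulator in a rich user function, run the job, and read the merged values back from the JobExecutionResult. The class name AccumulatorSketch, the accumulator name "processed-records", the job name "accumulator-sketch", and the sample data are illustrative assumptions, not taken from any of the projects below.

import java.util.Map;

import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.common.accumulators.IntCounter;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.configuration.Configuration;

public class AccumulatorSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        env.fromElements("a", "bb", "ccc")
                .map(new RichMapFunction<String, Integer>() {
                    // "processed-records" is an arbitrary accumulator name used only for this sketch
                    private final IntCounter counter = new IntCounter();

                    @Override
                    public void open(Configuration parameters) {
                        getRuntimeContext().addAccumulator("processed-records", counter);
                    }

                    @Override
                    public Integer map(String value) {
                        counter.add(1);
                        return value.length();
                    }
                })
                .output(new DiscardingOutputFormat<Integer>());

        // execute() blocks until the batch job finishes and returns a JobExecutionResult
        JobExecutionResult result = env.execute("accumulator-sketch");

        // getAllAccumulatorResults() returns every registered accumulator, keyed by name
        Map<String, Object> accumulators = result.getAllAccumulatorResults();
        for (Map.Entry<String, Object> entry : accumulators.entrySet()) {
            System.out.println(entry.getKey() + " = " + entry.getValue());
        }
    }
}

The map returned by getAllAccumulatorResults() is keyed by the names passed to addAccumulator(), and each value is already merged across all parallel subtasks, which is why the examples below can read a single counter value per name after the job has finished.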
Example 1
Source File: CliFrontend.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
protected void executeProgram(PackagedProgram program, ClusterClient<?> client, int parallelism)
        throws ProgramMissingJobException, ProgramInvocationException {
    logAndSysout("Starting execution of program");

    final JobSubmissionResult result = client.run(program, parallelism);

    if (null == result) {
        throw new ProgramMissingJobException("No JobSubmissionResult returned, please make sure you called " +
            "ExecutionEnvironment.execute()");
    }

    if (result.isJobExecutionResult()) {
        logAndSysout("Program execution finished");
        JobExecutionResult execResult = result.getJobExecutionResult();
        System.out.println("Job with JobID " + execResult.getJobID() + " has finished.");
        System.out.println("Job Runtime: " + execResult.getNetRuntime() + " ms");
        Map<String, Object> accumulatorsResult = execResult.getAllAccumulatorResults();
        if (accumulatorsResult.size() > 0) {
            System.out.println("Accumulator Results: ");
            System.out.println(AccumulatorHelper.getResultsFormatted(accumulatorsResult));
        }
    } else {
        logAndSysout("Job has been submitted with JobID " + result.getJobID());
    }
}
Example 2
Source File: CliFrontend.java From flink with Apache License 2.0 | 6 votes |
protected void executeProgram(PackagedProgram program, ClusterClient<?> client, int parallelism)
        throws ProgramMissingJobException, ProgramInvocationException {
    logAndSysout("Starting execution of program");

    final JobSubmissionResult result = client.run(program, parallelism);

    if (null == result) {
        throw new ProgramMissingJobException("No JobSubmissionResult returned, please make sure you called " +
            "ExecutionEnvironment.execute()");
    }

    if (result.isJobExecutionResult()) {
        logAndSysout("Program execution finished");
        JobExecutionResult execResult = result.getJobExecutionResult();
        System.out.println("Job with JobID " + execResult.getJobID() + " has finished.");
        System.out.println("Job Runtime: " + execResult.getNetRuntime() + " ms");
        Map<String, Object> accumulatorsResult = execResult.getAllAccumulatorResults();
        if (accumulatorsResult.size() > 0) {
            System.out.println("Accumulator Results: ");
            System.out.println(AccumulatorHelper.getResultsFormatted(accumulatorsResult));
        }
    } else {
        logAndSysout("Job has been submitted with JobID " + result.getJobID());
    }
}
Example 3
Source File: AccumulatorErrorITCase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
private static void assertAccumulatorsShouldFail(JobExecutionResult result) {
    try {
        result.getAllAccumulatorResults();
        fail("Should have failed");
    }
    catch (Exception ex) {
        assertTrue(ExceptionUtils.findThrowable(ex, CustomException.class).isPresent());
    }
}
Example 4
Source File: AccumulatorErrorITCase.java From flink with Apache License 2.0 | 5 votes |
private static void assertAccumulatorsShouldFail(JobExecutionResult result) {
    try {
        result.getAllAccumulatorResults();
        fail("Should have failed");
    }
    catch (Exception ex) {
        assertTrue(ExceptionUtils.findThrowable(ex, CustomException.class).isPresent());
    }
}
Example 5
Source File: FlinkPipelineRunner.java From beam with Apache License 2.0 | 5 votes |
private PortablePipelineResult createPortablePipelineResult(
        JobExecutionResult result, PipelineOptions options) {
    // The package of DetachedJobExecutionResult has been changed in 1.10.
    // Refer to https://github.com/apache/flink/commit/c36b35e6876ecdc717dade653e8554f9d8b543c9 for
    // details.
    String resultClassName = result.getClass().getCanonicalName();
    if (resultClassName.equals(
            "org.apache.flink.client.program.DetachedEnvironment.DetachedJobExecutionResult")
        || resultClassName.equals("org.apache.flink.core.execution.DetachedJobExecutionResult")) {
        LOG.info("Pipeline submitted in Detached mode");
        // no metricsPusher because metrics are not supported in detached mode
        return new FlinkPortableRunnerResult.Detached();
    } else {
        LOG.info("Execution finished in {} msecs", result.getNetRuntime());
        Map<String, Object> accumulators = result.getAllAccumulatorResults();
        if (accumulators != null && !accumulators.isEmpty()) {
            LOG.info("Final accumulator values:");
            for (Map.Entry<String, Object> entry : result.getAllAccumulatorResults().entrySet()) {
                LOG.info("{} : {}", entry.getKey(), entry.getValue());
            }
        }
        FlinkPortableRunnerResult flinkRunnerResult =
                new FlinkPortableRunnerResult(accumulators, result.getNetRuntime());
        MetricsPusher metricsPusher =
                new MetricsPusher(
                        flinkRunnerResult.getMetricsContainerStepMap(),
                        options.as(MetricsOptions.class),
                        flinkRunnerResult);
        metricsPusher.start();
        return flinkRunnerResult;
    }
}
Example 6
Source File: FlinkRunner.java From beam with Apache License 2.0 | 5 votes |
static PipelineResult createPipelineResult(JobExecutionResult result, PipelineOptions options) {
    // The package of DetachedJobExecutionResult has been changed in 1.10.
    // Refer to https://github.com/apache/flink/commit/c36b35e6876ecdc717dade653e8554f9d8b543c9 for
    // more details.
    String resultClassName = result.getClass().getCanonicalName();
    if (resultClassName.equals(
            "org.apache.flink.client.program.DetachedEnvironment.DetachedJobExecutionResult")
        || resultClassName.equals("org.apache.flink.core.execution.DetachedJobExecutionResult")) {
        LOG.info("Pipeline submitted in Detached mode");
        // no metricsPusher because metrics are not supported in detached mode
        return new FlinkDetachedRunnerResult();
    } else {
        LOG.info("Execution finished in {} msecs", result.getNetRuntime());
        Map<String, Object> accumulators = result.getAllAccumulatorResults();
        if (accumulators != null && !accumulators.isEmpty()) {
            LOG.info("Final accumulator values:");
            for (Map.Entry<String, Object> entry : result.getAllAccumulatorResults().entrySet()) {
                LOG.info("{} : {}", entry.getKey(), entry.getValue());
            }
        }
        FlinkRunnerResult flinkRunnerResult =
                new FlinkRunnerResult(accumulators, result.getNetRuntime());
        MetricsPusher metricsPusher =
                new MetricsPusher(
                        flinkRunnerResult.getMetricsContainerStepMap(),
                        options.as(MetricsOptions.class),
                        flinkRunnerResult);
        metricsPusher.start();
        return flinkRunnerResult;
    }
}
Example 7
Source File: FlinkPipelineRunner.java From flink-dataflow with Apache License 2.0 | 5 votes |
@Override
public FlinkRunnerResult run(Pipeline pipeline) {
    LOG.info("Executing pipeline using FlinkPipelineRunner.");
    LOG.info("Translating pipeline to Flink program.");

    this.flinkJobEnv.translate(pipeline);

    LOG.info("Starting execution of Flink program.");

    JobExecutionResult result;
    try {
        result = this.flinkJobEnv.executePipeline();
    } catch (Exception e) {
        LOG.error("Pipeline execution failed", e);
        throw new RuntimeException("Pipeline execution failed", e);
    }

    LOG.info("Execution finished in {} msecs", result.getNetRuntime());

    Map<String, Object> accumulators = result.getAllAccumulatorResults();
    if (accumulators != null && !accumulators.isEmpty()) {
        LOG.info("Final aggregator values:");
        for (Map.Entry<String, Object> entry : result.getAllAccumulatorResults().entrySet()) {
            LOG.info("{} : {}", entry.getKey(), entry.getValue());
        }
    }

    return new FlinkRunnerResult(accumulators, result.getNetRuntime());
}
Example 8
Source File: AccumulatorErrorITCase.java From flink with Apache License 2.0 | 5 votes |
private static void assertAccumulatorsShouldFail(JobExecutionResult result) {
    try {
        result.getAllAccumulatorResults();
        fail("Should have failed");
    }
    catch (Exception ex) {
        assertTrue(findThrowable(ex, CustomException.class).isPresent());
    }
}
Example 9
Source File: FlinkFactDistinctColumns.java From kylin with Apache License 2.0 | 4 votes |
@Override
protected void execute(OptionsHelper optionsHelper) throws Exception {
    String cubeName = optionsHelper.getOptionValue(OPTION_CUBE_NAME);
    String metaUrl = optionsHelper.getOptionValue(OPTION_META_URL);
    String segmentId = optionsHelper.getOptionValue(OPTION_SEGMENT_ID);
    String hiveTable = optionsHelper.getOptionValue(OPTION_INPUT_TABLE);
    String inputPath = optionsHelper.getOptionValue(OPTION_INPUT_PATH);
    String outputPath = optionsHelper.getOptionValue(OPTION_OUTPUT_PATH);
    String counterPath = optionsHelper.getOptionValue(OPTION_COUNTER_PATH);
    int samplingPercent = Integer.parseInt(optionsHelper.getOptionValue(OPTION_STATS_SAMPLING_PERCENT));
    String enableObjectReuseOptValue = optionsHelper.getOptionValue(OPTION_ENABLE_OBJECT_REUSE);

    Job job = Job.getInstance();
    FileSystem fs = HadoopUtil.getWorkingFileSystem(job.getConfiguration());
    HadoopUtil.deletePath(job.getConfiguration(), new Path(outputPath));

    final SerializableConfiguration sConf = new SerializableConfiguration(job.getConfiguration());
    KylinConfig envConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);

    final CubeInstance cubeInstance = CubeManager.getInstance(envConfig).getCube(cubeName);

    final FactDistinctColumnsReducerMapping reducerMapping = new FactDistinctColumnsReducerMapping(cubeInstance);
    final int totalReducer = reducerMapping.getTotalReducerNum();

    logger.info("getTotalReducerNum: {}", totalReducer);
    logger.info("getCuboidRowCounterReducerNum: {}", reducerMapping.getCuboidRowCounterReducerNum());
    logger.info("counter path {}", counterPath);

    boolean isSequenceFile = JoinedFlatTable.SEQUENCEFILE.equalsIgnoreCase(envConfig.getFlatTableStorageFormat());

    // calculate source record bytes size
    final String bytesWrittenName = "byte-writer-counter";
    final String recordCounterName = "record-counter";

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    if (!StringUtil.isEmpty(enableObjectReuseOptValue) && enableObjectReuseOptValue.equalsIgnoreCase("true")) {
        env.getConfig().enableObjectReuse();
    }

    DataSet<String[]> recordDataSet = FlinkUtil.readHiveRecords(isSequenceFile, env, inputPath, hiveTable, job);

    // read record from flat table
    // output:
    //   1, statistic
    //   2, field value of dict col
    //   3, min/max field value of not dict col
    DataSet<Tuple2<SelfDefineSortableKey, Text>> flatOutputDataSet = recordDataSet.mapPartition(
            new FlatOutputMapPartitionFunction(sConf, cubeName, segmentId, metaUrl, samplingPercent,
                    bytesWrittenName, recordCounterName));

    // repartition data, make each reducer handle only one col data or the statistic data
    DataSet<Tuple2<SelfDefineSortableKey, Text>> partitionDataSet = flatOutputDataSet
            .partitionCustom(new FactDistinctColumnPartitioner(cubeName, metaUrl, sConf), 0)
            .setParallelism(totalReducer);

    // multiple output result
    //   1, CFG_OUTPUT_COLUMN: field values of dict col, which will not be built in reducer, like globalDictCol
    //   2, CFG_OUTPUT_DICT: dictionary object built in reducer
    //   3, CFG_OUTPUT_STATISTICS: cube statistic: hll of cuboids ...
    //   4, CFG_OUTPUT_PARTITION: dimension value range(min,max)
    DataSet<Tuple2<String, Tuple3<Writable, Writable, String>>> outputDataSet = partitionDataSet
            .mapPartition(new MultiOutputMapPartitionFunction(sConf, cubeName, segmentId, metaUrl, samplingPercent))
            .setParallelism(totalReducer);

    // make each reducer output to respective dir
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_COLUMN, SequenceFileOutputFormat.class,
            NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class,
            NullWritable.class, ArrayPrimitiveWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_STATISTICS, SequenceFileOutputFormat.class,
            LongWritable.class, BytesWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_PARTITION, TextOutputFormat.class,
            NullWritable.class, LongWritable.class);

    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    FileOutputFormat.setCompressOutput(job, false);

    // prevent to create zero-sized default output
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    outputDataSet.output(new HadoopMultipleOutputFormat(new LazyOutputFormat(), job));

    JobExecutionResult jobExecutionResult =
            env.execute("Fact distinct columns for:" + cubeName + " segment " + segmentId);
    Map<String, Object> accumulatorResults = jobExecutionResult.getAllAccumulatorResults();
    Long recordCount = (Long) accumulatorResults.get(recordCounterName);
    Long bytesWritten = (Long) accumulatorResults.get(bytesWrittenName);
    logger.info("Map input records={}", recordCount);
    logger.info("HDFS Read: {} HDFS Write", bytesWritten);
    logger.info("HDFS: Number of bytes written=" + FlinkBatchCubingJobBuilder2.getFileSize(outputPath, fs));

    Map<String, String> counterMap = Maps.newHashMap();
    counterMap.put(ExecutableConstants.SOURCE_RECORDS_COUNT, String.valueOf(recordCount));
    counterMap.put(ExecutableConstants.SOURCE_RECORDS_SIZE, String.valueOf(bytesWritten));

    // save counter to hdfs
    HadoopUtil.writeToSequenceFile(job.getConfiguration(), counterPath, counterMap);
}
Example 10
Source File: AdaptivePageRank.java From flink-perf with Apache License 2.0 | 2 votes |
public static void main(String[] args) throws Exception {

    long numVertices = 41652230;

    double threshold = 0.005 / numVertices;
    double dampeningFactor = 0.85;

    String adjacencyPath = args.length > 1 ? args[0] : "/data/demodata/pagerank/edges/edges.csv";
    String outpath = args.length > 2 ? args[1] : "/data/demodata/pagerank/adacency_comp";
    int numIterations = args.length > 3 ? Integer.valueOf(args[2]) : 100;

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // env.setDegreeOfParallelism(4);

    DataSet<Tuple2<Long, long[]>> adjacency = env.readTextFile(adjacencyPath).map(new AdjacencyBuilder());
    DataSet<Tuple2<Long, long[]>> adjacency2 = env.readTextFile(adjacencyPath).map(new AdjacencyBuilder());

    DataSet<Tuple2<Long, Double>> initialRanks = adjacency
            .flatMap(new InitialMessageBuilder(numVertices, dampeningFactor))
            .groupBy(0)
            .reduceGroup(new Agg());

    DataSet<Tuple2<Long, Double>> initialDeltas = initialRanks.map(new InitialDeltaBuilder(numVertices));

    // ---------- iterative part ---------

    DeltaIteration<Tuple2<Long, Double>, Tuple2<Long, Double>> adaptiveIteration =
            initialRanks.iterateDelta(initialDeltas, numIterations, 0);

    DataSet<Tuple2<Long, Double>> deltas = adaptiveIteration.getWorkset()
            .join(adjacency2).where(0).equalTo(0).with(new DeltaDistributor(0.85))
            .groupBy(0)
            .reduceGroup(new AggAndFilter(threshold));

    DataSet<Tuple2<Long, Double>> rankUpdates = adaptiveIteration.getSolutionSet()
            .join(deltas).where(0).equalTo(0).with(new SolutionJoin());

    adaptiveIteration.closeWith(rankUpdates, deltas)
            .writeAsCsv(outpath + "_adapt", WriteMode.OVERWRITE);

    // System.out.println(env.getExecutionPlan());

    JobExecutionResult result = env.execute("Adaptive Page Rank");
    Map<String, Object> accumulators = result.getAllAccumulatorResults();

    List<String> keys = new ArrayList<String>(accumulators.keySet());
    Collections.sort(keys);
    for (String key : keys) {
        System.out.println(key + " : " + accumulators.get(key));
    }
}