Java Code Examples for org.apache.kylin.cube.CubeInstance#getMergingSegments()
The following examples show how to use
org.apache.kylin.cube.CubeInstance#getMergingSegments().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CubeMergeJob.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
@Override protected void doExecute() throws Exception { buildLayoutWithUpdate = new BuildLayoutWithUpdate(); String cubeId = getParam(MetadataConstants.P_CUBE_ID); String newSegmentId = getParam(MetadataConstants.P_SEGMENT_IDS); final CubeManager cubeManager = CubeManager.getInstance(config); final CubeInstance cube = cubeManager.getCubeByUuid(cubeId); final CubeSegment mergedSeg = cube.getSegmentById(newSegmentId); mergingSegments = cube.getMergingSegments(mergedSeg); for (CubeSegment segment : mergingSegments) { SegmentInfo segInfo = ManagerHub.getSegmentInfo(config, getParam(MetadataConstants.P_CUBE_ID), segment.getUuid()); mergingSegInfos.add(segInfo); } //merge and save segments mergeSegments(cubeId, newSegmentId); }
Example 2
Source File: MergeOffsetStep.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
/**
 * Rewrites the range and source partition offsets of the merged segment from the segments
 * that are being merged into it, then stores the change through the cube manager.
 *
 * @param context execution context that supplies the Kylin config
 * @return a succeeded result when the cube update is stored, or an error result carrying the
 *         {@code IOException} raised by the update
 * @throws ExecuteException declared by the overridden method
 */
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager manager = CubeManager.getInstance(context.getConfig());
    final String cubeName = CubingExecutableUtil.getCubeName(this.getParams());
    final CubeInstance writableCube = manager.getCube(cubeName).latestCopyForWrite();

    final String segmentId = CubingExecutableUtil.getSegmentId(this.getParams());
    final CubeSegment target = writableCube.getSegmentById(segmentId);
    Preconditions.checkNotNull(target, "Cube segment '" + segmentId + "' not found.");

    final Segments<CubeSegment> sources = writableCube.getMergingSegments(target);
    Preconditions.checkArgument(sources.size() > 0, "Merging segment not exist.");

    // After sorting, the first and last elements supply the start and end values below.
    Collections.sort(sources);
    final CubeSegment earliest = sources.get(0);
    final CubeSegment latest = sources.get(sources.size() - 1);

    target.setSegRange(new SegmentRange(earliest.getSegRange().start, latest.getSegRange().end));
    target.setSourcePartitionOffsetStart(earliest.getSourcePartitionOffsetStart());
    target.setSourcePartitionOffsetEnd(latest.getSourcePartitionOffsetEnd());
    target.setTSRange(new TSRange(sources.getTSStart(), sources.getTSEnd()));

    final CubeUpdate pending = new CubeUpdate(writableCube);
    pending.setToUpdateSegs(target);

    try {
        manager.updateCube(pending);
        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to update cube segment offset", e);
        return ExecuteResult.createError(e);
    }
}
Example 3
Source File: JobStepFactory.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
public static NSparkExecutable addStep(DefaultChainedExecutable parent, JobStepType type, CubeInstance cube) { NSparkExecutable step; KylinConfig config = cube.getConfig(); switch (type) { case RESOURCE_DETECT: step = new NResourceDetectStep(parent); break; case CUBING: step = new NSparkCubingStep(config.getSparkBuildClassName()); break; case MERGING: step = new NSparkMergingStep(config.getSparkMergeClassName()); break; case CLEAN_UP_AFTER_MERGE: step = new NSparkUpdateMetaAndCleanupAfterMergeStep(); break; default: throw new IllegalArgumentException(); } step.setParams(parent.getParams()); step.setProject(parent.getProject()); step.setTargetSubject(parent.getTargetSubject()); if (step instanceof NSparkUpdateMetaAndCleanupAfterMergeStep) { CubeSegment mergeSegment = cube.getSegmentById(parent.getTargetSegments().iterator().next()); final Segments<CubeSegment> mergingSegments = cube.getMergingSegments(mergeSegment); step.setParam(MetadataConstants.P_SEGMENT_NAMES, String.join(",", NSparkCubingUtil.toSegmentNames(mergingSegments))); step.setParam(CubingExecutableUtil.SEGMENT_ID, parent.getParam(CubingExecutableUtil.SEGMENT_ID)); step.setParam(MetadataConstants.P_JOB_TYPE, parent.getParam(MetadataConstants.P_JOB_TYPE)); step.setParam(MetadataConstants.P_OUTPUT_META_URL, parent.getParam(MetadataConstants.P_OUTPUT_META_URL)); } parent.addTask(step); //after addTask, step's id is changed step.setDistMetaUrl(config.getJobTmpMetaStoreUrl(parent.getProject(), step.getId())); return step; }
Example 4
Source File: ResourceDetectBeforeMergingJob.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
/**
 * Resource detection pass that runs before a merge: records the segments being merged,
 * writes the count-distinct measure information for the merged segment, builds the merged
 * dataset of every layout, and writes the paths read by each resulting Spark plan.
 *
 * @throws Exception propagated from metadata access, Spark planning or the file writes
 */
@Override
protected void doExecute() throws Exception {
    logger.info("Start detect resource before merge.");

    String cubeId = getParam(MetadataConstants.P_CUBE_ID);
    final CubeInstance cube = CubeManager.getInstance(config).getCubeByUuid(cubeId);
    final CubeSegment mergedSeg = cube.getSegmentById(getParam(MetadataConstants.P_SEGMENT_IDS));
    final SegmentInfo mergedSegInfo = MetadataConverter.getSegmentInfo(
            cube, mergedSeg.getUuid(), mergedSeg.getName(), mergedSeg.getStorageLocationIdentifier());

    // Convert every source segment, in sorted order, to its SegmentInfo.
    final List<CubeSegment> sources = cube.getMergingSegments(mergedSeg);
    final List<SegmentInfo> sourceInfos = Lists.newArrayList();
    Collections.sort(sources);
    for (CubeSegment source : sources) {
        sourceInfos.add(MetadataConverter.getSegmentInfo(
                cube, source.getUuid(), source.getName(), source.getStorageLocationIdentifier()));
    }
    infos.clearMergingSegments();
    infos.recordMergingSegments(sourceInfos);

    Map<Long, DFLayoutMergeAssist> assistByLayout = CubeMergeJob.generateMergeAssist(sourceInfos, ss);

    ResourceDetectUtils.write(
            new Path(config.getJobTmpShareDir(project, jobId), ResourceDetectUtils.countDistinctSuffix()),
            ResourceDetectUtils.findCountDistinctMeasure(
                    JavaConversions.asJavaCollection(mergedSegInfo.toBuildLayouts())));

    // Layout id (as a string) -> paths reported for the merged dataset's Spark plan.
    Map<String, List<String>> resourcePaths = Maps.newHashMap();
    infos.clearSparkPlans();
    for (Map.Entry<Long, DFLayoutMergeAssist> layout : assistByLayout.entrySet()) {
        Dataset<Row> merged = layout.getValue().merge(config, getParam(MetadataConstants.P_CUBE_NAME));
        infos.recordSparkPlan(merged.queryExecution().sparkPlan());

        List<Path> planPaths = JavaConversions
                .seqAsJavaList(ResourceDetectUtils.getPaths(merged.queryExecution().sparkPlan()));
        List<String> planPathStrings = planPaths.stream().map(Path::toString).collect(Collectors.toList());
        resourcePaths.put(String.valueOf(layout.getKey()), planPathStrings);
    }

    ResourceDetectUtils.write(
            new Path(config.getJobTmpShareDir(project, jobId),
                    mergedSeg.getUuid() + "_" + ResourceDetectUtils.fileName()),
            resourcePaths);
}
Example 5
Source File: MergeOffsetStep.java From kylin with Apache License 2.0 | 5 votes |
/**
 * Rewrites the range and source partition offsets of the merged segment from the segments
 * that are being merged into it, then stores the change through the cube manager.
 *
 * @param context execution context that supplies the Kylin config
 * @return a succeeded result when the cube update is stored, or an error result carrying the
 *         {@code IOException} raised by the update
 * @throws ExecuteException declared by the overridden method
 */
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager manager = CubeManager.getInstance(context.getConfig());
    final String cubeName = CubingExecutableUtil.getCubeName(this.getParams());
    final CubeInstance writableCube = manager.getCube(cubeName).latestCopyForWrite();

    final String segmentId = CubingExecutableUtil.getSegmentId(this.getParams());
    final CubeSegment target = writableCube.getSegmentById(segmentId);
    Preconditions.checkNotNull(target, "Cube segment '" + segmentId + "' not found.");

    final Segments<CubeSegment> sources = writableCube.getMergingSegments(target);
    Preconditions.checkArgument(sources.size() > 0, "Merging segment not exist.");

    // After sorting, the first and last elements supply the start and end values below.
    Collections.sort(sources);
    final CubeSegment earliest = sources.get(0);
    final CubeSegment latest = sources.get(sources.size() - 1);

    target.setSegRange(new SegmentRange(earliest.getSegRange().start, latest.getSegRange().end));
    target.setSourcePartitionOffsetStart(earliest.getSourcePartitionOffsetStart());
    target.setSourcePartitionOffsetEnd(latest.getSourcePartitionOffsetEnd());
    target.setTSRange(new TSRange(sources.getTSStart(), sources.getTSEnd()));

    final CubeUpdate pending = new CubeUpdate(writableCube);
    pending.setToUpdateSegs(target);

    try {
        manager.updateCube(pending);
        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to update cube segment offset", e);
        return ExecuteResult.createError(e);
    }
}
Example 6
Source File: MergeCuboidJob.java From kylin-on-parquet-v2 with Apache License 2.0 | 4 votes |
@Override public int run(String[] args) throws Exception { Options options = new Options(); try { options.addOption(OPTION_JOB_NAME); options.addOption(OPTION_CUBE_NAME); options.addOption(OPTION_SEGMENT_ID); options.addOption(OPTION_INPUT_PATH); options.addOption(OPTION_OUTPUT_PATH); parseOptions(options, args); String input = getOptionValue(OPTION_INPUT_PATH); String output = getOptionValue(OPTION_OUTPUT_PATH); String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT); String segmentID = getOptionValue(OPTION_SEGMENT_ID); CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()); CubeInstance cube = cubeMgr.getCube(cubeName); CubeSegment cubeSeg = cube.getSegmentById(segmentID); // start job String jobName = getOptionValue(OPTION_JOB_NAME); logger.info("Starting: " + jobName); job = Job.getInstance(getConf(), jobName); setJobClasspath(job, cube.getConfig()); // add metadata to distributed cache Segments<CubeSegment> allSegs = cube.getMergingSegments(cubeSeg); allSegs.add(cubeSeg); attachSegmentsMetadataWithDict(allSegs, job.getConfiguration()); // Mapper job.setMapperClass(MergeCuboidMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); // Reducer job.setReducerClass(CuboidReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); // set inputs IMROutput2.IMRMergeOutputFormat outputFormat = MRUtil.getBatchMergeOutputSide2(cubeSeg).getOutputFormat(); outputFormat.configureJobInput(job, input); addInputDirs(input, job); // set output outputFormat.configureJobOutput(job, output, cubeSeg); // set job configuration job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName); job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentID); return waitForCompletion(job); } finally { if (job != null) cleanupTempConfFile(job.getConfiguration()); } }
Example 7
Source File: MergeDictionaryJob.java From kylin-on-parquet-v2 with Apache License 2.0 | 4 votes |
/**
 * Configures and runs the dictionary merge job for the cube and segment given on the command
 * line, and always cleans up the temporary configuration file once a job has been created.
 *
 * @param args command-line arguments carrying the job name, segment id, cube name, metadata
 *             URL, merge segment ids and the dictionary/statistics output path options
 * @return the value returned by {@code waitForCompletion}
 * @throws Exception propagated from option parsing, job setup or job execution
 */
@Override
public int run(String[] args) throws Exception {
    try {
        Options options = new Options();
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_SEGMENT_ID);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_META_URL);
        options.addOption(OPTION_MERGE_SEGMENT_IDS);
        options.addOption(OPTION_OUTPUT_PATH_DICT);
        options.addOption(OPTION_OUTPUT_PATH_STAT);
        parseOptions(options, args);

        final String segmentId = getOptionValue(OPTION_SEGMENT_ID);
        final String segmentIds = getOptionValue(OPTION_MERGE_SEGMENT_IDS);
        final String cubeName = getOptionValue(OPTION_CUBE_NAME);
        final String metaUrl = getOptionValue(OPTION_META_URL);
        final String dictOutputPath = getOptionValue(OPTION_OUTPUT_PATH_DICT);
        final String statOutputPath = getOptionValue(OPTION_OUTPUT_PATH_STAT);

        CubeInstance cube = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()).getCube(cubeName);
        CubeDesc cubeDesc = cube.getDescriptor();
        CubeSegment mergedSeg = cube.getSegmentById(segmentId);
        Segments<CubeSegment> sourceSegs = cube.getMergingSegments(mergedSeg);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));

        // Pass the command-line values on to the job through its configuration.
        job.getConfiguration().set(BatchConstants.ARG_CUBE_NAME, cubeName);
        job.getConfiguration().set(OPTION_META_URL.getOpt(), metaUrl);
        job.getConfiguration().set(OPTION_SEGMENT_ID.getOpt(), segmentId);
        job.getConfiguration().set(OPTION_MERGE_SEGMENT_IDS.getOpt(), segmentIds);
        job.getConfiguration().set(OPTION_OUTPUT_PATH_STAT.getOpt(), statOutputPath);
        job.getConfiguration().set("num.map.tasks",
                String.valueOf(cubeDesc.getAllColumnsNeedDictionaryBuilt().size() + 1));
        job.setNumReduceTasks(1);

        setJobClasspath(job, cube.getConfig());

        // dump metadata to HDFS
        attachSegmentsMetadataWithDict(sourceSegs, metaUrl);

        // clean output dir
        HadoopUtil.deletePath(job.getConfiguration(), new Path(dictOutputPath));

        job.setMapperClass(MergeDictionaryMapper.class);
        job.setReducerClass(MergeDictionaryReducer.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(IndexArrInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.NONE);
        SequenceFileOutputFormat.setOutputPath(job, new Path(dictOutputPath));

        logger.info("Starting: " + job.getJobName());

        return waitForCompletion(job);
    } finally {
        if (job != null) {
            cleanupTempConfFile(job.getConfiguration());
        }
    }
}
Example 8
Source File: SparkExecutableLivy.java From kylin-on-parquet-v2 with Apache License 2.0 | 4 votes |
@SuppressWarnings("checkstyle:methodlength") @Override protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException { ExecutableManager mgr = getManager(); Map<String, String> extra = mgr.getOutput(getId()).getExtra(); String sparkJobId = extra.get(ExecutableConstants.SPARK_JOB_ID); if (!StringUtils.isEmpty(sparkJobId)) { return onResumed(sparkJobId, mgr); } else { String cubeName = this.getParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt()); CubeInstance cube = CubeManager.getInstance(context.getConfig()).getCube(cubeName); final KylinConfig config = cube.getConfig(); setAlgorithmLayer(); LivyRestBuilder livyRestBuilder = new LivyRestBuilder(); String segmentID = this.getParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt()); CubeSegment segment = cube.getSegmentById(segmentID); Segments<CubeSegment> mergingSeg = cube.getMergingSegments(segment); dumpMetadata(segment, mergingSeg); Map<String, String> sparkConfs = config.getSparkConfigOverride(); String sparkConfigName = getSparkConfigName(); if (sparkConfigName != null) { Map<String, String> sparkSpecificConfs = config.getSparkConfigOverrideWithSpecificName(sparkConfigName); sparkConfs.putAll(sparkSpecificConfs); } for (Map.Entry<String, String> entry : sparkConfs.entrySet()) { if (entry.getKey().equals("spark.submit.deployMode") || entry.getKey().equals("spark.master") || entry.getKey().equals("spark.yarn.archive")) { continue; } else { livyRestBuilder.addConf(entry.getKey(), entry.getValue()); } } formatArgs(livyRestBuilder.getArgs()); final LivyRestExecutor executor = new LivyRestExecutor(); final PatternedLogger patternedLogger = new PatternedLogger(logger, (infoKey, info) -> { // only care three properties here if (ExecutableConstants.SPARK_JOB_ID.equals(infoKey) || ExecutableConstants.YARN_APP_ID.equals(infoKey) || ExecutableConstants.YARN_APP_URL.equals(infoKey)) { getManager().addJobInfo(getId(), info); } }); try { livyRestBuilder.setLivyTypeEnum(LivyTypeEnum.job); 
executor.execute(livyRestBuilder, patternedLogger); if (isDiscarded()) { return new ExecuteResult(ExecuteResult.State.DISCARDED, "Discarded"); } if (isPaused()) { return new ExecuteResult(ExecuteResult.State.STOPPED, "Stopped"); } // done, update all properties Map<String, String> joblogInfo = patternedLogger.getInfo(); // read counter from hdfs String counterOutput = getParam(BatchConstants.ARG_COUNTER_OUTPUT); if (counterOutput != null) { if (HadoopUtil.getWorkingFileSystem().exists(new Path(counterOutput))) { Map<String, String> counterMap = HadoopUtil.readFromSequenceFile(counterOutput); joblogInfo.putAll(counterMap); } else { logger.warn("Spark counter output path not exists: " + counterOutput); } } readCounters(joblogInfo); getManager().addJobInfo(getId(), joblogInfo); return new ExecuteResult(ExecuteResult.State.SUCCEED, patternedLogger.getBufferedLog()); } catch (Exception e) { logger.error("error run spark job:", e); // clear SPARK_JOB_ID on job failure. extra = mgr.getOutput(getId()).getExtra(); extra.put(ExecutableConstants.SPARK_JOB_ID, ""); getManager().addJobInfo(getId(), extra); return new ExecuteResult(ExecuteResult.State.ERROR, e.getMessage()); } } }
Example 9
Source File: MergeCuboidJob.java From kylin with Apache License 2.0 | 4 votes |
@Override public int run(String[] args) throws Exception { Options options = new Options(); try { options.addOption(OPTION_JOB_NAME); options.addOption(OPTION_CUBE_NAME); options.addOption(OPTION_SEGMENT_ID); options.addOption(OPTION_INPUT_PATH); options.addOption(OPTION_OUTPUT_PATH); parseOptions(options, args); String input = getOptionValue(OPTION_INPUT_PATH); String output = getOptionValue(OPTION_OUTPUT_PATH); String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT); String segmentID = getOptionValue(OPTION_SEGMENT_ID); CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()); CubeInstance cube = cubeMgr.getCube(cubeName); CubeSegment cubeSeg = cube.getSegmentById(segmentID); // start job String jobName = getOptionValue(OPTION_JOB_NAME); logger.info("Starting: " + jobName); job = Job.getInstance(getConf(), jobName); setJobClasspath(job, cube.getConfig()); // add metadata to distributed cache Segments<CubeSegment> allSegs = cube.getMergingSegments(cubeSeg); allSegs.add(cubeSeg); attachSegmentsMetadataWithDict(allSegs, job.getConfiguration()); // Mapper job.setMapperClass(MergeCuboidMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); // Reducer job.setReducerClass(CuboidReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); //set dfs.replication job.getConfiguration().set("dfs.replication", KylinConfig.getInstanceFromEnv().getCuboidDfsReplication()); // set inputs IMROutput2.IMRMergeOutputFormat outputFormat = MRUtil.getBatchMergeOutputSide2(cubeSeg).getOutputFormat(); outputFormat.configureJobInput(job, input); addInputDirs(input, job); // set output outputFormat.configureJobOutput(job, output, cubeSeg); // set job configuration job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName); job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentID); return waitForCompletion(job); } finally { if (job != null) 
cleanupTempConfFile(job.getConfiguration()); } }
Example 10
Source File: MergeDictionaryJob.java From kylin with Apache License 2.0 | 4 votes |
/**
 * Configures and runs the dictionary merge job for the cube and segment given on the command
 * line, and always cleans up the temporary configuration file once a job has been created.
 *
 * @param args command-line arguments carrying the job name, segment id, cube name, metadata
 *             URL, merge segment ids and the dictionary/statistics output path options
 * @return the value returned by {@code waitForCompletion}
 * @throws Exception propagated from option parsing, job setup or job execution
 */
@Override
public int run(String[] args) throws Exception {
    try {
        Options options = new Options();
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_SEGMENT_ID);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_META_URL);
        options.addOption(OPTION_MERGE_SEGMENT_IDS);
        options.addOption(OPTION_OUTPUT_PATH_DICT);
        options.addOption(OPTION_OUTPUT_PATH_STAT);
        parseOptions(options, args);

        final String segmentId = getOptionValue(OPTION_SEGMENT_ID);
        final String segmentIds = getOptionValue(OPTION_MERGE_SEGMENT_IDS);
        final String cubeName = getOptionValue(OPTION_CUBE_NAME);
        final String metaUrl = getOptionValue(OPTION_META_URL);
        final String dictOutputPath = getOptionValue(OPTION_OUTPUT_PATH_DICT);
        final String statOutputPath = getOptionValue(OPTION_OUTPUT_PATH_STAT);

        CubeInstance cube = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()).getCube(cubeName);
        CubeDesc cubeDesc = cube.getDescriptor();
        CubeSegment mergedSeg = cube.getSegmentById(segmentId);
        Segments<CubeSegment> sourceSegs = cube.getMergingSegments(mergedSeg);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));

        // Pass the command-line values on to the job through its configuration.
        job.getConfiguration().set(BatchConstants.ARG_CUBE_NAME, cubeName);
        job.getConfiguration().set(OPTION_META_URL.getOpt(), metaUrl);
        job.getConfiguration().set(OPTION_SEGMENT_ID.getOpt(), segmentId);
        job.getConfiguration().set(OPTION_MERGE_SEGMENT_IDS.getOpt(), segmentIds);
        job.getConfiguration().set(OPTION_OUTPUT_PATH_STAT.getOpt(), statOutputPath);
        job.getConfiguration().set("num.map.tasks",
                String.valueOf(cubeDesc.getAllColumnsNeedDictionaryBuilt().size() + 1));
        job.setNumReduceTasks(1);

        setJobClasspath(job, cube.getConfig());

        // dump metadata to HDFS
        attachSegmentsMetadataWithDict(sourceSegs, metaUrl);

        // clean output dir
        HadoopUtil.deletePath(job.getConfiguration(), new Path(dictOutputPath));

        job.setMapperClass(MergeDictionaryMapper.class);
        job.setReducerClass(MergeDictionaryReducer.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(IndexArrInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.NONE);
        SequenceFileOutputFormat.setOutputPath(job, new Path(dictOutputPath));

        logger.info("Starting: " + job.getJobName());

        return waitForCompletion(job);
    } finally {
        if (job != null) {
            cleanupTempConfFile(job.getConfiguration());
        }
    }
}
Example 11
Source File: SparkExecutableLivy.java From kylin with Apache License 2.0 | 4 votes |
@SuppressWarnings("checkstyle:methodlength") @Override protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException { ExecutableManager mgr = getManager(); Map<String, String> extra = mgr.getOutput(getId()).getExtra(); String sparkJobId = extra.get(ExecutableConstants.SPARK_JOB_ID); if (!StringUtils.isEmpty(sparkJobId)) { return onResumed(sparkJobId, mgr); } else { String cubeName = this.getParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt()); CubeInstance cube = CubeManager.getInstance(context.getConfig()).getCube(cubeName); final KylinConfig config = cube.getConfig(); setAlgorithmLayer(); LivyRestBuilder livyRestBuilder = new LivyRestBuilder(); String segmentID = this.getParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt()); CubeSegment segment = cube.getSegmentById(segmentID); Segments<CubeSegment> mergingSeg = cube.getMergingSegments(segment); dumpMetadata(segment, mergingSeg); Map<String, String> sparkConfs = config.getSparkConfigOverride(); String sparkConfigName = getSparkConfigName(); if (sparkConfigName != null) { Map<String, String> sparkSpecificConfs = config.getSparkConfigOverrideWithSpecificName(sparkConfigName); sparkConfs.putAll(sparkSpecificConfs); } for (Map.Entry<String, String> entry : sparkConfs.entrySet()) { if (entry.getKey().equals("spark.submit.deployMode") || entry.getKey().equals("spark.master") || entry.getKey().equals("spark.yarn.archive")) { continue; } else { livyRestBuilder.addConf(entry.getKey(), entry.getValue()); } } formatArgs(livyRestBuilder.getArgs()); final LivyRestExecutor executor = new LivyRestExecutor(); final PatternedLogger patternedLogger = new PatternedLogger(logger, (infoKey, info) -> { // only care three properties here if (ExecutableConstants.SPARK_JOB_ID.equals(infoKey) || ExecutableConstants.YARN_APP_ID.equals(infoKey) || ExecutableConstants.YARN_APP_URL.equals(infoKey)) { getManager().addJobInfo(getId(), info); } }); try { livyRestBuilder.setLivyTypeEnum(LivyTypeEnum.job); 
executor.execute(livyRestBuilder, patternedLogger); if (isDiscarded()) { return new ExecuteResult(ExecuteResult.State.DISCARDED, "Discarded"); } if (isPaused()) { return new ExecuteResult(ExecuteResult.State.STOPPED, "Stopped"); } // done, update all properties Map<String, String> joblogInfo = patternedLogger.getInfo(); // read counter from hdfs String counterOutput = getParam(BatchConstants.ARG_COUNTER_OUTPUT); if (counterOutput != null) { if (HadoopUtil.getWorkingFileSystem().exists(new Path(counterOutput))) { Map<String, String> counterMap = HadoopUtil.readFromSequenceFile(counterOutput); joblogInfo.putAll(counterMap); } else { logger.warn("Spark counter output path not exists: " + counterOutput); } } readCounters(joblogInfo); getManager().addJobInfo(getId(), joblogInfo); return new ExecuteResult(ExecuteResult.State.SUCCEED, patternedLogger.getBufferedLog()); } catch (Exception e) { logger.error("error run spark job:", e); // clear SPARK_JOB_ID on job failure. extra = mgr.getOutput(getId()).getExtra(); extra.put(ExecutableConstants.SPARK_JOB_ID, ""); getManager().addJobInfo(getId(), extra); return new ExecuteResult(ExecuteResult.State.ERROR, e.getMessage()); } } }