Java Code Examples for org.apache.kylin.cube.CubeInstance#getSegmentById()
The following examples show how to use org.apache.kylin.cube.CubeInstance#getSegmentById().
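Before the project examples, here is a minimal sketch of the typical call pattern. It is only an illustration: the cube name "my_cube" and the segmentId variable are placeholders, not values taken from any example below. getSegmentById() looks a segment up by its UUID and returns null when no matching segment exists, so callers normally check the result, as several of the examples do.

// Minimal usage sketch; "my_cube" and segmentId are placeholder values.
KylinConfig config = KylinConfig.getInstanceFromEnv();
CubeInstance cube = CubeManager.getInstance(config).getCube("my_cube");
CubeSegment segment = cube.getSegmentById(segmentId); // may return null for an unknown UUID
if (segment != null) {
    SegmentStatusEnum status = segment.getStatus(); // e.g. NEW, READY
    // ... work with the segment
}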
Example 1
Source File: JobService.java From Kylin with Apache License 2.0
@PreAuthorize(Constant.ACCESS_HAS_ROLE_ADMIN + " or hasPermission(#job, 'ADMINISTRATION') or hasPermission(#job, 'OPERATION') or hasPermission(#job, 'MANAGEMENT')")
public JobInstance cancelJob(String jobId) throws IOException, JobException {
    // CubeInstance cube = this.getCubeManager().getCube(job.getRelatedCube());
    // for (BuildCubeJob cubeJob: listAllCubingJobs(cube.getName(), null, EnumSet.of(ExecutableState.READY, ExecutableState.RUNNING))) {
    //     getExecutableManager().stopJob(cubeJob.getId());
    // }
    final JobInstance jobInstance = getJobInstance(jobId);
    final String segmentId = jobInstance.getRelatedSegment();
    CubeInstance cubeInstance = getCubeManager().getCube(jobInstance.getRelatedCube());
    final CubeSegment segment = cubeInstance.getSegmentById(segmentId);
    if (segment.getStatus() == SegmentStatusEnum.NEW) {
        cubeInstance.getSegments().remove(segment);
        getCubeManager().updateCube(cubeInstance);
    }
    getExecutableManager().discardJob(jobId);
    return jobInstance;
}
Example 2
Source File: CuboidShardUtil.java From kylin-on-parquet-v2 with Apache License 2.0
public static void saveCuboidShards(CubeSegment segment, Map<Long, Short> cuboidShards, int totalShards) throws IOException {
    CubeManager cubeManager = CubeManager.getInstance(segment.getConfig());

    Map<Long, Short> filtered = Maps.newHashMap();
    for (Map.Entry<Long, Short> entry : cuboidShards.entrySet()) {
        if (entry.getValue() > 1) {
            filtered.put(entry.getKey(), entry.getValue());
        }
    }

    // work on copy instead of cached objects
    CubeInstance cubeCopy = segment.getCubeInstance().latestCopyForWrite();
    CubeSegment segCopy = cubeCopy.getSegmentById(segment.getUuid());

    segCopy.setCuboidShardNums(filtered);
    segCopy.setTotalShards(totalShards);

    CubeUpdate update = new CubeUpdate(cubeCopy);
    update.setToUpdateSegs(segCopy);
    cubeManager.updateCube(update);
}
Example 3
Source File: JobService.java From kylin-on-parquet-v2 with Apache License 2.0
private void cancelCubingJobInner(CubingJob cubingJob) throws IOException {
    CubeInstance cubeInstance = getCubeManager().getCube(CubingExecutableUtil.getCubeName(cubingJob.getParams())); // might not a cube job
    final String segmentIds = CubingExecutableUtil.getSegmentId(cubingJob.getParams());
    if (!StringUtils.isEmpty(segmentIds)) {
        for (String segmentId : StringUtils.split(segmentIds)) {
            final CubeSegment segment = cubeInstance.getSegmentById(segmentId);
            if (segment != null && (segment.getStatus() == SegmentStatusEnum.NEW || segment.getTSRange().end.v == 0)) {
                // Remove this segment
                getCubeManager().updateCubeDropSegments(cubeInstance, segment);
            }
        }
    }
    getExecutableManager().discardJob(cubingJob.getId());
}
Example 4
Source File: CopyDictionaryStep.java From kylin with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams())).latestCopyForWrite();
    final CubeSegment optimizeSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

    CubeSegment oldSegment = optimizeSegment.getCubeInstance().getOriginalSegmentToOptimize(optimizeSegment);
    Preconditions.checkNotNull(oldSegment, "cannot find the original segment to be optimized by " + optimizeSegment);

    // --- Copy dictionary
    optimizeSegment.getDictionaries().putAll(oldSegment.getDictionaries());
    optimizeSegment.getSnapshots().putAll(oldSegment.getSnapshots());
    optimizeSegment.getRowkeyStats().addAll(oldSegment.getRowkeyStats());

    try {
        CubeUpdate cubeBuilder = new CubeUpdate(cube);
        cubeBuilder.setToUpdateSegs(optimizeSegment);
        mgr.updateCube(cubeBuilder);
    } catch (IOException e) {
        logger.error("fail to merge dictionary or lookup snapshots", e);
        return ExecuteResult.createError(e);
    }

    return new ExecuteResult();
}
Example 5
Source File: JobService.java From kylin with Apache License 2.0
private void cancelCubingJobInner(CubingJob cubingJob) throws IOException {
    CubeInstance cubeInstance = getCubeManager().getCube(CubingExecutableUtil.getCubeName(cubingJob.getParams())); // might not a cube job
    final String segmentIds = CubingExecutableUtil.getSegmentId(cubingJob.getParams());
    if (!StringUtils.isEmpty(segmentIds)) {
        for (String segmentId : StringUtils.split(segmentIds)) {
            final CubeSegment segment = cubeInstance.getSegmentById(segmentId);
            if (segment != null && (segment.getStatus() == SegmentStatusEnum.NEW || segment.getTSRange().end.v == 0)) {
                // Remove this segment
                getCubeManager().updateCubeDropSegments(cubeInstance, segment);
            }
        }
    }
    getExecutableManager().discardJob(cubingJob.getId());
}
Example 6
Source File: SparkExecutable.java From kylin with Apache License 2.0
private void updateSparkDimensionDicMetadata(KylinConfig config, CubeInstance cube, String segmentId) throws IOException {
    KylinConfig hdfsConfig = AbstractHadoopJob
            .loadKylinConfigFromHdfs(this.getParam(SparkBuildDictionary.OPTION_META_URL.getOpt()));
    CubeInstance cubeInstance = CubeManager.getInstance(hdfsConfig).reloadCube(cube.getName());
    CubeSegment segment = cubeInstance.getSegmentById(segmentId);

    CubeSegment oldSeg = cube.getSegmentById(segmentId);
    oldSeg.setDictionaries((ConcurrentHashMap<String, String>) segment.getDictionaries());
    oldSeg.setSnapshots((ConcurrentHashMap) segment.getSnapshots());
    oldSeg.getRowkeyStats().addAll(segment.getRowkeyStats());

    CubeInstance cubeCopy = cube.latestCopyForWrite();
    CubeUpdate update = new CubeUpdate(cubeCopy);
    update.setToUpdateSegs(oldSeg);
    CubeManager.getInstance(config).updateCube(update);

    Set<String> dumpList = new LinkedHashSet<>();
    dumpList.addAll(segment.getDictionaryPaths());
    dumpList.addAll(segment.getSnapshotPaths());

    JobRelatedMetaUtil.dumpAndUploadKylinPropsAndMetadata(dumpList, (KylinConfigExt) segment.getConfig(),
            config.getMetadataUrl().toString());
}
Example 7
Source File: NDCuboidMapper.java From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected void doSetup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());
    cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
    segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID);
    String cuboidModeName = context.getConfiguration().get(BatchConstants.CFG_CUBOID_MODE);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    cubeDesc = cube.getDescriptor();
    cubeSegment = cube.getSegmentById(segmentID);
    ndCuboidBuilder = new NDCuboidBuilder(cubeSegment);

    // initialize CubiodScheduler
    cuboidScheduler = CuboidSchedulerUtil.getCuboidSchedulerByMode(cubeSegment, cuboidModeName);
    rowKeySplitter = new RowKeySplitter(cubeSegment);
}
Example 8
Source File: FlinkCubingByLayer.java From kylin-on-parquet-v2 with Apache License 2.0
@Override
public void open(Configuration parameters) throws Exception {
    KylinConfig kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
    try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
            .setAndUnsetThreadLocalConfig(kConfig)) {
        CubeInstance cubeInstance = CubeManager.getInstance(kConfig).getCube(cubeName);
        CubeDesc cubeDesc = cubeInstance.getDescriptor();
        CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);
        CubeJoinedFlatTableEnrich interDesc = new CubeJoinedFlatTableEnrich(
                EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
        long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
        Cuboid baseCuboid = Cuboid.findForMandatory(cubeDesc, baseCuboidId);
        baseCuboidBuilder = new BaseCuboidBuilder(kConfig, cubeDesc, cubeSegment, interDesc,
                AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid),
                MeasureIngester.create(cubeDesc.getMeasures()), cubeSegment.buildDictionaryMap());
    }
}
Example 9
Source File: DictionaryGeneratorCLI.java From kylin with Apache License 2.0
public static void processSegment(KylinConfig config, String cubeName, String segmentID, String uuid,
        DistinctColumnValuesProvider factTableValueProvider, DictionaryProvider dictProvider) throws IOException {
    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    CubeSegment segment = cube.getSegmentById(segmentID);

    int retryTime = 0;
    while (retryTime < 3) {
        if (retryTime > 0) {
            logger.info("Rebuild dictionary and snapshot for Cube: {}, Segment: {}, {} times.", cubeName, segmentID, retryTime);
        }

        processSegment(config, segment, uuid, factTableValueProvider, dictProvider);

        if (isAllDictsAndSnapshotsReady(config, cubeName, segmentID)) {
            break;
        }
        retryTime++;
    }

    if (retryTime >= 3) {
        logger.error("Not all dictionaries and snapshots ready for cube segment: {}", segmentID);
    } else {
        logger.info("Succeed to build all dictionaries and snapshots for cube segment: {}", segmentID);
    }
}
Example 10
Source File: CubeBuildJob.java From kylin-on-parquet-v2 with Apache License 2.0
private void updateSegmentSourceBytesSize(String cubeId, Map<String, Object> toUpdateSegmentSourceSize)
        throws IOException {
    CubeInstance cubeInstance = cubeManager.getCubeByUuid(cubeId);
    CubeInstance cubeCopy = cubeInstance.latestCopyForWrite();
    CubeUpdate update = new CubeUpdate(cubeCopy);
    List<CubeSegment> cubeSegments = Lists.newArrayList();
    for (Map.Entry<String, Object> entry : toUpdateSegmentSourceSize.entrySet()) {
        CubeSegment segment = cubeCopy.getSegmentById(entry.getKey());
        segment.setInputRecordsSize((Long) entry.getValue());
        segment.setLastBuildTime(System.currentTimeMillis());
        cubeSegments.add(segment);
    }
    update.setToUpdateSegs(cubeSegments.toArray(new CubeSegment[0]));
    cubeManager.updateCube(update);
}
Example 11
Source File: SparkCubingByLayer.java From kylin-on-parquet-v2 with Apache License 2.0
@Override
public Tuple2<ByteArray, Object[]> call(String[] rowArray) throws Exception {
    if (initialized == false) {
        synchronized (SparkCubingByLayer.class) {
            if (initialized == false) {
                KylinConfig kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
                try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
                        .setAndUnsetThreadLocalConfig(kConfig)) {
                    CubeInstance cubeInstance = CubeManager.getInstance(kConfig).getCube(cubeName);
                    CubeDesc cubeDesc = cubeInstance.getDescriptor();
                    CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);
                    CubeJoinedFlatTableEnrich interDesc = new CubeJoinedFlatTableEnrich(
                            EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
                    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
                    Cuboid baseCuboid = Cuboid.findForMandatory(cubeDesc, baseCuboidId);
                    baseCuboidBuilder = new BaseCuboidBuilder(kConfig, cubeDesc, cubeSegment, interDesc,
                            AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid),
                            MeasureIngester.create(cubeDesc.getMeasures()), cubeSegment.buildDictionaryMap());
                    initialized = true;
                }
            }
        }
    }
    baseCuboidBuilder.resetAggrs();
    byte[] rowKey = baseCuboidBuilder.buildKey(rowArray);
    Object[] result = baseCuboidBuilder.buildValueObjects(rowArray);
    return new Tuple2<>(new ByteArray(rowKey), result);
}
Example 12
Source File: UpdateCubeInfoAfterBuildStep.java From kylin with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeManager = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = cubeManager.getCube(CubingExecutableUtil.getCubeName(this.getParams()))
            .latestCopyForWrite();
    final CubeSegment segment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

    CubingJob cubingJob = (CubingJob) getManager().getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
    long sourceCount = cubingJob.findSourceRecordCount();
    long sourceSizeBytes = cubingJob.findSourceSizeBytes();
    long cubeSizeBytes = cubingJob.findCubeSizeBytes();

    KylinConfig config = KylinConfig.getInstanceFromEnv();
    List<Double> cuboidEstimateRatio = cubingJob.findEstimateRatio(segment, config);

    segment.setLastBuildJobID(CubingExecutableUtil.getCubingJobId(this.getParams()));
    segment.setLastBuildTime(System.currentTimeMillis());
    segment.setSizeKB(cubeSizeBytes / 1024);
    segment.setInputRecords(sourceCount);
    segment.setInputRecordsSize(sourceSizeBytes);
    segment.setEstimateRatio(cuboidEstimateRatio);

    try {
        deleteDictionaryIfNeeded(segment);
        saveExtSnapshotIfNeeded(cubeManager, cube, segment);
        updateSegment(segment);

        cubeManager.promoteNewlyBuiltSegments(cube, segment);
        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to update cube after build", e);
        return ExecuteResult.createError(e);
    }
}
Example 13
Source File: SparkCubingMerge.java From kylin-on-parquet-v2 with Apache License 2.0
private void init() {
    this.kylinConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
    final CubeInstance cube = CubeManager.getInstance(kylinConfig).getCube(cubeName);
    final CubeDesc cubeDesc = CubeDescManager.getInstance(kylinConfig).getCubeDesc(cube.getDescName());
    final CubeSegment sourceSeg = cube.getSegmentById(sourceSegmentId);
    final CubeSegment mergedSeg = cube.getSegmentById(mergedSegmentId);
    this.segmentReEncoder = new SegmentReEncoder(cubeDesc, sourceSeg, mergedSeg, kylinConfig);
}
Example 14
Source File: CubeMergeJob.java From kylin-on-parquet-v2 with Apache License 2.0
private void mergeSegments(String cubeId, String segmentId) throws IOException {
    CubeManager mgr = CubeManager.getInstance(config);
    CubeInstance cube = mgr.getCubeByUuid(cubeId);
    CubeSegment mergedSeg = cube.getSegmentById(segmentId);
    SegmentInfo mergedSegInfo = ManagerHub.getSegmentInfo(config, getParam(MetadataConstants.P_CUBE_ID), mergedSeg.getUuid());

    Map<Long, DFLayoutMergeAssist> mergeCuboidsAssist = generateMergeAssist(mergingSegInfos, ss);
    for (DFLayoutMergeAssist assist : mergeCuboidsAssist.values()) {
        SpanningTree spanningTree = new ForestSpanningTree(JavaConversions.asJavaCollection(mergedSegInfo.toBuildLayouts()));

        Dataset<Row> afterMerge = assist.merge(config, cube.getName());
        LayoutEntity layout = assist.getLayout();

        Dataset<Row> afterSort;
        if (layout.isTableIndex()) {
            afterSort = afterMerge.sortWithinPartitions(NSparkCubingUtil.getColumns(layout.getOrderedDimensions().keySet()));
        } else {
            Column[] dimsCols = NSparkCubingUtil.getColumns(layout.getOrderedDimensions().keySet());
            Dataset<Row> afterAgg = CuboidAggregator.agg(ss, afterMerge, layout.getOrderedDimensions().keySet(),
                    layout.getOrderedMeasures(), spanningTree, false);
            afterSort = afterAgg.sortWithinPartitions(dimsCols);
        }

        buildLayoutWithUpdate.submit(new BuildLayoutWithUpdate.JobEntity() {
            @Override
            public String getName() {
                return "merge-layout-" + layout.getId();
            }

            @Override
            public LayoutEntity build() throws IOException {
                return saveAndUpdateCuboid(afterSort, mergedSegInfo, layout, assist);
            }
        }, config);

        buildLayoutWithUpdate.updateLayout(mergedSegInfo, config);
    }
}
Example 15
Source File: SparkBuildDictionary.java From kylin with Apache License 2.0
private String buildSnapshotTable(KylinConfig config, CubeSegment cubeSeg, String lookupTable, String uuid) throws IOException {
    CubeInstance cubeCopy = cubeSeg.getCubeInstance().latestCopyForWrite(); // get a latest copy
    CubeSegment segCopy = cubeCopy.getSegmentById(cubeSeg.getUuid());

    TableMetadataManager metaMgr = TableMetadataManager.getInstance(config);
    SnapshotManager snapshotMgr = SnapshotManager.getInstance(config);

    TableDesc tableDesc = new TableDesc(metaMgr.getTableDesc(lookupTable, segCopy.getProject()));
    IReadableTable hiveTable = SourceManager.createReadableTable(tableDesc, uuid);
    SnapshotTable snapshot = snapshotMgr.buildSnapshot(hiveTable, tableDesc, cubeSeg.getConfig());

    return snapshot.getResourcePath();
}
Example 16
Source File: JobStepFactory.java From kylin-on-parquet-v2 with Apache License 2.0
public static NSparkExecutable addStep(DefaultChainedExecutable parent, JobStepType type, CubeInstance cube) {
    NSparkExecutable step;
    KylinConfig config = cube.getConfig();
    switch (type) {
    case RESOURCE_DETECT:
        step = new NResourceDetectStep(parent);
        break;
    case CUBING:
        step = new NSparkCubingStep(config.getSparkBuildClassName());
        break;
    case MERGING:
        step = new NSparkMergingStep(config.getSparkMergeClassName());
        break;
    case CLEAN_UP_AFTER_MERGE:
        step = new NSparkUpdateMetaAndCleanupAfterMergeStep();
        break;
    default:
        throw new IllegalArgumentException();
    }

    step.setParams(parent.getParams());
    step.setProject(parent.getProject());
    step.setTargetSubject(parent.getTargetSubject());

    if (step instanceof NSparkUpdateMetaAndCleanupAfterMergeStep) {
        CubeSegment mergeSegment = cube.getSegmentById(parent.getTargetSegments().iterator().next());
        final Segments<CubeSegment> mergingSegments = cube.getMergingSegments(mergeSegment);
        step.setParam(MetadataConstants.P_SEGMENT_NAMES,
                String.join(",", NSparkCubingUtil.toSegmentNames(mergingSegments)));
        step.setParam(CubingExecutableUtil.SEGMENT_ID, parent.getParam(CubingExecutableUtil.SEGMENT_ID));
        step.setParam(MetadataConstants.P_JOB_TYPE, parent.getParam(MetadataConstants.P_JOB_TYPE));
        step.setParam(MetadataConstants.P_OUTPUT_META_URL, parent.getParam(MetadataConstants.P_OUTPUT_META_URL));
    }
    parent.addTask(step);
    //after addTask, step's id is changed
    step.setDistMetaUrl(config.getJobTmpMetaStoreUrl(parent.getProject(), step.getId()));
    return step;
}
Example 17
Source File: CalculateStatsFromBaseCuboidMapper.java From kylin with Apache License 2.0
@Override
protected void doSetup(Context context) throws IOException {
    Configuration conf = context.getConfiguration();
    HadoopUtil.setCurrentConfiguration(conf);
    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

    String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    CubeDesc cubeDesc = cube.getDescriptor();
    CubeSegment cubeSegment = cube.getSegmentById(conf.get(BatchConstants.CFG_CUBE_SEGMENT_ID));

    baseCuboidId = cube.getCuboidScheduler().getBaseCuboidId();
    nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;

    String cuboidModeName = conf.get(BatchConstants.CFG_CUBOID_MODE);
    Set<Long> cuboidIdSet = cube.getCuboidsByMode(cuboidModeName);
    cuboidIds = cuboidIdSet.toArray(new Long[cuboidIdSet.size()]);
    allCuboidsBitSet = CuboidUtil.getCuboidBitSet(cuboidIds, nRowKey);

    samplingPercentage = Integer
            .parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));

    allCuboidsHLL = new HLLCounter[cuboidIds.length];
    for (int i = 0; i < cuboidIds.length; i++) {
        allCuboidsHLL[i] = new HLLCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision());
    }

    //for KYLIN-2518 backward compatibility
    if (KylinVersion.isBefore200(cubeDesc.getVersion())) {
        isUsePutRowKeyToHllNewAlgorithm = false;
        hf = Hashing.murmur3_32();
        logger.info("Found KylinVersion : {}. Use old algorithm for cuboid sampling.", cubeDesc.getVersion());
    } else {
        isUsePutRowKeyToHllNewAlgorithm = true;
        rowHashCodesLong = new long[nRowKey];
        hf = Hashing.murmur3_128();
        logger.info(
                "Found KylinVersion : {}. Use new algorithm for cuboid sampling. About the details of the new algorithm, please refer to KYLIN-2518",
                cubeDesc.getVersion());
    }

    rowKeyDecoder = new RowKeyDecoder(cubeSegment);
}
Example 18
Source File: JobService.java From kylin with Apache License 2.0
public JobInstance submitRecoverSegmentOptimizeJob(CubeSegment segment, String submitter)
        throws IOException, JobException {
    CubeInstance cubeInstance = segment.getCubeInstance();

    checkCubeDescSignature(cubeInstance);

    String cubeName = cubeInstance.getName();
    List<JobInstance> jobInstanceList = searchJobsByCubeName(cubeName, null,
            Lists.newArrayList(JobStatusEnum.NEW, JobStatusEnum.PENDING, JobStatusEnum.ERROR),
            JobTimeFilterEnum.ALL, JobSearchMode.CHECKPOINT_ONLY);
    if (jobInstanceList.size() > 1) {
        throw new IllegalStateException("Exist more than one CheckpointExecutable for cube " + cubeName);
    } else if (jobInstanceList.size() == 0) {
        throw new IllegalStateException("There's no CheckpointExecutable for cube " + cubeName);
    }
    CheckpointExecutable checkpointExecutable = (CheckpointExecutable) getExecutableManager()
            .getJob(jobInstanceList.get(0).getId());

    AbstractExecutable toBeReplaced = null;
    for (AbstractExecutable taskForCheck : checkpointExecutable.getSubTasksForCheck()) {
        if (taskForCheck instanceof CubingJob) {
            CubingJob subCubingJob = (CubingJob) taskForCheck;
            String segmentName = CubingExecutableUtil.getSegmentName(subCubingJob.getParams());
            if (segmentName != null && segmentName.equals(segment.getName())) {
                String segmentID = CubingExecutableUtil.getSegmentId(subCubingJob.getParams());
                CubeSegment beingOptimizedSegment = cubeInstance.getSegmentById(segmentID);
                if (beingOptimizedSegment != null) { // beingOptimizedSegment exists & should not be recovered
                    throw new IllegalStateException("Segment " + beingOptimizedSegment.getName() + "-"
                            + beingOptimizedSegment.getUuid()
                            + " still exists. Please delete it or discard the related optimize job first!!!");
                }
                toBeReplaced = taskForCheck;
                break;
            }
        }
    }
    if (toBeReplaced == null) {
        throw new IllegalStateException("There's no CubingJob for segment " + segment.getName()
                + " in CheckpointExecutable " + checkpointExecutable.getName());
    }

    /** Add CubingJob for the related segment **/
    CubeSegment optimizeSegment = getCubeManager().appendSegment(cubeInstance, segment.getTSRange());

    DefaultChainedExecutable optimizeJob = EngineFactory.createBatchOptimizeJob(optimizeSegment, submitter);

    getExecutableManager().addJob(optimizeJob);

    JobInstance optimizeJobInstance = getSingleJobInstance(optimizeJob);

    /** Update the checkpoint job */
    checkpointExecutable.getSubTasksForCheck().set(checkpointExecutable.getSubTasksForCheck().indexOf(toBeReplaced),
            optimizeJob);
    getExecutableManager().updateCheckpointJob(checkpointExecutable.getId(),
            checkpointExecutable.getSubTasksForCheck());

    return optimizeJobInstance;
}
Example 19
Source File: CalculateStatsFromBaseCuboidJob.java From kylin-on-parquet-v2 with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_ID);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_STATISTICS_SAMPLING_PERCENT);
        options.addOption(OPTION_CUBOID_MODE);
        parseOptions(options, args);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));

        String cubeName = getOptionValue(OPTION_CUBE_NAME);
        String segmentID = getOptionValue(OPTION_SEGMENT_ID);
        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String statistics_sampling_percent = getOptionValue(OPTION_STATISTICS_SAMPLING_PERCENT);
        String cuboidMode = getOptionValue(OPTION_CUBOID_MODE);

        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeSegment cubeSegment = cube.getSegmentById(segmentID);

        job.getConfiguration().set(BatchConstants.CFG_CUBOID_MODE, cuboidMode);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentID);
        job.getConfiguration().set(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT, statistics_sampling_percent);
        logger.info("Starting: " + job.getJobName());

        setJobClasspath(job, cube.getConfig());

        setupMapper(input);
        setupReducer(output, cubeSegment);

        attachSegmentMetadataWithDict(cubeSegment, job.getConfiguration());

        return waitForCompletion(job);
    } catch (Exception e) {
        logger.error("error in CalculateStatsFromBaseCuboidJob", e);
        printUsage(options);
        throw e;
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}
Example 20
Source File: CuboidJob.java From kylin-on-parquet-v2 with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    if (this.mapperClass == null)
        throw new Exception("Mapper class is not set!");

    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_ID);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_NCUBOID_LEVEL);
        options.addOption(OPTION_CUBING_JOB_ID);
        options.addOption(OPTION_CUBOID_MODE);
        options.addOption(OPTION_DICTIONARY_SHRUNKEN_PATH);
        parseOptions(options, args);

        String output = getOptionValue(OPTION_OUTPUT_PATH);
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
        int nCuboidLevel = Integer.parseInt(getOptionValue(OPTION_NCUBOID_LEVEL));
        String segmentID = getOptionValue(OPTION_SEGMENT_ID);
        String cubingJobId = getOptionValue(OPTION_CUBING_JOB_ID);
        String cuboidModeName = getOptionValue(OPTION_CUBOID_MODE);
        if (cuboidModeName == null) {
            cuboidModeName = CuboidModeEnum.CURRENT.toString();
        }

        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeSegment segment = cube.getSegmentById(segmentID);
        cuboidScheduler = CuboidSchedulerUtil.getCuboidSchedulerByMode(segment, cuboidModeName);

        if (checkSkip(cubingJobId, nCuboidLevel)) {
            logger.info(
                    "Skip job " + getOptionValue(OPTION_JOB_NAME) + " for " + segmentID + "[" + segmentID + "]");
            return 0;
        }

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        job.getConfiguration().set(BatchConstants.ARG_CUBING_JOB_ID, cubingJobId);
        String shrunkenDictPath = getOptionValue(OPTION_DICTIONARY_SHRUNKEN_PATH);
        if (shrunkenDictPath != null) {
            job.getConfiguration().set(BatchConstants.ARG_SHRUNKEN_DICT_PATH, shrunkenDictPath);
        }

        logger.info("Starting: " + job.getJobName());

        setJobClasspath(job, cube.getConfig());

        // add metadata to distributed cache
        attachSegmentMetadataWithAll(segment, job.getConfiguration());

        // Mapper
        job.setMapperClass(this.mapperClass);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setCombinerClass(CuboidReducer.class); // for base cuboid shuffle skew, some rowkey aggregates far more records than others

        // Reducer
        job.setReducerClass(CuboidReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // set input
        configureMapperInputFormat(segment);

        // set output
        IMROutput2.IMROutputFormat outputFormat = MRUtil.getBatchCubingOutputSide2(segment).getOutputFormat();
        outputFormat.configureJobOutput(job, output, segment, cuboidScheduler, nCuboidLevel);

        // set job configuration
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentID);
        job.getConfiguration().setInt(BatchConstants.CFG_CUBE_CUBOID_LEVEL, nCuboidLevel);
        job.getConfiguration().set(BatchConstants.CFG_CUBOID_MODE, cuboidModeName);

        return waitForCompletion(job);
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}