Java Code Examples for org.apache.kylin.cube.CubeInstance#getSegmentById()
The following examples show how to use org.apache.kylin.cube.CubeInstance#getSegmentById().
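Before the project examples, here is a minimal sketch of the typical call pattern. It is only an illustration: the cube name "my_cube" and the segmentId variable are placeholders, not values taken from any example below. getSegmentById() looks a segment up by its UUID and returns null when no matching segment exists, so callers normally check the result, as several of the examples do.

// Minimal usage sketch; "my_cube" and segmentId are placeholder values.
KylinConfig config = KylinConfig.getInstanceFromEnv();
CubeInstance cube = CubeManager.getInstance(config).getCube("my_cube");
CubeSegment segment = cube.getSegmentById(segmentId); // may return null for an unknown UUID
if (segment != null) {
    SegmentStatusEnum status = segment.getStatus(); // e.g. NEW, READY
    // ... work with the segment
}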
Example 1
Source File: JobService.java From Kylin with Apache License 2.0
@PreAuthorize(Constant.ACCESS_HAS_ROLE_ADMIN + " or hasPermission(#job, 'ADMINISTRATION') or hasPermission(#job, 'OPERATION') or hasPermission(#job, 'MANAGEMENT')")
public JobInstance cancelJob(String jobId) throws IOException, JobException {
    // CubeInstance cube = this.getCubeManager().getCube(job.getRelatedCube());
    // for (BuildCubeJob cubeJob: listAllCubingJobs(cube.getName(), null, EnumSet.of(ExecutableState.READY, ExecutableState.RUNNING))) {
    //     getExecutableManager().stopJob(cubeJob.getId());
    // }
    final JobInstance jobInstance = getJobInstance(jobId);
    final String segmentId = jobInstance.getRelatedSegment();
    CubeInstance cubeInstance = getCubeManager().getCube(jobInstance.getRelatedCube());
    final CubeSegment segment = cubeInstance.getSegmentById(segmentId);
    if (segment.getStatus() == SegmentStatusEnum.NEW) {
        cubeInstance.getSegments().remove(segment);
        getCubeManager().updateCube(cubeInstance);
    }
    getExecutableManager().discardJob(jobId);
    return jobInstance;
}
Example 2
Source File: CuboidShardUtil.java From kylin-on-parquet-v2 with Apache License 2.0
public static void saveCuboidShards(CubeSegment segment, Map<Long, Short> cuboidShards, int totalShards) throws IOException {
    CubeManager cubeManager = CubeManager.getInstance(segment.getConfig());

    Map<Long, Short> filtered = Maps.newHashMap();
    for (Map.Entry<Long, Short> entry : cuboidShards.entrySet()) {
        if (entry.getValue() > 1) {
            filtered.put(entry.getKey(), entry.getValue());
        }
    }

    // work on copy instead of cached objects
    CubeInstance cubeCopy = segment.getCubeInstance().latestCopyForWrite();
    CubeSegment segCopy = cubeCopy.getSegmentById(segment.getUuid());

    segCopy.setCuboidShardNums(filtered);
    segCopy.setTotalShards(totalShards);

    CubeUpdate update = new CubeUpdate(cubeCopy);
    update.setToUpdateSegs(segCopy);
    cubeManager.updateCube(update);
}
Example 3
Source File: JobService.java From kylin-on-parquet-v2 with Apache License 2.0
private void cancelCubingJobInner(CubingJob cubingJob) throws IOException {
    CubeInstance cubeInstance = getCubeManager().getCube(CubingExecutableUtil.getCubeName(cubingJob.getParams())); // might not a cube job
    final String segmentIds = CubingExecutableUtil.getSegmentId(cubingJob.getParams());
    if (!StringUtils.isEmpty(segmentIds)) {
        for (String segmentId : StringUtils.split(segmentIds)) {
            final CubeSegment segment = cubeInstance.getSegmentById(segmentId);
            if (segment != null && (segment.getStatus() == SegmentStatusEnum.NEW || segment.getTSRange().end.v == 0)) {
                // Remove this segment
                getCubeManager().updateCubeDropSegments(cubeInstance, segment);
            }
        }
    }
    getExecutableManager().discardJob(cubingJob.getId());
}
Example 4
Source File: CopyDictionaryStep.java From kylin with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams())).latestCopyForWrite();
    final CubeSegment optimizeSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

    CubeSegment oldSegment = optimizeSegment.getCubeInstance().getOriginalSegmentToOptimize(optimizeSegment);
    Preconditions.checkNotNull(oldSegment, "cannot find the original segment to be optimized by " + optimizeSegment);

    // --- Copy dictionary
    optimizeSegment.getDictionaries().putAll(oldSegment.getDictionaries());
    optimizeSegment.getSnapshots().putAll(oldSegment.getSnapshots());
    optimizeSegment.getRowkeyStats().addAll(oldSegment.getRowkeyStats());

    try {
        CubeUpdate cubeBuilder = new CubeUpdate(cube);
        cubeBuilder.setToUpdateSegs(optimizeSegment);
        mgr.updateCube(cubeBuilder);
    } catch (IOException e) {
        logger.error("fail to merge dictionary or lookup snapshots", e);
        return ExecuteResult.createError(e);
    }

    return new ExecuteResult();
}
Example 5
Source File: JobService.java From kylin with Apache License 2.0
private void cancelCubingJobInner(CubingJob cubingJob) throws IOException {
    CubeInstance cubeInstance = getCubeManager().getCube(CubingExecutableUtil.getCubeName(cubingJob.getParams())); // might not a cube job
    final String segmentIds = CubingExecutableUtil.getSegmentId(cubingJob.getParams());
    if (!StringUtils.isEmpty(segmentIds)) {
        for (String segmentId : StringUtils.split(segmentIds)) {
            final CubeSegment segment = cubeInstance.getSegmentById(segmentId);
            if (segment != null && (segment.getStatus() == SegmentStatusEnum.NEW || segment.getTSRange().end.v == 0)) {
                // Remove this segment
                getCubeManager().updateCubeDropSegments(cubeInstance, segment);
            }
        }
    }
    getExecutableManager().discardJob(cubingJob.getId());
}
Example 6
Source File: SparkExecutable.java From kylin with Apache License 2.0
private void updateSparkDimensionDicMetadata(KylinConfig config, CubeInstance cube, String segmentId) throws IOException {
    KylinConfig hdfsConfig = AbstractHadoopJob
            .loadKylinConfigFromHdfs(this.getParam(SparkBuildDictionary.OPTION_META_URL.getOpt()));
    CubeInstance cubeInstance = CubeManager.getInstance(hdfsConfig).reloadCube(cube.getName());
    CubeSegment segment = cubeInstance.getSegmentById(segmentId);

    CubeSegment oldSeg = cube.getSegmentById(segmentId);
    oldSeg.setDictionaries((ConcurrentHashMap<String, String>) segment.getDictionaries());
    oldSeg.setSnapshots((ConcurrentHashMap) segment.getSnapshots());
    oldSeg.getRowkeyStats().addAll(segment.getRowkeyStats());

    CubeInstance cubeCopy = cube.latestCopyForWrite();
    CubeUpdate update = new CubeUpdate(cubeCopy);
    update.setToUpdateSegs(oldSeg);
    CubeManager.getInstance(config).updateCube(update);

    Set<String> dumpList = new LinkedHashSet<>();
    dumpList.addAll(segment.getDictionaryPaths());
    dumpList.addAll(segment.getSnapshotPaths());

    JobRelatedMetaUtil.dumpAndUploadKylinPropsAndMetadata(dumpList, (KylinConfigExt) segment.getConfig(),
            config.getMetadataUrl().toString());
}
Example 7
Source File: NDCuboidMapper.java From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected void doSetup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());
    cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
    segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID);
    String cuboidModeName = context.getConfiguration().get(BatchConstants.CFG_CUBOID_MODE);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    cubeDesc = cube.getDescriptor();
    cubeSegment = cube.getSegmentById(segmentID);
    ndCuboidBuilder = new NDCuboidBuilder(cubeSegment);

    // initialize CubiodScheduler
    cuboidScheduler = CuboidSchedulerUtil.getCuboidSchedulerByMode(cubeSegment, cuboidModeName);
    rowKeySplitter = new RowKeySplitter(cubeSegment);
}
Example 8
Source File: FlinkCubingByLayer.java From kylin-on-parquet-v2 with Apache License 2.0
@Override
public void open(Configuration parameters) throws Exception {
    KylinConfig kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
    try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
            .setAndUnsetThreadLocalConfig(kConfig)) {
        CubeInstance cubeInstance = CubeManager.getInstance(kConfig).getCube(cubeName);
        CubeDesc cubeDesc = cubeInstance.getDescriptor();
        CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);
        CubeJoinedFlatTableEnrich interDesc = new CubeJoinedFlatTableEnrich(
                EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
        long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
        Cuboid baseCuboid = Cuboid.findForMandatory(cubeDesc, baseCuboidId);
        baseCuboidBuilder = new BaseCuboidBuilder(kConfig, cubeDesc, cubeSegment, interDesc,
                AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid),
                MeasureIngester.create(cubeDesc.getMeasures()), cubeSegment.buildDictionaryMap());
    }
}
Example 9
Source File: DictionaryGeneratorCLI.java From kylin with Apache License 2.0
public static void processSegment(KylinConfig config, String cubeName, String segmentID, String uuid,
        DistinctColumnValuesProvider factTableValueProvider, DictionaryProvider dictProvider) throws IOException {
    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    CubeSegment segment = cube.getSegmentById(segmentID);

    int retryTime = 0;
    while (retryTime < 3) {
        if (retryTime > 0) {
            logger.info("Rebuild dictionary and snapshot for Cube: {}, Segment: {}, {} times.", cubeName, segmentID, retryTime);
        }

        processSegment(config, segment, uuid, factTableValueProvider, dictProvider);

        if (isAllDictsAndSnapshotsReady(config, cubeName, segmentID)) {
            break;
        }
        retryTime++;
    }

    if (retryTime >= 3) {
        logger.error("Not all dictionaries and snapshots ready for cube segment: {}", segmentID);
    } else {
        logger.info("Succeed to build all dictionaries and snapshots for cube segment: {}", segmentID);
    }
}
Example 10
Source File: CubeBuildJob.java From kylin-on-parquet-v2 with Apache License 2.0
private void updateSegmentSourceBytesSize(String cubeId, Map<String, Object> toUpdateSegmentSourceSize)
        throws IOException {
    CubeInstance cubeInstance = cubeManager.getCubeByUuid(cubeId);
    CubeInstance cubeCopy = cubeInstance.latestCopyForWrite();
    CubeUpdate update = new CubeUpdate(cubeCopy);
    List<CubeSegment> cubeSegments = Lists.newArrayList();
    for (Map.Entry<String, Object> entry : toUpdateSegmentSourceSize.entrySet()) {
        CubeSegment segment = cubeCopy.getSegmentById(entry.getKey());
        segment.setInputRecordsSize((Long) entry.getValue());
        segment.setLastBuildTime(System.currentTimeMillis());
        cubeSegments.add(segment);
    }
    update.setToUpdateSegs(cubeSegments.toArray(new CubeSegment[0]));
    cubeManager.updateCube(update);
}
Example 11
Source File: SparkCubingByLayer.java From kylin-on-parquet-v2 with Apache License 2.0
@Override
public Tuple2<ByteArray, Object[]> call(String[] rowArray) throws Exception {
    if (initialized == false) {
        synchronized (SparkCubingByLayer.class) {
            if (initialized == false) {
                KylinConfig kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
                try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
                        .setAndUnsetThreadLocalConfig(kConfig)) {
                    CubeInstance cubeInstance = CubeManager.getInstance(kConfig).getCube(cubeName);
                    CubeDesc cubeDesc = cubeInstance.getDescriptor();
                    CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);
                    CubeJoinedFlatTableEnrich interDesc = new CubeJoinedFlatTableEnrich(
                            EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
                    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
                    Cuboid baseCuboid = Cuboid.findForMandatory(cubeDesc, baseCuboidId);
                    baseCuboidBuilder = new BaseCuboidBuilder(kConfig, cubeDesc, cubeSegment, interDesc,
                            AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid),
                            MeasureIngester.create(cubeDesc.getMeasures()), cubeSegment.buildDictionaryMap());
                    initialized = true;
                }
            }
        }
    }
    baseCuboidBuilder.resetAggrs();
    byte[] rowKey = baseCuboidBuilder.buildKey(rowArray);
    Object[] result = baseCuboidBuilder.buildValueObjects(rowArray);
    return new Tuple2<>(new ByteArray(rowKey), result);
}
Example 12
Source File: UpdateCubeInfoAfterBuildStep.java From kylin with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeManager = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = cubeManager.getCube(CubingExecutableUtil.getCubeName(this.getParams()))
            .latestCopyForWrite();
    final CubeSegment segment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

    CubingJob cubingJob = (CubingJob) getManager().getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
    long sourceCount = cubingJob.findSourceRecordCount();
    long sourceSizeBytes = cubingJob.findSourceSizeBytes();
    long cubeSizeBytes = cubingJob.findCubeSizeBytes();

    KylinConfig config = KylinConfig.getInstanceFromEnv();
    List<Double> cuboidEstimateRatio = cubingJob.findEstimateRatio(segment, config);

    segment.setLastBuildJobID(CubingExecutableUtil.getCubingJobId(this.getParams()));
    segment.setLastBuildTime(System.currentTimeMillis());
    segment.setSizeKB(cubeSizeBytes / 1024);
    segment.setInputRecords(sourceCount);
    segment.setInputRecordsSize(sourceSizeBytes);
    segment.setEstimateRatio(cuboidEstimateRatio);

    try {
        deleteDictionaryIfNeeded(segment);
        saveExtSnapshotIfNeeded(cubeManager, cube, segment);
        updateSegment(segment);

        cubeManager.promoteNewlyBuiltSegments(cube, segment);
        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to update cube after build", e);
        return ExecuteResult.createError(e);
    }
}
Example 13
Source File: SparkCubingMerge.java From kylin-on-parquet-v2 with Apache License 2.0
private void init() {
    this.kylinConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
    final CubeInstance cube = CubeManager.getInstance(kylinConfig).getCube(cubeName);
    final CubeDesc cubeDesc = CubeDescManager.getInstance(kylinConfig).getCubeDesc(cube.getDescName());
    final CubeSegment sourceSeg = cube.getSegmentById(sourceSegmentId);
    final CubeSegment mergedSeg = cube.getSegmentById(mergedSegmentId);
    this.segmentReEncoder = new SegmentReEncoder(cubeDesc, sourceSeg, mergedSeg, kylinConfig);
}
Example 14
Source File: CubeMergeJob.java From kylin-on-parquet-v2 with Apache License 2.0
private void mergeSegments(String cubeId, String segmentId) throws IOException {
    CubeManager mgr = CubeManager.getInstance(config);
    CubeInstance cube = mgr.getCubeByUuid(cubeId);
    CubeSegment mergedSeg = cube.getSegmentById(segmentId);
    SegmentInfo mergedSegInfo = ManagerHub.getSegmentInfo(config, getParam(MetadataConstants.P_CUBE_ID), mergedSeg.getUuid());

    Map<Long, DFLayoutMergeAssist> mergeCuboidsAssist = generateMergeAssist(mergingSegInfos, ss);
    for (DFLayoutMergeAssist assist : mergeCuboidsAssist.values()) {
        SpanningTree spanningTree = new ForestSpanningTree(JavaConversions.asJavaCollection(mergedSegInfo.toBuildLayouts()));

        Dataset<Row> afterMerge = assist.merge(config, cube.getName());
        LayoutEntity layout = assist.getLayout();

        Dataset<Row> afterSort;
        if (layout.isTableIndex()) {
            afterSort = afterMerge.sortWithinPartitions(NSparkCubingUtil.getColumns(layout.getOrderedDimensions().keySet()));
        } else {
            Column[] dimsCols = NSparkCubingUtil.getColumns(layout.getOrderedDimensions().keySet());
            Dataset<Row> afterAgg = CuboidAggregator.agg(ss, afterMerge, layout.getOrderedDimensions().keySet(),
                    layout.getOrderedMeasures(), spanningTree, false);
            afterSort = afterAgg.sortWithinPartitions(dimsCols);
        }

        buildLayoutWithUpdate.submit(new BuildLayoutWithUpdate.JobEntity() {
            @Override
            public String getName() {
                return "merge-layout-" + layout.getId();
            }

            @Override
            public LayoutEntity build() throws IOException {
                return saveAndUpdateCuboid(afterSort, mergedSegInfo, layout, assist);
            }
        }, config);

        buildLayoutWithUpdate.updateLayout(mergedSegInfo, config);
    }
}
Example 15
Source File: SparkBuildDictionary.java From kylin with Apache License 2.0
private String buildSnapshotTable(KylinConfig config, CubeSegment cubeSeg, String lookupTable, String uuid) throws IOException {
    CubeInstance cubeCopy = cubeSeg.getCubeInstance().latestCopyForWrite(); // get a latest copy
    CubeSegment segCopy = cubeCopy.getSegmentById(cubeSeg.getUuid());

    TableMetadataManager metaMgr = TableMetadataManager.getInstance(config);
    SnapshotManager snapshotMgr = SnapshotManager.getInstance(config);

    TableDesc tableDesc = new TableDesc(metaMgr.getTableDesc(lookupTable, segCopy.getProject()));
    IReadableTable hiveTable = SourceManager.createReadableTable(tableDesc, uuid);
    SnapshotTable snapshot = snapshotMgr.buildSnapshot(hiveTable, tableDesc, cubeSeg.getConfig());

    return snapshot.getResourcePath();
}
Example 16
Source File: JobStepFactory.java From kylin-on-parquet-v2 with Apache License 2.0
public static NSparkExecutable addStep(DefaultChainedExecutable parent, JobStepType type, CubeInstance cube) {
    NSparkExecutable step;
    KylinConfig config = cube.getConfig();
    switch (type) {
    case RESOURCE_DETECT:
        step = new NResourceDetectStep(parent);
        break;
    case CUBING:
        step = new NSparkCubingStep(config.getSparkBuildClassName());
        break;
    case MERGING:
        step = new NSparkMergingStep(config.getSparkMergeClassName());
        break;
    case CLEAN_UP_AFTER_MERGE:
        step = new NSparkUpdateMetaAndCleanupAfterMergeStep();
        break;
    default:
        throw new IllegalArgumentException();
    }

    step.setParams(parent.getParams());
    step.setProject(parent.getProject());
    step.setTargetSubject(parent.getTargetSubject());

    if (step instanceof NSparkUpdateMetaAndCleanupAfterMergeStep) {
        CubeSegment mergeSegment = cube.getSegmentById(parent.getTargetSegments().iterator().next());
        final Segments<CubeSegment> mergingSegments = cube.getMergingSegments(mergeSegment);
        step.setParam(MetadataConstants.P_SEGMENT_NAMES,
                String.join(",", NSparkCubingUtil.toSegmentNames(mergingSegments)));
        step.setParam(CubingExecutableUtil.SEGMENT_ID, parent.getParam(CubingExecutableUtil.SEGMENT_ID));
        step.setParam(MetadataConstants.P_JOB_TYPE, parent.getParam(MetadataConstants.P_JOB_TYPE));
        step.setParam(MetadataConstants.P_OUTPUT_META_URL, parent.getParam(MetadataConstants.P_OUTPUT_META_URL));
    }
    parent.addTask(step);
    //after addTask, step's id is changed
    step.setDistMetaUrl(config.getJobTmpMetaStoreUrl(parent.getProject(), step.getId()));
    return step;
}
Example 17
Source File: CalculateStatsFromBaseCuboidMapper.java From kylin with Apache License 2.0
@Override
protected void doSetup(Context context) throws IOException {
    Configuration conf = context.getConfiguration();
    HadoopUtil.setCurrentConfiguration(conf);
    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

    String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    CubeDesc cubeDesc = cube.getDescriptor();
    CubeSegment cubeSegment = cube.getSegmentById(conf.get(BatchConstants.CFG_CUBE_SEGMENT_ID));

    baseCuboidId = cube.getCuboidScheduler().getBaseCuboidId();
    nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;

    String cuboidModeName = conf.get(BatchConstants.CFG_CUBOID_MODE);
    Set<Long> cuboidIdSet = cube.getCuboidsByMode(cuboidModeName);
    cuboidIds = cuboidIdSet.toArray(new Long[cuboidIdSet.size()]);
    allCuboidsBitSet = CuboidUtil.getCuboidBitSet(cuboidIds, nRowKey);

    samplingPercentage = Integer
            .parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));

    allCuboidsHLL = new HLLCounter[cuboidIds.length];
    for (int i = 0; i < cuboidIds.length; i++) {
        allCuboidsHLL[i] = new HLLCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision());
    }

    //for KYLIN-2518 backward compatibility
    if (KylinVersion.isBefore200(cubeDesc.getVersion())) {
        isUsePutRowKeyToHllNewAlgorithm = false;
        hf = Hashing.murmur3_32();
        logger.info("Found KylinVersion : {}. Use old algorithm for cuboid sampling.", cubeDesc.getVersion());
    } else {
        isUsePutRowKeyToHllNewAlgorithm = true;
        rowHashCodesLong = new long[nRowKey];
        hf = Hashing.murmur3_128();
        logger.info(
                "Found KylinVersion : {}. Use new algorithm for cuboid sampling. About the details of the new algorithm, please refer to KYLIN-2518",
                cubeDesc.getVersion());
    }

    rowKeyDecoder = new RowKeyDecoder(cubeSegment);
}
Example 18
Source File: JobService.java From kylin with Apache License 2.0
public JobInstance submitRecoverSegmentOptimizeJob(CubeSegment segment, String submitter)
        throws IOException, JobException {
    CubeInstance cubeInstance = segment.getCubeInstance();

    checkCubeDescSignature(cubeInstance);

    String cubeName = cubeInstance.getName();
    List<JobInstance> jobInstanceList = searchJobsByCubeName(cubeName, null,
            Lists.newArrayList(JobStatusEnum.NEW, JobStatusEnum.PENDING, JobStatusEnum.ERROR),
            JobTimeFilterEnum.ALL, JobSearchMode.CHECKPOINT_ONLY);
    if (jobInstanceList.size() > 1) {
        throw new IllegalStateException("Exist more than one CheckpointExecutable for cube " + cubeName);
    } else if (jobInstanceList.size() == 0) {
        throw new IllegalStateException("There's no CheckpointExecutable for cube " + cubeName);
    }
    CheckpointExecutable checkpointExecutable = (CheckpointExecutable) getExecutableManager()
            .getJob(jobInstanceList.get(0).getId());

    AbstractExecutable toBeReplaced = null;
    for (AbstractExecutable taskForCheck : checkpointExecutable.getSubTasksForCheck()) {
        if (taskForCheck instanceof CubingJob) {
            CubingJob subCubingJob = (CubingJob) taskForCheck;
            String segmentName = CubingExecutableUtil.getSegmentName(subCubingJob.getParams());
            if (segmentName != null && segmentName.equals(segment.getName())) {
                String segmentID = CubingExecutableUtil.getSegmentId(subCubingJob.getParams());
                CubeSegment beingOptimizedSegment = cubeInstance.getSegmentById(segmentID);
                if (beingOptimizedSegment != null) { // beingOptimizedSegment exists & should not be recovered
                    throw new IllegalStateException("Segment " + beingOptimizedSegment.getName() + "-"
                            + beingOptimizedSegment.getUuid()
                            + " still exists. Please delete it or discard the related optimize job first!!!");
                }
                toBeReplaced = taskForCheck;
                break;
            }
        }
    }
    if (toBeReplaced == null) {
        throw new IllegalStateException("There's no CubingJob for segment " + segment.getName()
                + " in CheckpointExecutable " + checkpointExecutable.getName());
    }

    /** Add CubingJob for the related segment **/
    CubeSegment optimizeSegment = getCubeManager().appendSegment(cubeInstance, segment.getTSRange());

    DefaultChainedExecutable optimizeJob = EngineFactory.createBatchOptimizeJob(optimizeSegment, submitter);

    getExecutableManager().addJob(optimizeJob);

    JobInstance optimizeJobInstance = getSingleJobInstance(optimizeJob);

    /** Update the checkpoint job */
    checkpointExecutable.getSubTasksForCheck().set(checkpointExecutable.getSubTasksForCheck().indexOf(toBeReplaced),
            optimizeJob);
    getExecutableManager().updateCheckpointJob(checkpointExecutable.getId(),
            checkpointExecutable.getSubTasksForCheck());

    return optimizeJobInstance;
}
Example 19
Source File: CalculateStatsFromBaseCuboidJob.java From kylin-on-parquet-v2 with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_ID);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_STATISTICS_SAMPLING_PERCENT);
        options.addOption(OPTION_CUBOID_MODE);
        parseOptions(options, args);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));

        String cubeName = getOptionValue(OPTION_CUBE_NAME);
        String segmentID = getOptionValue(OPTION_SEGMENT_ID);
        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String statistics_sampling_percent = getOptionValue(OPTION_STATISTICS_SAMPLING_PERCENT);
        String cuboidMode = getOptionValue(OPTION_CUBOID_MODE);

        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeSegment cubeSegment = cube.getSegmentById(segmentID);

        job.getConfiguration().set(BatchConstants.CFG_CUBOID_MODE, cuboidMode);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentID);
        job.getConfiguration().set(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT, statistics_sampling_percent);
        logger.info("Starting: " + job.getJobName());

        setJobClasspath(job, cube.getConfig());

        setupMapper(input);
        setupReducer(output, cubeSegment);

        attachSegmentMetadataWithDict(cubeSegment, job.getConfiguration());

        return waitForCompletion(job);
    } catch (Exception e) {
        logger.error("error in CalculateStatsFromBaseCuboidJob", e);
        printUsage(options);
        throw e;
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}
Example 20
Source File: CuboidJob.java From kylin-on-parquet-v2 with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    if (this.mapperClass == null)
        throw new Exception("Mapper class is not set!");

    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_ID);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_NCUBOID_LEVEL);
        options.addOption(OPTION_CUBING_JOB_ID);
        options.addOption(OPTION_CUBOID_MODE);
        options.addOption(OPTION_DICTIONARY_SHRUNKEN_PATH);
        parseOptions(options, args);

        String output = getOptionValue(OPTION_OUTPUT_PATH);
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
        int nCuboidLevel = Integer.parseInt(getOptionValue(OPTION_NCUBOID_LEVEL));
        String segmentID = getOptionValue(OPTION_SEGMENT_ID);
        String cubingJobId = getOptionValue(OPTION_CUBING_JOB_ID);
        String cuboidModeName = getOptionValue(OPTION_CUBOID_MODE);
        if (cuboidModeName == null) {
            cuboidModeName = CuboidModeEnum.CURRENT.toString();
        }

        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeSegment segment = cube.getSegmentById(segmentID);
        cuboidScheduler = CuboidSchedulerUtil.getCuboidSchedulerByMode(segment, cuboidModeName);

        if (checkSkip(cubingJobId, nCuboidLevel)) {
            logger.info(
                    "Skip job " + getOptionValue(OPTION_JOB_NAME) + " for " + segmentID + "[" + segmentID + "]");
            return 0;
        }

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        job.getConfiguration().set(BatchConstants.ARG_CUBING_JOB_ID, cubingJobId);
        String shrunkenDictPath = getOptionValue(OPTION_DICTIONARY_SHRUNKEN_PATH);
        if (shrunkenDictPath != null) {
            job.getConfiguration().set(BatchConstants.ARG_SHRUNKEN_DICT_PATH, shrunkenDictPath);
        }

        logger.info("Starting: " + job.getJobName());

        setJobClasspath(job, cube.getConfig());

        // add metadata to distributed cache
        attachSegmentMetadataWithAll(segment, job.getConfiguration());

        // Mapper
        job.setMapperClass(this.mapperClass);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setCombinerClass(CuboidReducer.class); // for base cuboid shuffle skew, some rowkey aggregates far more records than others

        // Reducer
        job.setReducerClass(CuboidReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // set input
        configureMapperInputFormat(segment);

        // set output
        IMROutput2.IMROutputFormat outputFormat = MRUtil.getBatchCubingOutputSide2(segment).getOutputFormat();
        outputFormat.configureJobOutput(job, output, segment, cuboidScheduler, nCuboidLevel);

        // set job configuration
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentID);
        job.getConfiguration().setInt(BatchConstants.CFG_CUBE_CUBOID_LEVEL, nCuboidLevel);
        job.getConfiguration().set(BatchConstants.CFG_CUBOID_MODE, cuboidModeName);

        return waitForCompletion(job);
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}