org.apache.kylin.job.constant.ExecutableConstants Java Examples

The following examples show how to use org.apache.kylin.job.constant.ExecutableConstants. Each example is taken from a source file of the kylin-on-parquet-v2 project, released under the Apache License 2.0; the originating file is listed above each snippet.
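The constants referenced throughout these examples are plain String values: step names passed to setName(...) and keys for extra job information such as the YARN application id. The sketch below shows how such a holder class is typically declared; only the constant names are taken from the examples on this page, while the literal values are assumptions for illustration.

// Hypothetical sketch of a constants holder in the style of ExecutableConstants.
// Only the constant names come from the examples below; the values are assumptions.
public final class ExecutableConstantsSketch {

    private ExecutableConstantsSketch() {
        // constant holder only; no instances
    }

    // step names referenced by the job builders
    public static final String STEP_NAME_CONVERT_CUBOID_TO_HFILE = "Convert Cuboid Data to HFile";
    public static final String STEP_NAME_MERGE_DICTIONARY = "Merge Cuboid Dictionary";
    public static final String STEP_NAME_BUILD_IN_MEM_CUBE = "Build Cube In-Mem";

    // keys for extra job info collected while a step runs
    public static final String YARN_APP_ID = "yarn_application_id";
    public static final String YARN_APP_URL = "yarn_application_tracking_url";
}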
Example #1
Source File: HBaseMRSteps.java    From kylin-on-parquet-v2 with Apache License 2.0
public AbstractExecutable createConvertCuboidToHfileStep(String jobId) {
    String cuboidRootPath = getCuboidRootPath(jobId);
    String inputPath = cuboidRootPath + (cuboidRootPath.endsWith("/") ? "" : "/") + "*";

    MapReduceExecutable createHFilesStep = new MapReduceExecutable();
    createHFilesStep.setName(ExecutableConstants.STEP_NAME_CONVERT_CUBOID_TO_HFILE);
    StringBuilder cmd = new StringBuilder();

    appendMapReduceParameters(cmd);
    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getRealization().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_PARTITION, getRowkeyDistributionOutputPath(jobId) + "/part-r-00000_hfile");
    appendExecCmdParameters(cmd, BatchConstants.ARG_INPUT, inputPath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_OUTPUT, getHFilePath(jobId));
    appendExecCmdParameters(cmd, BatchConstants.ARG_HTABLE_NAME, seg.getStorageLocationIdentifier());
    appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME, "Kylin_HFile_Generator_" + seg.getRealization().getName() + "_Step");

    createHFilesStep.setMapReduceParams(cmd.toString());
    createHFilesStep.setMapReduceJobClass(CubeHFileJob.class);
    createHFilesStep.setCounterSaveAs(",," + CubingJob.CUBE_SIZE_BYTES);

    return createHFilesStep;
}
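The builder methods in these examples assemble the MapReduce command line by appending "-name value" pairs through appendExecCmdParameters. A minimal, self-contained sketch of that pattern, using a hypothetical appendArg helper and illustrative argument names:

// Minimal sketch of the "-name value" argument assembly used by the builders above.
// appendArg is a hypothetical stand-in for appendExecCmdParameters.
public class CmdArgsSketch {

    static void appendArg(StringBuilder cmd, String name, String value) {
        cmd.append(" -").append(name).append(" ").append(value);
    }

    public static void main(String[] args) {
        StringBuilder cmd = new StringBuilder();
        appendArg(cmd, "cubename", "sales_cube");          // e.g. BatchConstants.ARG_CUBE_NAME
        appendArg(cmd, "output", "/kylin/hfile/job-123");  // e.g. BatchConstants.ARG_OUTPUT
        System.out.println(cmd); // -cubename sales_cube -output /kylin/hfile/job-123
    }
}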
 
Example #2
Source File: CreateFlatHiveTableByLivyStep.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    stepLogger.setILogListener((infoKey, info) -> {
                // only care about two properties here
                if (ExecutableConstants.YARN_APP_ID.equals(infoKey)
                        || ExecutableConstants.YARN_APP_URL.equals(infoKey)) {
                    getManager().addJobInfo(getId(), info);
                }
            }
    );
    KylinConfig config = getCubeSpecificConfig();
    try {
        createFlatHiveTable(config);
        return new ExecuteResult(ExecuteResult.State.SUCCEED, stepLogger.getBufferedLog());

    } catch (Exception e) {
        logger.error("job:" + getId() + " execute finished with exception", e);
        return new ExecuteResult(ExecuteResult.State.ERROR, stepLogger.getBufferedLog(), e);
    }
}
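Example #2 registers a log listener that forwards only the YARN application id and tracking URL to the job manager. The standalone sketch below mirrors that filtering with a plain BiConsumer; the key values are assumptions, since only the constant names appear in the excerpt.

import java.util.HashMap;
import java.util.Map;
import java.util.function.BiConsumer;

// Sketch of the info-listener filtering from Example #2, outside of Kylin classes.
public class YarnInfoFilterSketch {

    // assumed key values; the real ones live in ExecutableConstants
    static final String YARN_APP_ID = "yarn_application_id";
    static final String YARN_APP_URL = "yarn_application_tracking_url";

    public static void main(String[] args) {
        Map<String, String> jobInfo = new HashMap<>();

        // forward only the two YARN properties, mirroring the step's ILogListener
        BiConsumer<String, String> listener = (infoKey, info) -> {
            if (YARN_APP_ID.equals(infoKey) || YARN_APP_URL.equals(infoKey)) {
                jobInfo.put(infoKey, info);
            }
        };

        listener.accept(YARN_APP_ID, "application_1700000000000_0001");
        listener.accept("some_other_key", "ignored");
        System.out.println(jobInfo); // {yarn_application_id=application_1700000000000_0001}
    }
}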
 
Example #3
Source File: JobStepFactoryTest.java    From kylin-on-parquet-v2 with Apache License 2.0
@Test
public void testAddStepInCubing() throws IOException {
    CubeManager cubeMgr = CubeManager.getInstance(config);
    CubeInstance cube = cubeMgr.getCube(CUBE_NAME);
    cleanupSegments(CUBE_NAME);
    CubeSegment oneSeg = cubeMgr.appendSegment(cube, new SegmentRange.TSRange(0L, Long.MAX_VALUE));
    Set<CubeSegment> segments = Sets.newHashSet(oneSeg);
    NSparkCubingJob job = NSparkCubingJob.create(segments, "ADMIN");
    Assert.assertEquals(CUBE_NAME, job.getParam(MetadataConstants.P_CUBE_NAME));

    NSparkExecutable resourceDetectStep = job.getResourceDetectStep();
    Assert.assertEquals(ResourceDetectBeforeCubingJob.class.getName(),
            resourceDetectStep.getSparkSubmitClassName());
    Assert.assertEquals(ExecutableConstants.STEP_NAME_DETECT_RESOURCE, resourceDetectStep.getName());
    job.getParams().forEach((key, value) -> Assert.assertEquals(value, resourceDetectStep.getParam(key)));
    Assert.assertEquals(config.getJobTmpMetaStoreUrl(getProject(), resourceDetectStep.getId()).toString(),
            resourceDetectStep.getDistMetaUrl());

    NSparkExecutable cubeStep = job.getSparkCubingStep();
    Assert.assertEquals(config.getSparkBuildClassName(), cubeStep.getSparkSubmitClassName());
    Assert.assertEquals(ExecutableConstants.STEP_NAME_BUILD_SPARK_CUBE, cubeStep.getName());
    job.getParams().forEach((key, value) -> Assert.assertEquals(value, cubeStep.getParam(key)));
    Assert.assertEquals(config.getJobTmpMetaStoreUrl(getProject(), cubeStep.getId()).toString(),
            cubeStep.getDistMetaUrl());
}
 
Example #4
Source File: CreateFlatHiveTableStep.java    From kylin-on-parquet-v2 with Apache License 2.0
protected void createFlatHiveTable(KylinConfig config) throws IOException {
    final HiveCmdBuilder hiveCmdBuilder = new HiveCmdBuilder(getName());
    hiveCmdBuilder.overwriteHiveProps(config.getHiveConfigOverride());
    hiveCmdBuilder.addStatement(getInitStatement());
    hiveCmdBuilder.addStatement(getCreateTableStatement());
    final String cmd = hiveCmdBuilder.toString();

    stepLogger.log("Create and distribute table, cmd: ");
    stepLogger.log(cmd);

    Pair<Integer, String> response = config.getCliCommandExecutor().execute(cmd, stepLogger, null);
    Map<String, String> info = stepLogger.getInfo();

    // get the flat Hive table size
    Matcher matcher = HDFS_LOCATION.matcher(cmd);
    if (matcher.find()) {
        String hiveFlatTableHdfsUrl = matcher.group(1);
        long size = getFileSize(hiveFlatTableHdfsUrl);
        info.put(ExecutableConstants.HDFS_BYTES_WRITTEN, "" + size);
        logger.info("HDFS_Bytes_Writen: " + size);
    }
    getManager().addJobInfo(getId(), info);
    if (response.getFirst() != 0) {
        throw new RuntimeException("Failed to create flat hive table, error code " + response.getFirst());
    }
}
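Example #4 derives the flat table size by matching an HDFS LOCATION clause in the generated Hive command (the HDFS_LOCATION pattern itself is defined outside the excerpt). A self-contained sketch of that extraction, with an assumed regular expression:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Sketch of pulling an HDFS path out of a Hive statement, in the spirit of the
// HDFS_LOCATION matcher in Example #4. The regex here is an assumption.
public class HdfsLocationSketch {

    private static final Pattern HDFS_LOCATION = Pattern.compile("LOCATION\\s+'(hdfs://[^']+)'");

    public static void main(String[] args) {
        String cmd = "CREATE TABLE t (id INT) LOCATION 'hdfs://nn:8020/kylin/flat_table/job-1';";
        Matcher matcher = HDFS_LOCATION.matcher(cmd);
        if (matcher.find()) {
            String hiveFlatTableHdfsUrl = matcher.group(1);
            System.out.println(hiveFlatTableHdfsUrl); // hdfs://nn:8020/kylin/flat_table/job-1
        }
    }
}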
 
Example #5
Source File: FlinkBatchMergeJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public FlinkExecutable createMergeCuboidDataFlinkStep(CubeSegment seg, List<CubeSegment> mergingSegments, String jobID) {
    final List<String> mergingCuboidPaths = Lists.newArrayList();
    for (CubeSegment merging : mergingSegments) {
        mergingCuboidPaths.add(getCuboidRootPath(merging));
    }
    String formattedPath = StringUtil.join(mergingCuboidPaths, ",");
    String outputPath = getCuboidRootPath(jobID);

    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    flinkExecutable.setClassName(FlinkCubingMerge.class.getName());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_INPUT_PATH.getOpt(), formattedPath);
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobID));
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_OUTPUT_PATH.getOpt(), outputPath);

    flinkExecutable.setJobId(jobID);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_CUBOID);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());

    return flinkExecutable;
}
 
Example #6
Source File: SparkBatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public SparkExecutable createFactDistinctColumnsSparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));

    sparkExecutable.setClassName(SparkFactDistinct.class.getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_PATH.getOpt(), tablePath);
    sparkExecutable.setParam(SparkFactDistinct.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_STATS_SAMPLING_PERCENT.getOpt(), String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));

    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS + ":" + seg.toString());
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    return sparkExecutable;
}
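Several steps pass a comma-separated spec to setCounterSaveAs: Example #1 uses ",," + CubingJob.CUBE_SIZE_BYTES, while the step above names two counters. Reading the spec as positional slots is an assumption, but it explains the leading commas; a tiny sketch:

import java.util.Arrays;

// Sketch of splitting a comma-separated counterSaveAs spec such as ",,cubeSizeBytes".
// Treating the slots as positional counter names is an assumption for illustration.
public class CounterSpecSketch {

    public static void main(String[] args) {
        // as in Example #1: ",," + CubingJob.CUBE_SIZE_BYTES
        String spec = ",," + "cubeSizeBytes";
        String[] slots = spec.split(",", -1); // -1 keeps the empty leading slots
        System.out.println(Arrays.toString(slots)); // [, , cubeSizeBytes]
        // an empty slot would mean "do not save a counter in this position"
    }
}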
 
Example #7
Source File: FlinkBatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public void configureFlinkJob(final CubeSegment seg, final FlinkExecutable flinkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_PATH.getOpt(),
            tablePath);
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    flinkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_FLINK_CUBE);
}
 
Example #8
Source File: HiveInputBase.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
public void addStepPhase4_Cleanup(DefaultChainedExecutable jobFlow) {
    final String jobWorkingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);

    org.apache.kylin.source.hive.GarbageCollectionStep step = new org.apache.kylin.source.hive.GarbageCollectionStep();
    step.setName(ExecutableConstants.STEP_NAME_HIVE_CLEANUP);

    List<String> deleteTables = new ArrayList<>();
    deleteTables.add(getIntermediateTableIdentity());

    // the mr-hive dict and inner tables do not need their HDFS data deleted
    String[] mrHiveDicts = flatDesc.getSegment().getConfig().getMrHiveDictColumns();
    if (Objects.nonNull(mrHiveDicts) && mrHiveDicts.length > 0) {
        String dictDb = flatDesc.getSegment().getConfig().getMrHiveDictDB();
        String tableName = dictDb + "." + flatDesc.getTableName() + "_"
                + MRHiveDictUtil.DictHiveType.GroupBy.getName();
        deleteTables.add(tableName);
    }
    step.setIntermediateTables(deleteTables);

    step.setExternalDataPaths(Collections.singletonList(JoinedFlatTable.getTableDir(flatDesc, jobWorkingDir)));
    step.setHiveViewIntermediateTableIdentities(StringUtil.join(hiveViewIntermediateTables, ","));
    jobFlow.addTask(step);
}
 
Example #9
Source File: SparkExecutable.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected void onExecuteStart(ExecutableContext executableContext) {
    final Output output = getOutput();
    if (output.getExtra().containsKey(START_TIME)) {
        final String sparkJobID = output.getExtra().get(ExecutableConstants.SPARK_JOB_ID);
        if (StringUtils.isEmpty(sparkJobID)) {
            getManager().updateJobOutput(getId(), ExecutableState.RUNNING, null, null);
            return;
        }
        try {
            String status = getAppState(sparkJobID);
            if (status == null || status.equals("FAILED") || status.equals("KILLED")) {
                // remove previous MR job info
                super.onExecuteStart(executableContext);
            } else {
                getManager().updateJobOutput(getId(), ExecutableState.RUNNING, null, null);
            }
        } catch (IOException e) {
            logger.warn("error getting hadoop status", e);
            super.onExecuteStart(executableContext);
        }
    } else {
        super.onExecuteStart(executableContext);
    }
}
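The resume-or-restart choice in Example #9 boils down to a small decision: keep the step RUNNING when no Spark job id was recorded or the previous application is still alive, and restart (clearing the old job info) when its state is FAILED, KILLED, or unknown. A sketch of that decision as a pure function, using the state strings from the example:

// Sketch of the resume-or-restart decision in Example #9's onExecuteStart.
// "FAILED" and "KILLED" are the states checked in the example; everything else is assumed healthy.
public class ResumeDecisionSketch {

    /** true = keep the step in RUNNING state, false = restart it from scratch. */
    static boolean shouldResume(String sparkJobId, String appState) {
        if (sparkJobId == null || sparkJobId.isEmpty()) {
            return true; // no recorded Spark job id: the example simply keeps the RUNNING state
        }
        if (appState == null || "FAILED".equals(appState) || "KILLED".equals(appState)) {
            return false; // previous attempt is gone or dead: clear old info and restart
        }
        return true; // application is still alive: resume tracking it
    }

    public static void main(String[] args) {
        System.out.println(shouldResume("job_42", "RUNNING")); // true
        System.out.println(shouldResume("job_42", "KILLED"));  // false
    }
}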
 
Example #10
Source File: SparkBatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public SparkExecutable createBuildDictionarySparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());

    sparkExecutable.setClassName(SparkBuildDictionary.class.getName());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_INPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_DICT_PATH.getOpt(), getDictRootPath(jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_CUBING_JOB_ID.getOpt(), jobId);

    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_DICTIONARY);
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());

    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
 
Example #11
Source File: BatchMergeJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public MapReduceExecutable createMergeDictionaryStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    MapReduceExecutable mergeDictionaryStep = new MapReduceExecutable();
    mergeDictionaryStep.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY);
    StringBuilder cmd = new StringBuilder();
    appendMapReduceParameters(cmd, JobEngineConfig.CUBE_MERGE_JOB_CONF_SUFFIX);

    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getCubeInstance().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_SEGMENT_ID, seg.getUuid());
    appendExecCmdParameters(cmd, BatchConstants.ARG_META_URL, getSegmentMetadataUrl(seg.getConfig(), jobID));
    appendExecCmdParameters(cmd, MergeDictionaryJob.OPTION_MERGE_SEGMENT_IDS.getOpt(), StringUtil.join(mergingSegmentIds, ","));
    appendExecCmdParameters(cmd, MergeDictionaryJob.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    appendExecCmdParameters(cmd, MergeDictionaryJob.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));
    appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME, "Kylin_Merge_Dictionary_" + seg.getCubeInstance().getName() + "_Step");

    mergeDictionaryStep.setMapReduceParams(cmd.toString());
    mergeDictionaryStep.setMapReduceJobClass(MergeDictionaryJob.class);

    return mergeDictionaryStep;
}
 
Example #12
Source File: BatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
protected void addInMemCubingSteps(final CubingJob result, String jobId, String cuboidRootPath) {
    // in-mem cubing job
    MapReduceExecutable cubeStep = new MapReduceExecutable();

    StringBuilder cmd = new StringBuilder();
    appendMapReduceParameters(cmd, JobEngineConfig.IN_MEM_JOB_CONF_SUFFIX);

    cubeStep.setName(ExecutableConstants.STEP_NAME_BUILD_IN_MEM_CUBE);

    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getRealization().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_SEGMENT_ID, seg.getUuid());
    appendExecCmdParameters(cmd, BatchConstants.ARG_OUTPUT, cuboidRootPath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME, "Kylin_Cube_Builder_" + seg.getRealization().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBING_JOB_ID, jobId);
    if (seg.getCubeDesc().isShrunkenDictFromGlobalEnabled()) {
        appendExecCmdParameters(cmd, BatchConstants.ARG_SHRUNKEN_DICT_PATH, getShrunkenDictionaryPath(jobId));
    }

    cubeStep.setMapReduceParams(cmd.toString());
    cubeStep.setMapReduceJobClass(getInMemCuboidJob());
    result.addTask(cubeStep);
}
 
Example #13
Source File: BatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
private MapReduceExecutable createNDimensionCuboidStep(String parentPath, String outputPath, int level, String jobId) {
    // ND cuboid job
    MapReduceExecutable ndCuboidStep = new MapReduceExecutable();

    ndCuboidStep.setName(ExecutableConstants.STEP_NAME_BUILD_N_D_CUBOID + " : level " + level);
    StringBuilder cmd = new StringBuilder();

    appendMapReduceParameters(cmd);
    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getRealization().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_SEGMENT_ID, seg.getUuid());
    appendExecCmdParameters(cmd, BatchConstants.ARG_INPUT, parentPath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_OUTPUT, outputPath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME, "Kylin_ND-Cuboid_Builder_" + seg.getRealization().getName() + "_Step");
    appendExecCmdParameters(cmd, BatchConstants.ARG_LEVEL, "" + level);
    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBING_JOB_ID, jobId);

    ndCuboidStep.setMapReduceParams(cmd.toString());
    ndCuboidStep.setMapReduceJobClass(getNDCuboidJob());
    return ndCuboidStep;
}
 
Example #14
Source File: JobBuilderSupport.java    From kylin-on-parquet-v2 with Apache License 2.0
public MapReduceExecutable createExtractDictionaryFromGlobalJob(String jobId) {
    MapReduceExecutable result = new MapReduceExecutable();
    result.setName(ExecutableConstants.STEP_NAME_EXTRACT_DICTIONARY_FROM_GLOBAL);
    result.setMapReduceJobClass(ExtractDictionaryFromGlobalJob.class);
    StringBuilder cmd = new StringBuilder();
    appendMapReduceParameters(cmd);
    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getRealization().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_SEGMENT_ID, seg.getUuid());
    appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME,
            "Kylin_Extract_Dictionary_from_Global_" + seg.getRealization().getName() + "_Step");
    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBING_JOB_ID, jobId);
    appendExecCmdParameters(cmd, BatchConstants.ARG_OUTPUT, getShrunkenDictionaryPath(jobId));

    result.setMapReduceParams(cmd.toString());
    return result;
}
 
Example #15
Source File: BatchOptimizeJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public MapReduceExecutable createFilterRecommendCuboidDataStep(String inputPath, String outputPath) {
    MapReduceExecutable result = new MapReduceExecutable();
    result.setName(ExecutableConstants.STEP_NAME_FILTER_RECOMMEND_CUBOID_DATA_FOR_OPTIMIZATION);

    StringBuilder cmd = new StringBuilder();
    appendMapReduceParameters(cmd);
    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getRealization().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_SEGMENT_ID, seg.getUuid());
    appendExecCmdParameters(cmd, BatchConstants.ARG_INPUT, inputPath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_OUTPUT, outputPath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME,
            "Kylin_Filter_Recommend_Cuboid_Data_" + seg.getRealization().getName());

    result.setMapReduceParams(cmd.toString());
    result.setMapReduceJobClass(FilterRecommendCuboidDataJob.class);
    return result;
}
 
Example #16
Source File: SparkBatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public SparkExecutable createBuildUHCDictSparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());

    sparkExecutable.setClassName(SparkUHCDictionary.class.getName());
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_INPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_OUTPUT_PATH.getOpt(), getDictRootPath(jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_CUBING_JOB_ID.getOpt(), jobId);
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());

    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_UHC_DICTIONARY);
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    return sparkExecutable;
}
 
Example #17
Source File: BatchOptimizeJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
private MapReduceExecutable createUpdateShardForOldCuboidDataStep(String inputPath, String outputPath) {
    MapReduceExecutable result = new MapReduceExecutable();
    result.setName(ExecutableConstants.STEP_NAME_UPDATE_OLD_CUBOID_SHARD);

    StringBuilder cmd = new StringBuilder();
    appendMapReduceParameters(cmd);
    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getRealization().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_SEGMENT_ID, seg.getUuid());
    appendExecCmdParameters(cmd, BatchConstants.ARG_INPUT, inputPath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_OUTPUT, outputPath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME,
            "Kylin_Update_Old_Cuboid_Shard_" + seg.getRealization().getName());

    result.setMapReduceParams(cmd.toString());
    result.setMapReduceJobClass(UpdateOldCuboidShardJob.class);
    return result;
}
 
Example #18
Source File: SparkBatchMergeJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public SparkExecutable createMergeDictionaryStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    sparkExecutable.setClassName(SparkMergingDictionary.class.getName());

    sparkExecutable.setParam(SparkMergingDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_MERGE_SEGMENT_IDS.getOpt(), StringUtil.join(mergingSegmentIds, ","));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));

    sparkExecutable.setJobId(jobID);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY + ":" + seg.toString());
    sparkExecutable.setSparkConfigName(ExecutableConstants.SPARK_SPECIFIC_CONFIG_NAME_MERGE_DICTIONARY);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
 
Example #19
Source File: BatchOptimizeJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
private MapReduceExecutable createInMemCubingStep(String jobId, CuboidModeEnum cuboidMode, String cuboidRootPath) {
    MapReduceExecutable cubeStep = new MapReduceExecutable();

    StringBuilder cmd = new StringBuilder();
    appendMapReduceParameters(cmd, JobEngineConfig.IN_MEM_JOB_CONF_SUFFIX);

    cubeStep.setName(ExecutableConstants.STEP_NAME_BUILD_IN_MEM_CUBE);

    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getRealization().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_SEGMENT_ID, seg.getUuid());
    appendExecCmdParameters(cmd, BatchConstants.ARG_INPUT, getBaseCuboidPath(cuboidRootPath));
    appendExecCmdParameters(cmd, BatchConstants.ARG_OUTPUT, getInMemCuboidPath(cuboidRootPath));
    appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME,
            "Kylin_Cube_Builder_" + seg.getRealization().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBING_JOB_ID, jobId);
    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBOID_MODE, cuboidMode.toString());

    cubeStep.setMapReduceParams(cmd.toString());
    cubeStep.setMapReduceJobClass(InMemCuboidFromBaseCuboidJob.class);
    cubeStep.setCounterSaveAs(
            CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES + "," + CubingJob.CUBE_SIZE_BYTES);
    return cubeStep;
}
 
Example #20
Source File: LookupSnapshotJobBuilder.java    From kylin-on-parquet-v2 with Apache License 2.0
private void addExtMaterializeLookupTableSteps(final LookupSnapshotBuildJob result,
        SnapshotTableDesc snapshotTableDesc) {
    LookupMaterializeContext lookupMaterializeContext = new LookupMaterializeContext(result);
    ILookupMaterializer materializer = MRUtil.getExtLookupMaterializer(snapshotTableDesc.getStorageType());
    materializer.materializeLookupTable(lookupMaterializeContext, cube, lookupTable);

    UpdateCubeAfterSnapshotStep afterSnapshotStep = new UpdateCubeAfterSnapshotStep();
    afterSnapshotStep.setName(ExecutableConstants.STEP_NAME_MATERIALIZE_LOOKUP_TABLE_UPDATE_CUBE);

    afterSnapshotStep.getParams().put(BatchConstants.ARG_EXT_LOOKUP_SNAPSHOTS_INFO, lookupMaterializeContext.getAllLookupSnapshotsInString());
    LookupExecutableUtil.setCubeName(cube.getName(), afterSnapshotStep.getParams());
    LookupExecutableUtil.setLookupTableName(lookupTable, afterSnapshotStep.getParams());
    LookupExecutableUtil.setSegments(segments, afterSnapshotStep.getParams());
    LookupExecutableUtil.setJobID(result.getId(), afterSnapshotStep.getParams());
    result.addTask(afterSnapshotStep);
}
 
Example #21
Source File: StreamingCubingJobBuilder.java    From kylin-on-parquet-v2 with Apache License 2.0
private MapReduceExecutable createMergeDictStep(String streamingStoragePath, String jobId, DefaultChainedExecutable jobFlow) {
    MapReduceExecutable mergeDict = new MapReduceExecutable();
    mergeDict.setName(ExecutableConstants.STEP_NAME_STREAMING_CREATE_DICTIONARY);
    StringBuilder cmd = new StringBuilder();

    appendMapReduceParameters(cmd, JobEngineConfig.CUBE_MERGE_JOB_CONF_SUFFIX);
    appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME,
            ExecutableConstants.STEP_NAME_STREAMING_CREATE_DICTIONARY);
    appendExecCmdParameters(cmd, BatchConstants.ARG_INPUT, streamingStoragePath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getRealization().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_SEGMENT_NAME, seg.getName());
    // Instead of using the MR job output, the trySaveNewDict API is used, so the output path is unused here
    appendExecCmdParameters(cmd, BatchConstants.ARG_OUTPUT, getDictPath(jobId));

    final String cubeName = CubingExecutableUtil.getCubeName(jobFlow.getParams());
    mergeDict.setMapReduceParams(cmd.toString());
    mergeDict.setMapReduceJobClass(MergeDictJob.class);
    mergeDict.setLockPathName(cubeName);
    mergeDict.setIsNeedLock(true);
    mergeDict.setIsNeedReleaseLock(false);
    mergeDict.setJobFlowJobId(jobFlow.getId());

    return mergeDict;

}
 
Example #22
Source File: StreamingCubingJobBuilder.java    From kylin-on-parquet-v2 with Apache License 2.0
private MapReduceExecutable createInMemCubingStep(String jobId, CuboidModeEnum cuboidMode, String cuboidRootPath,
                                                  String tmpBaseCuboidPath) {
    MapReduceExecutable cubeStep = new MapReduceExecutable();

    StringBuilder cmd = new StringBuilder();
    appendMapReduceParameters(cmd, JobEngineConfig.IN_MEM_JOB_CONF_SUFFIX);

    cubeStep.setName(ExecutableConstants.STEP_NAME_BUILD_IN_MEM_CUBE);

    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getRealization().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_SEGMENT_ID, seg.getUuid());
    appendExecCmdParameters(cmd, BatchConstants.ARG_INPUT, tmpBaseCuboidPath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_OUTPUT, getInMemCuboidPath(cuboidRootPath));
    appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME, "Kylin_Cube_Builder_"
            + seg.getRealization().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBING_JOB_ID, jobId);
    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBOID_MODE, cuboidMode.toString());
    appendExecCmdParameters(cmd, BatchConstants.ARG_UPDATE_SHARD, "true");

    cubeStep.setMapReduceParams(cmd.toString());
    cubeStep.setMapReduceJobClass(InMemCuboidFromBaseCuboidJob.class);
    cubeStep.setCounterSaveAs(",,"
            + CubingJob.CUBE_SIZE_BYTES);
    return cubeStep;
}
 
Example #23
Source File: StreamingCubingJobBuilder.java    From kylin-on-parquet-v2 with Apache License 2.0
private MapReduceExecutable createNDimensionCuboidStep(String parentPath, String outputPath, int level, String jobId) {
    // ND cuboid job
    MapReduceExecutable ndCuboidStep = new MapReduceExecutable();

    ndCuboidStep.setName(ExecutableConstants.STEP_NAME_BUILD_N_D_CUBOID + " : level " + level);
    StringBuilder cmd = new StringBuilder();

    appendMapReduceParameters(cmd);
    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getRealization().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_SEGMENT_ID, seg.getUuid());
    appendExecCmdParameters(cmd, BatchConstants.ARG_INPUT, parentPath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_OUTPUT, outputPath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME, "Kylin_ND-Cuboid_Builder_"
            + seg.getRealization().getName() + "_Step");
    appendExecCmdParameters(cmd, BatchConstants.ARG_LEVEL, "" + level);
    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBING_JOB_ID, jobId);

    ndCuboidStep.setMapReduceParams(cmd.toString());
    ndCuboidStep.setMapReduceJobClass(getNDCuboidJob());
    return ndCuboidStep;
}
 
Example #24
Source File: StreamingCubingJobBuilder.java    From kylin-on-parquet-v2 with Apache License 2.0
private MapReduceExecutable createBaseCuboidStep(String streamingStoragePath, String basicCuboidOutputPath) {
    // base cuboid job
    MapReduceExecutable baseCuboidStep = new MapReduceExecutable();

    StringBuilder cmd = new StringBuilder();
    appendMapReduceParameters(cmd, JobEngineConfig.IN_MEM_JOB_CONF_SUFFIX);
    baseCuboidStep.setName(ExecutableConstants.STEP_NAME_STREAMING_BUILD_BASE_CUBOID);

    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getRealization().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_SEGMENT_NAME, seg.getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_INPUT, streamingStoragePath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_OUTPUT, basicCuboidOutputPath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME, "Kylin_Base_Cuboid_Builder_"
            + seg.getRealization().getName());

    baseCuboidStep.setMapReduceParams(cmd.toString());
    baseCuboidStep.setMapReduceJobClass(ColumnToRowJob.class);
    // TODO need some way to get real source record count from fragment metadata
    baseCuboidStep.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES);
    return baseCuboidStep;
}
 
Example #25
Source File: SparkBatchCubingJobBuilder2.java    From kylin-on-parquet-v2 with Apache License 2.0
public void configureSparkJob(final CubeSegment seg, final SparkExecutable sparkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_PATH.getOpt(),
            tablePath);
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    sparkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();

    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_CUBE + ":" + seg.toString());
}
 
Example #26
Source File: CubingJob.java    From kylin-on-parquet-v2 with Apache License 2.0
protected void updateMetrics(ExecutableContext context, ExecuteResult result, ExecutableState state) {
    JobMetricsFacade.JobStatisticsResult jobStats = new JobMetricsFacade.JobStatisticsResult();
    jobStats.setWrapper(getSubmitter(), getProjectName(), CubingExecutableUtil.getCubeName(getParams()), getId(),
            getJobType(), getAlgorithm() == null ? "NULL" : getAlgorithm().toString());

    if (state == ExecutableState.SUCCEED) {
        jobStats.setJobStats(findSourceSizeBytes(), findCubeSizeBytes(), getDuration(), getMapReduceWaitTime(),
                getPerBytesTimeCost(findSourceSizeBytes(), getDuration()));
        if (CubingJobTypeEnum.getByName(getJobType()) == CubingJobTypeEnum.BUILD) {
            jobStats.setJobStepStats(getTaskDurationByName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS),
                    getTaskDurationByName(ExecutableConstants.STEP_NAME_BUILD_DICTIONARY),
                    getTaskDurationByName(ExecutableConstants.STEP_NAME_BUILD_IN_MEM_CUBE),
                    getTaskDurationByName(ExecutableConstants.STEP_NAME_CONVERT_CUBOID_TO_HFILE));
        }
    } else if (state == ExecutableState.ERROR) {
        jobStats.setJobException(result.getThrowable() != null ? result.getThrowable() : new Exception());
    }
    JobMetricsFacade.updateMetrics(jobStats);
}
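updateMetrics reports per-step durations that are looked up by the step names defined in ExecutableConstants. The sketch below shows such a name-based lookup over a list of sub-tasks; the Task record and the sample step names are hypothetical stand-ins for Kylin's executables.

import java.util.List;

// Sketch of getTaskDurationByName: find a sub-task by its step name and report its duration.
// The Task record is hypothetical; Kylin's executables expose equivalent accessors.
public class TaskDurationSketch {

    record Task(String name, long durationMillis) { }

    static long taskDurationByName(List<Task> tasks, String stepName) {
        return tasks.stream()
                .filter(t -> stepName.equals(t.name()))
                .mapToLong(Task::durationMillis)
                .findFirst()
                .orElse(0L);
    }

    public static void main(String[] args) {
        List<Task> tasks = List.of(
                new Task("Extract Fact Table Distinct Columns", 120_000L),
                new Task("Build Cube In-Mem", 900_000L));
        System.out.println(taskDurationByName(tasks, "Build Cube In-Mem")); // 900000
    }
}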
 
Example #27
Source File: HBaseJobSteps.java    From kylin-on-parquet-v2 with Apache License 2.0
public HadoopShellExecutable createCreateHTableStep(String jobId, CuboidModeEnum cuboidMode) {
    HadoopShellExecutable createHtableStep = new HadoopShellExecutable();
    createHtableStep.setName(ExecutableConstants.STEP_NAME_CREATE_HBASE_TABLE);
    StringBuilder cmd = new StringBuilder();
    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getRealization().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_SEGMENT_ID, seg.getUuid());
    appendExecCmdParameters(cmd, BatchConstants.ARG_PARTITION,
            getRowkeyDistributionOutputPath(jobId) + "/part-r-00000");
    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBOID_MODE, cuboidMode.toString());
    appendExecCmdParameters(cmd, BatchConstants.ARG_HBASE_CONF_PATH, getHBaseConfFilePath(jobId));

    createHtableStep.setJobParams(cmd.toString());
    createHtableStep.setJobClass(CreateHTableJob.class);

    return createHtableStep;
}
 
Example #28
Source File: HBaseJobSteps.java    From kylin-on-parquet-v2 with Apache License 2.0
public MapReduceExecutable createMergeCuboidDataStep(CubeSegment seg, List<CubeSegment> mergingSegments,
        String jobID, Class<? extends AbstractHadoopJob> clazz) {
    final List<String> mergingCuboidPaths = Lists.newArrayList();
    for (CubeSegment merging : mergingSegments) {
        mergingCuboidPaths.add(getCuboidRootPath(merging) + "*");
    }
    String formattedPath = StringUtil.join(mergingCuboidPaths, ",");
    String outputPath = getCuboidRootPath(jobID);

    MapReduceExecutable mergeCuboidDataStep = new MapReduceExecutable();
    mergeCuboidDataStep.setName(ExecutableConstants.STEP_NAME_MERGE_CUBOID);
    StringBuilder cmd = new StringBuilder();

    appendMapReduceParameters(cmd);
    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getCubeInstance().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_SEGMENT_ID, seg.getUuid());
    appendExecCmdParameters(cmd, BatchConstants.ARG_INPUT, formattedPath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_OUTPUT, outputPath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME,
            "Kylin_Merge_Cuboid_" + seg.getCubeInstance().getName() + "_Step");

    mergeCuboidDataStep.setMapReduceParams(cmd.toString());
    mergeCuboidDataStep.setMapReduceJobClass(clazz);
    return mergeCuboidDataStep;
}
 
Example #29
Source File: HiveInputBase.java    From kylin-on-parquet-v2 with Apache License 2.0
protected static AbstractExecutable createMrHiveGlobalDictExtractStep(IJoinedFlatTableDesc flatDesc,
        String hiveInitStatements, String jobWorkingDir, String cubeName, String[] mrHiveDictColumns,
        String globalDictDatabase, String globalDictTable) {
    // First, make sure the cube's global dict Hive table exists.
    String createGlobalDictTableHql = "CREATE TABLE IF NOT EXISTS " + globalDictDatabase + "." + globalDictTable
            + "\n" + "( dict_key STRING COMMENT '', \n" + "dict_val INT COMMENT '' \n" + ") \n"
            + "COMMENT '' \n" + "PARTITIONED BY (dict_column string) \n" + "STORED AS TEXTFILE; \n";

    final String dropDictIntermediateTableHql = MRHiveDictUtil.generateDropTableStatement(flatDesc);
    final String createDictIntermediateTableHql = MRHiveDictUtil.generateCreateTableStatement(flatDesc);

    StringBuilder insertDataToDictIntermediateTableSql = new StringBuilder();
    for (String dictColumn : mrHiveDictColumns) {
        insertDataToDictIntermediateTableSql
                .append(MRHiveDictUtil.generateInsertDataStatement(flatDesc, dictColumn));
    }

    CreateMrHiveDictStep step = new CreateMrHiveDictStep();
    step.setInitStatement(hiveInitStatements);
    step.setCreateTableStatement(createGlobalDictTableHql + dropDictIntermediateTableHql
            + createDictIntermediateTableHql + insertDataToDictIntermediateTableSql.toString());
    CubingExecutableUtil.setCubeName(cubeName, step.getParams());
    step.setName(ExecutableConstants.STEP_NAME_GLOBAL_DICT_MRHIVE_EXTRACT_DICTVAL);
    return step;
}
 
Example #30
Source File: HiveInputBase.java    From kylin-on-parquet-v2 with Apache License 2.0
protected static AbstractExecutable createRedistributeFlatHiveTableStep(String hiveInitStatements, String cubeName,
        IJoinedFlatTableDesc flatDesc, CubeDesc cubeDesc) {
    RedistributeFlatHiveTableStep step = new RedistributeFlatHiveTableStep();
    step.setInitStatement(hiveInitStatements);
    step.setIntermediateTable(flatDesc.getTableName());
    step.setRedistributeDataStatement(JoinedFlatTable.generateRedistributeFlatTableStatement(flatDesc, cubeDesc));
    CubingExecutableUtil.setCubeName(cubeName, step.getParams());
    step.setName(ExecutableConstants.STEP_NAME_REDISTRIBUTE_FLAT_HIVE_TABLE);
    return step;
}