Java Code Examples for org.apache.kylin.common.util.StringUtil#appendWithSeparator()
The following examples show how to use org.apache.kylin.common.util.StringUtil#appendWithSeparator().
Follow the link above each example to view the original project and source file.
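All of the examples below use appendWithSeparator() to accumulate a jar list for a Spark or Flink job. Before the examples, here is a minimal sketch of the contract those call sites appear to rely on, assuming the helper skips null or empty input and inserts a comma between accumulated entries (Spark and Flink jar lists are comma-separated); the demoAppendWithSeparator helper below is a hypothetical stand-in, not Kylin's actual implementation, whose edge-case handling may differ.

// Hypothetical stand-in (not Kylin source) illustrating the assumed behavior:
// skip null/empty input and separate accumulated entries with a comma.
public class AppendWithSeparatorDemo {

    static void demoAppendWithSeparator(StringBuilder src, String append) {
        if (append == null || append.isEmpty()) {
            return; // nothing to append
        }
        if (src.length() > 0) {
            src.append(','); // Spark/Flink jar lists are comma-separated
        }
        src.append(append);
    }

    public static void main(String[] args) {
        StringBuilder jars = new StringBuilder();
        demoAppendWithSeparator(jars, "/opt/lib/hbase-common.jar");
        demoAppendWithSeparator(jars, null); // ignored
        demoAppendWithSeparator(jars, "/opt/lib/metrics-core.jar");
        // Prints: /opt/lib/hbase-common.jar,/opt/lib/metrics-core.jar
        System.out.println(jars);
    }
}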
Example 1
Source File: SparkBatchCubingJobBuilder2.java From kylin-on-parquet-v2 with Apache License 2.0
public void configureSparkJob(final CubeSegment seg, final SparkExecutable sparkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_PATH.getOpt(), tablePath);
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    sparkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_CUBE + ":" + seg.toString());
}
Example 2
Source File: SparkBatchCubingJobBuilder2.java From kylin with Apache License 2.0
public void configureSparkJob(final CubeSegment seg, final SparkExecutable sparkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_PATH.getOpt(), tablePath);
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    sparkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_CUBE + ":" + seg.toString());
}
Example 3
Source File: SparkBatchMergeJobBuilder2.java From kylin with Apache License 2.0
public SparkExecutable createMergeDictionaryStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    sparkExecutable.setClassName(SparkMergingDictionary.class.getName());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_MERGE_SEGMENT_IDS.getOpt(),
            StringUtil.join(mergingSegmentIds, ","));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));
    sparkExecutable.setJobId(jobID);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY + ":" + seg.toString());
    sparkExecutable.setSparkConfigName(ExecutableConstants.SPARK_SPECIFIC_CONFIG_NAME_MERGE_DICTIONARY);

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
Example 4
Source File: SparkBatchCubingJobBuilder2.java From kylin with Apache License 2.0
public SparkExecutable createBuildUHCDictSparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    sparkExecutable.setClassName(SparkUHCDictionary.class.getName());
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_INPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_OUTPUT_PATH.getOpt(), getDictRootPath(jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_CUBING_JOB_ID.getOpt(), jobId);
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_UHC_DICTIONARY);
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES,
            getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
Example 5
Source File: FlinkBatchMergeJobBuilder2.java From kylin with Apache License 2.0
public FlinkExecutable createMergeCuboidDataFlinkStep(CubeSegment seg, List<CubeSegment> mergingSegments, String jobID) {
    final List<String> mergingCuboidPaths = Lists.newArrayList();
    for (CubeSegment merging : mergingSegments) {
        mergingCuboidPaths.add(getCuboidRootPath(merging));
    }
    String formattedPath = StringUtil.join(mergingCuboidPaths, ",");
    String outputPath = getCuboidRootPath(jobID);

    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    flinkExecutable.setClassName(FlinkCubingMerge.class.getName());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_INPUT_PATH.getOpt(), formattedPath);
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobID));
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_OUTPUT_PATH.getOpt(), outputPath);
    flinkExecutable.setJobId(jobID);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_CUBOID);

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());

    return flinkExecutable;
}
Example 6
Source File: FlinkBatchMergeJobBuilder2.java From kylin with Apache License 2.0
public FlinkExecutable createMergeDictionaryFlinkStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    flinkExecutable.setClassName(FlinkMergingDictionary.class.getName());
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobID));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_MERGE_SEGMENT_IDS.getOpt(),
            StringUtil.join(mergingSegmentIds, ","));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));
    flinkExecutable.setJobId(jobID);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY);
    flinkExecutable.setFlinkConfigName(ExecutableConstants.FLINK_SPECIFIC_CONFIG_NAME_MERGE_DICTIONARY);

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());

    return flinkExecutable;
}
Example 7
Source File: FlinkBatchCubingJobBuilder2.java From kylin with Apache License 2.0
public void configureFlinkJob(final CubeSegment seg, final FlinkExecutable flinkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_PATH.getOpt(), tablePath);
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    flinkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());

    flinkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_FLINK_CUBE);
}
Example 8
Source File: FlinkBatchCubingJobBuilder2.java From kylin with Apache License 2.0
public FlinkExecutable createFactDistinctColumnsFlinkStep(String jobId) {
    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));

    flinkExecutable.setClassName(FlinkFactDistinctColumns.class.getName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_INPUT_PATH.getOpt(), tablePath);
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_STATS_SAMPLING_PERCENT.getOpt(),
            String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));
    flinkExecutable.setJobId(jobId);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS);
    flinkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES,
            getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());

    return flinkExecutable;
}
Example 9
Source File: SparkBatchCubingJobBuilder2.java From kylin-on-parquet-v2 with Apache License 2.0
public SparkExecutable createBuildDictionarySparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    sparkExecutable.setClassName(SparkBuildDictionary.class.getName());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_INPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_DICT_PATH.getOpt(), getDictRootPath(jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_CUBING_JOB_ID.getOpt(), jobId);
    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_DICTIONARY);
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
Example 10
Source File: SparkBatchCubingJobBuilder2.java From kylin-on-parquet-v2 with Apache License 2.0
public SparkExecutable createBuildUHCDictSparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    sparkExecutable.setClassName(SparkUHCDictionary.class.getName());
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_INPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_OUTPUT_PATH.getOpt(), getDictRootPath(jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_CUBING_JOB_ID.getOpt(), jobId);
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_UHC_DICTIONARY);
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES,
            getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
Example 11
Source File: SparkBatchCubingJobBuilder2.java From kylin-on-parquet-v2 with Apache License 2.0
public SparkExecutable createFactDistinctColumnsSparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));

    sparkExecutable.setClassName(SparkFactDistinct.class.getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_PATH.getOpt(), tablePath);
    sparkExecutable.setParam(SparkFactDistinct.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_STATS_SAMPLING_PERCENT.getOpt(),
            String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));
    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS + ":" + seg.toString());
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES,
            getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
Example 12
Source File: SparkBatchMergeJobBuilder2.java From kylin-on-parquet-v2 with Apache License 2.0
public SparkExecutable createMergeDictionaryStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    sparkExecutable.setClassName(SparkMergingDictionary.class.getName());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_MERGE_SEGMENT_IDS.getOpt(),
            StringUtil.join(mergingSegmentIds, ","));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));
    sparkExecutable.setJobId(jobID);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY + ":" + seg.toString());
    sparkExecutable.setSparkConfigName(ExecutableConstants.SPARK_SPECIFIC_CONFIG_NAME_MERGE_DICTIONARY);

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
Example 13
Source File: FlinkBatchMergeJobBuilder2.java From kylin-on-parquet-v2 with Apache License 2.0
public FlinkExecutable createMergeCuboidDataFlinkStep(CubeSegment seg, List<CubeSegment> mergingSegments, String jobID) {
    final List<String> mergingCuboidPaths = Lists.newArrayList();
    for (CubeSegment merging : mergingSegments) {
        mergingCuboidPaths.add(getCuboidRootPath(merging));
    }
    String formattedPath = StringUtil.join(mergingCuboidPaths, ",");
    String outputPath = getCuboidRootPath(jobID);

    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    flinkExecutable.setClassName(FlinkCubingMerge.class.getName());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_INPUT_PATH.getOpt(), formattedPath);
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobID));
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_OUTPUT_PATH.getOpt(), outputPath);
    flinkExecutable.setJobId(jobID);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_CUBOID);

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());

    return flinkExecutable;
}
Example 14
Source File: FlinkBatchMergeJobBuilder2.java From kylin-on-parquet-v2 with Apache License 2.0
public FlinkExecutable createMergeDictionaryFlinkStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    flinkExecutable.setClassName(FlinkMergingDictionary.class.getName());
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobID));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_MERGE_SEGMENT_IDS.getOpt(),
            StringUtil.join(mergingSegmentIds, ","));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));
    flinkExecutable.setJobId(jobID);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY);
    flinkExecutable.setFlinkConfigName(ExecutableConstants.FLINK_SPECIFIC_CONFIG_NAME_MERGE_DICTIONARY);

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());

    return flinkExecutable;
}
Example 15
Source File: FlinkBatchCubingJobBuilder2.java From kylin-on-parquet-v2 with Apache License 2.0
public void configureFlinkJob(final CubeSegment seg, final FlinkExecutable flinkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_PATH.getOpt(), tablePath);
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    flinkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());

    flinkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_FLINK_CUBE);
}
Example 16
Source File: SparkBatchMergeJobBuilder2.java From kylin-on-parquet-v2 with Apache License 2.0
public SparkExecutable createMergeCuboidDataStep(CubeSegment seg, List<CubeSegment> mergingSegments, String jobID) {
    final List<String> mergingCuboidPaths = Lists.newArrayList();
    for (CubeSegment merging : mergingSegments) {
        mergingCuboidPaths.add(getCuboidRootPath(merging));
    }
    String formattedPath = StringUtil.join(mergingCuboidPaths, ",");
    String outputPath = getCuboidRootPath(jobID);

    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    sparkExecutable.setClassName(SparkCubingMerge.class.getName());
    sparkExecutable.setParam(SparkCubingMerge.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkCubingMerge.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkCubingMerge.OPTION_INPUT_PATH.getOpt(), formattedPath);
    sparkExecutable.setParam(SparkCubingMerge.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobID));
    sparkExecutable.setParam(SparkCubingMerge.OPTION_OUTPUT_PATH.getOpt(), outputPath);
    sparkExecutable.setJobId(jobID);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_CUBOID + ":" + seg.toString());

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
Example 17
Source File: HiveInputBase.java From kylin-on-parquet-v2 with Apache License 2.0
protected static AbstractExecutable createFlatHiveTableBySparkSql(String hiveInitStatements,
        String jobWorkingDir, String cubeName, IJoinedFlatTableDesc flatDesc) {
    final String dropTableHql = JoinedFlatTable.generateDropTableStatement(flatDesc);
    final String createTableHql = JoinedFlatTable.generateCreateTableStatement(flatDesc, jobWorkingDir);
    String insertDataHqls = JoinedFlatTable.generateInsertDataStatement(flatDesc);

    KylinConfig config = flatDesc.getSegment().getConfig();
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(config);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_CREATE_FLAT_TABLE_WITH_SPARK);
    sparkExecutable.setClassName(SparkCreatingFlatTable.class.getName());

    sparkExecutable.setParam(SparkSqlBatch.OPTION_CUBE_NAME.getOpt(), cubeName);
    sparkExecutable.setParam(SparkSqlBatch.OPTION_STEP_NAME.getOpt(),
            base64EncodeStr(ExecutableConstants.STEP_NAME_CREATE_FLAT_TABLE_WITH_SPARK));
    sparkExecutable.setParam(SparkSqlBatch.OPTION_SEGMENT_ID.getOpt(), flatDesc.getSegment().getName());
    sparkExecutable.setParam(SparkSqlBatch.OPTION_SQL_COUNT.getOpt(),
            String.valueOf(SparkCreatingFlatTable.SQL_COUNT));

    sparkExecutable.setParam(SparkCreatingFlatTable.getSqlOption(0).getOpt(), base64EncodeStr(hiveInitStatements));
    sparkExecutable.setParam(SparkCreatingFlatTable.getSqlOption(1).getOpt(), base64EncodeStr(dropTableHql));

    // createTableHql contains both a CREATE TABLE statement and an ALTER TABLE statement
    String[] sqlArr = createTableHql.trim().split(";");
    if (2 != sqlArr.length) {
        throw new RuntimeException("create table hql should combined by a create table sql "
                + "and a alter sql, but got: " + createTableHql);
    }
    sparkExecutable.setParam(SparkCreatingFlatTable.getSqlOption(2).getOpt(), base64EncodeStr(sqlArr[0]));
    sparkExecutable.setParam(SparkCreatingFlatTable.getSqlOption(3).getOpt(), base64EncodeStr(sqlArr[1]));

    sparkExecutable.setParam(SparkCreatingFlatTable.getSqlOption(4).getOpt(), base64EncodeStr(insertDataHqls));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, config.getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
Example 18
Source File: HiveInputBase.java From kylin with Apache License 2.0
protected static AbstractExecutable createFlatHiveTableBySparkSql(String hiveInitStatements,
        String jobWorkingDir, String cubeName, IJoinedFlatTableDesc flatDesc) {
    final String dropTableHql = JoinedFlatTable.generateDropTableStatement(flatDesc);
    final String createTableHql = JoinedFlatTable.generateCreateTableStatement(flatDesc, jobWorkingDir);
    String insertDataHqls = JoinedFlatTable.generateInsertDataStatement(flatDesc);

    KylinConfig config = flatDesc.getSegment().getConfig();
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(config);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_CREATE_FLAT_TABLE_WITH_SPARK);
    sparkExecutable.setClassName(SparkCreatingFlatTable.class.getName());

    sparkExecutable.setParam(SparkSqlBatch.OPTION_CUBE_NAME.getOpt(), cubeName);
    sparkExecutable.setParam(SparkSqlBatch.OPTION_STEP_NAME.getOpt(),
            base64EncodeStr(ExecutableConstants.STEP_NAME_CREATE_FLAT_TABLE_WITH_SPARK));
    sparkExecutable.setParam(SparkSqlBatch.OPTION_SEGMENT_ID.getOpt(), flatDesc.getSegment().getName());
    sparkExecutable.setParam(SparkSqlBatch.OPTION_SQL_COUNT.getOpt(),
            String.valueOf(SparkCreatingFlatTable.SQL_COUNT));

    sparkExecutable.setParam(SparkCreatingFlatTable.getSqlOption(0).getOpt(), base64EncodeStr(hiveInitStatements));
    sparkExecutable.setParam(SparkCreatingFlatTable.getSqlOption(1).getOpt(), base64EncodeStr(dropTableHql));

    // createTableHql contains both a CREATE TABLE statement and an ALTER TABLE statement
    String[] sqlArr = createTableHql.trim().split(";");
    if (2 != sqlArr.length) {
        throw new RuntimeException("create table hql should combined by a create table sql "
                + "and a alter sql, but got: " + createTableHql);
    }
    sparkExecutable.setParam(SparkCreatingFlatTable.getSqlOption(2).getOpt(), base64EncodeStr(sqlArr[0]));
    sparkExecutable.setParam(SparkCreatingFlatTable.getSqlOption(3).getOpt(), base64EncodeStr(sqlArr[1]));

    sparkExecutable.setParam(SparkCreatingFlatTable.getSqlOption(4).getOpt(), base64EncodeStr(insertDataHqls));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, config.getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
Example 19
Source File: HBaseSparkSteps.java From kylin-on-parquet-v2 with Apache License 2.0
public AbstractExecutable createConvertCuboidToHfileStep(String jobId) {
    String cuboidRootPath = getCuboidRootPath(jobId);
    String inputPath = cuboidRootPath + (cuboidRootPath.endsWith("/") ? "" : "/");

    SparkBatchCubingJobBuilder2 jobBuilder2 = new SparkBatchCubingJobBuilder2(seg, null);
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    sparkExecutable.setClassName(SparkCubeHFile.class.getName());
    sparkExecutable.setParam(SparkCubeHFile.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkCubeHFile.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkCubeHFile.OPTION_INPUT_PATH.getOpt(), inputPath);
    sparkExecutable.setParam(SparkCubeHFile.OPTION_META_URL.getOpt(),
            jobBuilder2.getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkCubeHFile.OPTION_OUTPUT_PATH.getOpt(), getHFilePath(jobId));
    sparkExecutable.setParam(SparkCubeHFile.OPTION_PARTITION_FILE_PATH.getOpt(),
            getRowkeyDistributionOutputPath(jobId) + "/part-r-00000_hfile");
    sparkExecutable.setParam(AbstractHadoopJob.OPTION_HBASE_CONF_PATH.getOpt(), getHBaseConfFilePath(jobId));
    sparkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, ClassUtil.findContainingJar(org.apache.hadoop.hbase.KeyValue.class));
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar(org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2.class));
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar(org.apache.hadoop.hbase.regionserver.BloomType.class));
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar(org.apache.hadoop.hbase.protobuf.generated.HFileProtos.class)); // hbase-protocol.jar
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar(org.apache.hadoop.hbase.CompatibilityFactory.class)); // hbase-hadoop-compat.jar
    StringUtil.appendWithSeparator(jars, ClassUtil.findContainingJar("org.htrace.HTraceConfiguration", null)); // htrace-core.jar
    StringUtil.appendWithSeparator(jars, ClassUtil.findContainingJar("org.apache.htrace.Trace", null)); // htrace-core.jar
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar("com.yammer.metrics.core.MetricsRegistry", null)); // metrics-core.jar

    // KYLIN-3607
    StringUtil.appendWithSeparator(jars, ClassUtil.findContainingJar(
            "org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactory", null)); // hbase-hadoop-compat-1.1.1.jar
    StringUtil.appendWithSeparator(jars, ClassUtil.findContainingJar(
            "org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactoryImpl", null)); // hbase-hadoop2-compat-1.1.1.jar

    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    sparkExecutable.setName(ExecutableConstants.STEP_NAME_CONVERT_CUBOID_TO_HFILE);
    sparkExecutable.setCounterSaveAs(",," + CubingJob.CUBE_SIZE_BYTES, getCounterOutputPath(jobId));
    return sparkExecutable;
}
Example 20
Source File: HBaseFlinkSteps.java From kylin with Apache License 2.0
public AbstractExecutable createConvertCuboidToHfileStep(String jobId) {
    String cuboidRootPath = getCuboidRootPath(jobId);
    String inputPath = cuboidRootPath + (cuboidRootPath.endsWith("/") ? "" : "/");

    FlinkBatchCubingJobBuilder2 jobBuilder2 = new FlinkBatchCubingJobBuilder2(seg, null, 0);
    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    flinkExecutable.setClassName(FlinkCubeHFile.class.getName());
    flinkExecutable.setParam(FlinkCubeHFile.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubeHFile.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubeHFile.OPTION_META_URL.getOpt(),
            jobBuilder2.getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkCubeHFile.OPTION_OUTPUT_PATH.getOpt(), getHFilePath(jobId));
    flinkExecutable.setParam(FlinkCubeHFile.OPTION_INPUT_PATH.getOpt(), inputPath);
    flinkExecutable.setParam(FlinkCubeHFile.OPTION_PARTITION_FILE_PATH.getOpt(),
            getRowkeyDistributionOutputPath(jobId) + "/part-r-00000_hfile");
    flinkExecutable.setParam(AbstractHadoopJob.OPTION_HBASE_CONF_PATH.getOpt(), getHBaseConfFilePath(jobId));
    flinkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, ClassUtil.findContainingJar(org.apache.hadoop.hbase.KeyValue.class));
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar(org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2.class));
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar(org.apache.hadoop.hbase.regionserver.BloomType.class));
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar(org.apache.hadoop.hbase.protobuf.generated.HFileProtos.class)); // hbase-protocol.jar
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar(org.apache.hadoop.hbase.CompatibilityFactory.class)); // hbase-hadoop-compat.jar
    StringUtil.appendWithSeparator(jars, ClassUtil.findContainingJar("org.htrace.HTraceConfiguration", null)); // htrace-core.jar
    StringUtil.appendWithSeparator(jars, ClassUtil.findContainingJar("org.apache.htrace.Trace", null)); // htrace-core.jar
    StringUtil.appendWithSeparator(jars,
            ClassUtil.findContainingJar("com.yammer.metrics.core.MetricsRegistry", null)); // metrics-core.jar

    // KYLIN-3607
    StringUtil.appendWithSeparator(jars, ClassUtil.findContainingJar(
            "org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactory", null)); // hbase-hadoop-compat-1.1.1.jar
    StringUtil.appendWithSeparator(jars, ClassUtil.findContainingJar(
            "org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactoryImpl", null)); // hbase-hadoop2-compat-1.1.1.jar

    // KYLIN-3537
    StringUtil.appendWithSeparator(jars, ClassUtil.findContainingJar(
            "org.apache.hadoop.hbase.io.hfile.HFileWriterImpl", null)); // hbase-server.jar
    StringUtil.appendWithSeparator(jars, ClassUtil.findContainingJar(
            "org.apache.hbase.thirdparty.com.google.common.cache.CacheLoader", null)); // hbase-shaded-miscellaneous.jar
    StringUtil.appendWithSeparator(jars, ClassUtil.findContainingJar(
            "org.apache.hadoop.hbase.metrics.MetricRegistry", null)); // hbase-metrics-api.jar
    StringUtil.appendWithSeparator(jars, ClassUtil.findContainingJar(
            "org.apache.hadoop.hbase.metrics.impl.MetricRegistriesImpl", null)); // hbase-metrics.jar
    StringUtil.appendWithSeparator(jars, ClassUtil.findContainingJar(
            "org.apache.hbase.thirdparty.com.google.protobuf.Message", null)); // hbase-shaded-protobuf.jar
    StringUtil.appendWithSeparator(jars, ClassUtil.findContainingJar(
            "org.apache.hadoop.hbase.shaded.protobuf.generated.HFileProtos", null)); // hbase-protocol-shaded.jar

    if (!StringUtil.isEmpty(seg.getConfig().getFlinkAdditionalJars())) {
        StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    }
    flinkExecutable.setJars(jars.toString());

    flinkExecutable.setName(ExecutableConstants.STEP_NAME_CONVERT_CUBOID_TO_HFILE);
    flinkExecutable.setCounterSaveAs(",," + CubingJob.CUBE_SIZE_BYTES, getCounterOutputPath(jobId));
    return flinkExecutable;
}