org.apache.kylin.job.JoinedFlatTable Java Examples
The following examples show how to use
org.apache.kylin.job.JoinedFlatTable.
You can follow the links above each example to the original project or source file.
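Before diving in, it helps to see the pattern the examples share: JoinedFlatTable is a collection of static helpers that generate the HQL around Kylin's intermediate flat table. The sketch below strings together the calls that recur throughout; it assumes a flatDesc (an IJoinedFlatTableDesc, e.g. from EngineFactory.getJoinedFlatTableDesc(segment)) and a job working directory, and the variable names are illustrative rather than taken from any one example.

// Minimal sketch of the recurring JoinedFlatTable usage pattern (assumed inputs:
// flatDesc is an IJoinedFlatTableDesc, jobWorkingDir is the job's HDFS working dir).
String initHql   = JoinedFlatTable.generateHiveInitStatements("my_db");           // session init (USE <database>, etc.)
String dropHql   = JoinedFlatTable.generateDropTableStatement(flatDesc);          // DROP TABLE IF EXISTS ...
String createHql = JoinedFlatTable.generateCreateTableStatement(flatDesc, jobWorkingDir); // CREATE ... flat table
String insertHql = JoinedFlatTable.generateInsertDataStatement(flatDesc);         // INSERT OVERWRITE ... SELECT ...

// Where the flat table's files live under the working directory; the Spark/Flink/MR
// cubing steps pass this as their input path.
String tableDir = JoinedFlatTable.getTableDir(flatDesc, jobWorkingDir);

// The full SELECT over the joined star schema, as returned by the /sql REST endpoints.
String selectSql = JoinedFlatTable.generateSelectDataStatement(flatDesc);

The drop/create/insert statements are typically concatenated and handed to a single CreateFlatHiveTableStep (see Examples #28 and #29), while getTableDir feeds the input-path parameter of the cubing executables (Examples #1, #3 and #24).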
Example #1
Source File: SparkBatchCubingJobBuilder2.java From kylin with Apache License 2.0
public SparkExecutable createFactDistinctColumnsSparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    sparkExecutable.setClassName(SparkFactDistinct.class.getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_PATH.getOpt(), tablePath);
    sparkExecutable.setParam(SparkFactDistinct.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_STATS_SAMPLING_PERCENT.getOpt(), String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));
    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS + ":" + seg.toString());
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
Example #2
Source File: QueryGenerator.java From kylin with Apache License 2.0
public static String generateQuery(CubeDesc cubeDesc, Set<BitSet> selected, int maxNumOfDimension) {
    IJoinedFlatTableDesc flatDesc = new CubeJoinedFlatTableDesc(cubeDesc);
    String dimensionStatement = createDimensionStatement(cubeDesc.getDimensions(), selected, maxNumOfDimension);
    String measureStatement = createMeasureStatement(cubeDesc.getMeasures());

    StringBuilder sql = new StringBuilder();
    sql.append("SELECT" + "\n");
    sql.append(dimensionStatement);
    sql.append(measureStatement);

    StringBuilder joinPart = new StringBuilder();
    JoinedFlatTable.appendJoinStatement(flatDesc, joinPart, false, null);
    sql.append(joinPart.toString().replaceAll("DEFAULT\\.", ""));

    sql.append("GROUP BY" + "\n");
    sql.append(dimensionStatement);

    String ret = sql.toString();
    ret = ret.replaceAll("`", "\"");
    return ret;
}
Example #3
Source File: SparkBatchCubingJobBuilder2.java From kylin-on-parquet-v2 with Apache License 2.0
public void configureSparkJob(final CubeSegment seg, final SparkExecutable sparkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_PATH.getOpt(), tablePath);
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    sparkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_CUBE + ":" + seg.toString());
}
Example #4
Source File: FlinkBatchCubingJobBuilder2.java From kylin-on-parquet-v2 with Apache License 2.0
public void configureFlinkJob(final CubeSegment seg, final FlinkExecutable flinkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_PATH.getOpt(), tablePath);
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    flinkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_FLINK_CUBE);
}
Example #5
Source File: FlinkBatchCubingJobBuilder2.java From kylin with Apache License 2.0
public void configureFlinkJob(final CubeSegment seg, final FlinkExecutable flinkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_PATH.getOpt(), tablePath);
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    flinkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_FLINK_CUBE);
}
Example #6
Source File: CubeController.java From kylin-on-parquet-v2 with Apache License 2.0
/**
 * Get SQL of a Cube segment
 *
 * @param cubeName    Cube Name
 * @param segmentName Segment Name
 * @return
 * @throws IOException
 */
@RequestMapping(value = "/{cubeName}/segs/{segmentName}/sql", method = { RequestMethod.GET }, produces = { "application/json" })
@ResponseBody
public GeneralResponse getSql(@PathVariable String cubeName, @PathVariable String segmentName) {
    checkCubeExists(cubeName);
    CubeInstance cube = cubeService.getCubeManager().getCube(cubeName);
    CubeSegment segment = cube.getSegment(segmentName, null);
    if (segment == null) {
        throw new NotFoundException("Cannot find segment " + segmentName);
    }
    IJoinedFlatTableDesc flatTableDesc = new CubeJoinedFlatTableDesc(segment, true);
    String sql = JoinedFlatTable.generateSelectDataStatement(flatTableDesc);

    GeneralResponse response = new GeneralResponse();
    response.setProperty("sql", sql);
    return response;
}
Example #7
Source File: BaseCuboidBuilder.java From kylin with Apache License 2.0
private void checkHiveGlobalDictionaryColumn() {
    Set<String> mrDictColumnSet = new HashSet<>();
    if (kylinConfig.getMrHiveDictColumns() != null) {
        Collections.addAll(mrDictColumnSet, kylinConfig.getMrHiveDictColumns());
    }

    for (MeasureDesc measure : measureDescList) {
        if (measure.getFunction().getExpression().equalsIgnoreCase(FunctionDesc.FUNC_COUNT_DISTINCT)) {
            FunctionDesc functionDesc = measure.getFunction();
            TblColRef colRef = functionDesc.getParameter().getColRefs().get(0);
            if (mrDictColumnSet.contains(JoinedFlatTable.colName(colRef, true))) {
                functionDesc.setMrDict(true);
                logger.info("Enable hive global dictionary for {}", colRef);
                measure.setFunction(functionDesc);
            }
        }
    }
}
Example #8
Source File: FlinkBatchCubingJobBuilder2.java From kylin with Apache License 2.0
public FlinkExecutable createFactDistinctColumnsFlinkStep(String jobId) {
    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    flinkExecutable.setClassName(FlinkFactDistinctColumns.class.getName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_INPUT_PATH.getOpt(), tablePath);
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_STATS_SAMPLING_PERCENT.getOpt(), String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));
    flinkExecutable.setJobId(jobId);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS);
    flinkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());

    return flinkExecutable;
}
Example #9
Source File: HiveInputBase.java From kylin with Apache License 2.0
@Override
public void addStepPhase_ReplaceFlatTableGlobalColumnValue(DefaultChainedExecutable jobFlow) {
    KylinConfig dictConfig = flatDesc.getSegment().getConfig();
    final String cubeName = CubingExecutableUtil.getCubeName(jobFlow.getParams());
    String globalDictTable = MRHiveDictUtil.globalDictTableName(flatDesc, cubeName);
    String globalDictDatabase = dictConfig.getMrHiveDictDB();

    String[] mrHiveDictColumnsExcludeRefCols = dictConfig.getMrHiveDictColumnsExcludeRefColumns();
    Map<String, String> dictRef = dictConfig.getMrHiveDictRefColumns();
    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);

    if (Objects.nonNull(mrHiveDictColumnsExcludeRefCols) && mrHiveDictColumnsExcludeRefCols.length > 0) {
        jobFlow.addTask(createHiveGlobalDictMergeGlobalDict(flatDesc, hiveInitStatements, cubeName,
                mrHiveDictColumnsExcludeRefCols, globalDictDatabase, globalDictTable));
        for (String item : mrHiveDictColumnsExcludeRefCols) {
            dictRef.put(item, "");
        }
    }

    // replace step
    if (!dictRef.isEmpty()) {
        jobFlow.addTask(createMrHiveGlobalDictReplaceStep(flatDesc, hiveInitStatements, cubeName, dictRef,
                flatTableDatabase, globalDictDatabase, globalDictTable, dictConfig.getMrHiveDictTableSuffix(),
                jobFlow.getId()));
    }
}
Example #10
Source File: SparkBatchCubingJobBuilder2.java From kylin with Apache License 2.0
public void configureSparkJob(final CubeSegment seg, final SparkExecutable sparkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_PATH.getOpt(), tablePath);
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    sparkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_CUBE + ":" + seg.toString());
}
Example #11
Source File: BaseCuboidBuilder.java From kylin-on-parquet-v2 with Apache License 2.0
private void checkMrDictClolumn() {
    Set<String> mrDictColumnSet = new HashSet<>();
    if (kylinConfig.getMrHiveDictColumns() != null) {
        Collections.addAll(mrDictColumnSet, kylinConfig.getMrHiveDictColumns());
    }

    for (MeasureDesc measure : measureDescList) {
        if (measure.getFunction().getExpression().equalsIgnoreCase(FunctionDesc.FUNC_COUNT_DISTINCT)) {
            FunctionDesc functionDesc = measure.getFunction();
            TblColRef colRef = functionDesc.getParameter().getColRefs().get(0);
            if (mrDictColumnSet.contains(JoinedFlatTable.colName(colRef, true))) {
                functionDesc.setMrDict(true);
                logger.info("setMrDict for {}", colRef);
                measure.setFunction(functionDesc);
            }
        }
    }
}
Example #12
Source File: CubeController.java From kylin with Apache License 2.0
/**
 * Get SQL of a Cube segment
 *
 * @param cubeName    Cube Name
 * @param segmentName Segment Name
 * @return
 * @throws IOException
 */
@RequestMapping(value = "/{cubeName}/segs/{segmentName}/sql", method = { RequestMethod.GET }, produces = { "application/json" })
@ResponseBody
public GeneralResponse getSql(@PathVariable String cubeName, @PathVariable String segmentName) {
    checkCubeExists(cubeName);
    CubeInstance cube = cubeService.getCubeManager().getCube(cubeName);
    CubeSegment segment = cube.getSegment(segmentName, null);
    if (segment == null) {
        throw new NotFoundException("Cannot find segment " + segmentName);
    }
    IJoinedFlatTableDesc flatTableDesc = new CubeJoinedFlatTableDesc(segment, true);
    String sql = JoinedFlatTable.generateSelectDataStatement(flatTableDesc);

    GeneralResponse response = new GeneralResponse();
    response.setProperty("sql", sql);
    return response;
}
Example #13
Source File: HiveInputBase.java From kylin-on-parquet-v2 with Apache License 2.0
@Override
public void addStepPhase4_Cleanup(DefaultChainedExecutable jobFlow) {
    final String jobWorkingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);

    org.apache.kylin.source.hive.GarbageCollectionStep step = new org.apache.kylin.source.hive.GarbageCollectionStep();
    step.setName(ExecutableConstants.STEP_NAME_HIVE_CLEANUP);

    List<String> deleteTables = new ArrayList<>();
    deleteTables.add(getIntermediateTableIdentity());

    // the MR-Hive dict inner table only needs to be dropped; its HDFS data needs no separate cleanup
    String[] mrHiveDicts = flatDesc.getSegment().getConfig().getMrHiveDictColumns();
    if (Objects.nonNull(mrHiveDicts) && mrHiveDicts.length > 0) {
        String dictDb = flatDesc.getSegment().getConfig().getMrHiveDictDB();
        String tableName = dictDb + "." + flatDesc.getTableName() + "_"
                + MRHiveDictUtil.DictHiveType.GroupBy.getName();
        deleteTables.add(tableName);
    }
    step.setIntermediateTables(deleteTables);

    step.setExternalDataPaths(Collections.singletonList(JoinedFlatTable.getTableDir(flatDesc, jobWorkingDir)));
    step.setHiveViewIntermediateTableIdentities(StringUtil.join(hiveViewIntermediateTables, ","));

    jobFlow.addTask(step);
}
Example #14
Source File: SparkBatchCubingJobBuilder2.java From kylin-on-parquet-v2 with Apache License 2.0
public SparkExecutable createFactDistinctColumnsSparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    sparkExecutable.setClassName(SparkFactDistinct.class.getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_PATH.getOpt(), tablePath);
    sparkExecutable.setParam(SparkFactDistinct.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_STATS_SAMPLING_PERCENT.getOpt(), String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));
    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS + ":" + seg.toString());
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
Example #15
Source File: HiveInputBase.java From kylin-on-parquet-v2 with Apache License 2.0
protected void addStepPhase1_DoCreateFlatTable(DefaultChainedExecutable jobFlow) {
    final String cubeName = CubingExecutableUtil.getCubeName(jobFlow.getParams());
    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);
    final String jobWorkingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);
    final KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
    CubeInstance cubeInstance = CubeManager.getInstance(kylinConfig).getCube(cubeName);

    if (cubeInstance.getEngineType() == IEngineAware.ID_SPARK) {
        if (kylinConfig.isLivyEnabled()) {
            jobFlow.addTask(createFlatHiveTableByLivyStep(hiveInitStatements, jobWorkingDir, cubeName, flatDesc));
        } else {
            if (kylinConfig.isSparCreateHiveTableViaSparkEnable()) {
                jobFlow.addTask(createFlatHiveTableBySparkSql(hiveInitStatements, jobWorkingDir, cubeName, flatDesc));
            } else {
                jobFlow.addTask(createFlatHiveTableStep(hiveInitStatements, jobWorkingDir, cubeName, flatDesc));
            }
        }
    } else {
        jobFlow.addTask(createFlatHiveTableStep(hiveInitStatements, jobWorkingDir, cubeName, flatDesc));
    }
}
Example #16
Source File: HiveInputBase.java From kylin with Apache License 2.0
protected void addStepPhase1_DoCreateFlatTable(DefaultChainedExecutable jobFlow) {
    final String cubeName = CubingExecutableUtil.getCubeName(jobFlow.getParams());
    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);
    final String jobWorkingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);
    final KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
    CubeInstance cubeInstance = CubeManager.getInstance(kylinConfig).getCube(cubeName);

    if (cubeInstance.getEngineType() == IEngineAware.ID_SPARK) {
        if (kylinConfig.isLivyEnabled()) {
            jobFlow.addTask(createFlatHiveTableByLivyStep(hiveInitStatements, jobWorkingDir, cubeName, flatDesc));
        } else {
            if (kylinConfig.isSparCreateHiveTableViaSparkEnable()) {
                jobFlow.addTask(createFlatHiveTableBySparkSql(hiveInitStatements, jobWorkingDir, cubeName, flatDesc));
            } else {
                jobFlow.addTask(createFlatHiveTableStep(hiveInitStatements, jobWorkingDir, cubeName, flatDesc));
            }
        }
    } else {
        jobFlow.addTask(createFlatHiveTableStep(hiveInitStatements, jobWorkingDir, cubeName, flatDesc));
    }
}
Example #17
Source File: JoinedFlatTableTest.java From Kylin with Apache License 2.0
@Test
public void testGenCreateTableDDL() {
    String ddl = JoinedFlatTable.generateCreateTableStatement(intermediateTableDesc, "/tmp", fakeJobUUID);
    System.out.println(ddl);

    System.out.println("The length for the ddl is " + ddl.length());
}
Example #18
Source File: HiveInputBase.java From kylin-on-parquet-v2 with Apache License 2.0
@Override
public void addStepPhase1_CreateFlatTable(DefaultChainedExecutable jobFlow) {
    final String cubeName = CubingExecutableUtil.getCubeName(jobFlow.getParams());
    CubeInstance cubeInstance = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()).getCube(cubeName);
    final KylinConfig cubeConfig = cubeInstance.getConfig();
    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);

    // create flat table first
    addStepPhase1_DoCreateFlatTable(jobFlow);

    // create global dict
    KylinConfig dictConfig = (flatDesc.getSegment()).getConfig();
    String[] mrHiveDictColumns = dictConfig.getMrHiveDictColumns();
    if (mrHiveDictColumns.length > 0) {
        String globalDictDatabase = dictConfig.getMrHiveDictDB();
        if (null == globalDictDatabase) {
            throw new IllegalArgumentException("Mr-Hive Global dict database is null.");
        }
        String globalDictTable = cubeName + dictConfig.getMrHiveDictTableSuffix();
        addStepPhase1_DoCreateMrHiveGlobalDict(jobFlow, mrHiveDictColumns, globalDictDatabase, globalDictTable);
    }

    // then count and redistribute
    if (cubeConfig.isHiveRedistributeEnabled()) {
        final KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
        if (kylinConfig.isLivyEnabled() && cubeInstance.getEngineType() == IEngineAware.ID_SPARK) {
            jobFlow.addTask(createRedistributeFlatHiveTableByLivyStep(hiveInitStatements, cubeName, flatDesc,
                    cubeInstance.getDescriptor()));
        } else {
            jobFlow.addTask(createRedistributeFlatHiveTableStep(hiveInitStatements, cubeName, flatDesc,
                    cubeInstance.getDescriptor()));
        }
    }

    // special for hive
    addStepPhase1_DoMaterializeLookupTable(jobFlow);
}
Example #19
Source File: JoinedFlatTableTest.java From Kylin with Apache License 2.0
@Test
public void testGenerateInsertSql() throws IOException {
    String sqls = JoinedFlatTable.generateInsertDataStatement(intermediateTableDesc, fakeJobUUID,
            new JobEngineConfig(KylinConfig.getInstanceFromEnv()));
    System.out.println(sqls);

    int length = sqls.length();
    assertEquals(1155, length);
}
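A natural companion to these two tests, sketched here rather than taken from the project, would exercise the SELECT generator that the REST endpoints in Examples #6, #12, #21 and #30 rely on; the assertion below is an assumption about the statement's shape, not a documented guarantee.

@Test
public void testGenerateSelectSql() {
    // Hypothetical companion test (not from the Kylin code base): generate the
    // full SELECT over the joined star schema and sanity-check its shape.
    String sql = JoinedFlatTable.generateSelectDataStatement(intermediateTableDesc);
    System.out.println(sql);
    assertTrue(sql.trim().startsWith("SELECT"));
}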
Example #20
Source File: JdbcHiveInputBase.java From kylin with Apache License 2.0
private AbstractExecutable createFlatHiveTableFromFiles(String hiveInitStatements, String jobWorkingDir) {
    final String dropTableHql = JoinedFlatTable.generateDropTableStatement(flatDesc);
    String fieldDelimiter = getConfig().getJdbcSourceFieldDelimiter();
    // Sqoop does not support exporting SEQUENCEFILE to Hive yet (SQOOP-869)
    final String createTableHql = JoinedFlatTable.generateCreateTableStatement(flatDesc, jobWorkingDir,
            "TEXTFILE", fieldDelimiter);

    HiveCmdStep step = new HiveCmdStep();
    step.setCmd(hiveInitStatements + dropTableHql + createTableHql);
    step.setName(ExecutableConstants.STEP_NAME_CREATE_FLAT_HIVE_TABLE);
    return step;
}
Example #21
Source File: CubeController.java From kylin with Apache License 2.0
/**
 * Get SQL of a Cube
 *
 * @param cubeName Cube Name
 * @return
 * @throws IOException
 */
@RequestMapping(value = "/{cubeName}/sql", method = { RequestMethod.GET }, produces = { "application/json" })
@ResponseBody
public GeneralResponse getSql(@PathVariable String cubeName) {
    checkCubeExists(cubeName);
    CubeInstance cube = cubeService.getCubeManager().getCube(cubeName);
    IJoinedFlatTableDesc flatTableDesc = new CubeJoinedFlatTableDesc(cube.getDescriptor(), true);
    String sql = JoinedFlatTable.generateSelectDataStatement(flatTableDesc);

    GeneralResponse response = new GeneralResponse();
    response.setProperty("sql", sql);
    return response;
}
Example #22
Source File: KafkaInputBase.java From kylin with Apache License 2.0
protected static AbstractExecutable createFlatTable(final String hiveTableDatabase, final String baseLocation,
        final String cubeName, final StreamCubeFactTableDesc streamFactDesc, final List<String> intermediateTables,
        final List<String> intermediatePaths) {
    final IJoinedFlatTableDesc flatDesc = streamFactDesc.getFlatTableDesc();

    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(hiveTableDatabase);

    final String dropFactTableHql = JoinedFlatTable.generateDropTableStatement(streamFactDesc);
    // the table inputformat is sequence file
    final String createFactTableHql = JoinedFlatTable.generateCreateTableStatement(streamFactDesc, baseLocation,
            JoinedFlatTable.SEQUENCEFILE);

    final String dropTableHql = JoinedFlatTable.generateDropTableStatement(flatDesc);
    final String createTableHql = JoinedFlatTable.generateCreateTableStatement(flatDesc, baseLocation);
    String insertDataHqls = JoinedFlatTable.generateInsertDataStatement(flatDesc);
    insertDataHqls = insertDataHqls.replace(
            quoteTableIdentity(flatDesc.getDataModel().getRootFactTable(), null) + " ",
            quoteTableIdentity(hiveTableDatabase, streamFactDesc.getTableName(), null) + " ");

    CreateFlatHiveTableStep step = new CreateFlatHiveTableStep();
    CubingExecutableUtil.setCubeName(cubeName, step.getParams());
    step.setInitStatement(hiveInitStatements);
    step.setCreateTableStatement(
            dropFactTableHql + createFactTableHql + dropTableHql + createTableHql + insertDataHqls);
    step.setName(ExecutableConstants.STEP_NAME_CREATE_FLAT_HIVE_TABLE);

    intermediateTables.add(flatDesc.getTableName());
    intermediateTables.add(streamFactDesc.getTableName());
    intermediatePaths.add(baseLocation + "/" + flatDesc.getTableName());
    intermediatePaths.add(baseLocation + "/" + streamFactDesc.getTableName());
    return step;
}
Example #23
Source File: HiveInputBase.java From kylin with Apache License 2.0
@Override
public void addStepPhase4_Cleanup(DefaultChainedExecutable jobFlow) {
    final String jobWorkingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);

    org.apache.kylin.source.hive.GarbageCollectionStep step = new org.apache.kylin.source.hive.GarbageCollectionStep();
    step.setName(ExecutableConstants.STEP_NAME_HIVE_CLEANUP);

    List<String> deleteTables = new ArrayList<>();
    deleteTables.add(getIntermediateTableIdentity());

    // the MR-Hive dict and inner tables only need to be dropped; their HDFS data needs no separate cleanup
    String[] mrHiveDicts = flatDesc.getSegment().getConfig().getMrHiveDictColumns();
    if (Objects.nonNull(mrHiveDicts) && mrHiveDicts.length > 0) {
        String dictDb = flatDesc.getSegment().getConfig().getMrHiveDictDB();
        String tableName = dictDb + "." + flatDesc.getTableName()
                + flatDesc.getSegment().getConfig().getMrHiveDistinctValueTableSuffix();
        String tableName2 = dictDb + "." + flatDesc.getTableName()
                + flatDesc.getSegment().getConfig().getMrHiveDictTableSuffix();
        deleteTables.add(tableName);
        deleteTables.add(tableName2);
    }
    step.setIntermediateTables(deleteTables);

    step.setExternalDataPaths(Collections.singletonList(JoinedFlatTable.getTableDir(flatDesc, jobWorkingDir)));
    step.setHiveViewIntermediateTableIdentities(StringUtil.join(hiveViewIntermediateTables, ","));

    jobFlow.addTask(step);
}
Example #24
Source File: KafkaMRInput.java From kylin with Apache License 2.0
@Override
public void configureJob(Job job) {
    job.setInputFormatClass(SequenceFileInputFormat.class);
    String jobId = job.getConfiguration().get(BatchConstants.ARG_CUBING_JOB_ID);
    IJoinedFlatTableDesc flatHiveTableDesc = new CubeJoinedFlatTableDesc(cubeSegment);
    String inputPath = JoinedFlatTable.getTableDir(flatHiveTableDesc,
            JobBuilderSupport.getJobWorkingDir(conf, jobId));
    try {
        FileInputFormat.addInputPath(job, new Path(inputPath));
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}
Example #25
Source File: HiveFlinkInput.java From kylin with Apache License 2.0
protected void addStepPhase1_DoMaterializeLookupTable(DefaultChainedExecutable jobFlow) {
    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);
    final String jobWorkingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);

    AbstractExecutable task = createLookupHiveViewMaterializationStep(hiveInitStatements, jobWorkingDir,
            flatDesc, hiveViewIntermediateTables, jobFlow.getId());
    if (task != null) {
        jobFlow.addTask(task);
    }
}
Example #26
Source File: HiveInputBase.java From kylin with Apache License 2.0
protected static AbstractExecutable createRedistributeFlatHiveTableByLivyStep(String hiveInitStatements,
        String cubeName, IJoinedFlatTableDesc flatDesc, CubeDesc cubeDesc) {
    RedistributeFlatHiveTableByLivyStep step = new RedistributeFlatHiveTableByLivyStep();
    step.setInitStatement(hiveInitStatements);
    step.setIntermediateTable(flatDesc.getTableName());
    step.setRedistributeDataStatement(JoinedFlatTable.generateRedistributeFlatTableStatement(flatDesc, cubeDesc));
    CubingExecutableUtil.setCubeName(cubeName, step.getParams());
    step.setName(ExecutableConstants.STEP_NAME_REDISTRIBUTE_FLAT_HIVE_TABLE);
    return step;
}
Example #27
Source File: HiveInputBase.java From kylin with Apache License 2.0
protected static AbstractExecutable createRedistributeFlatHiveTableStep(String hiveInitStatements,
        String cubeName, IJoinedFlatTableDesc flatDesc, CubeDesc cubeDesc) {
    RedistributeFlatHiveTableStep step = new RedistributeFlatHiveTableStep();
    step.setInitStatement(hiveInitStatements);
    step.setIntermediateTable(flatDesc.getTableName());
    step.setRedistributeDataStatement(JoinedFlatTable.generateRedistributeFlatTableStatement(flatDesc, cubeDesc));
    CubingExecutableUtil.setCubeName(cubeName, step.getParams());
    step.setName(ExecutableConstants.STEP_NAME_REDISTRIBUTE_FLAT_HIVE_TABLE);
    return step;
}
Example #28
Source File: HiveInputBase.java From kylin with Apache License 2.0
protected static AbstractExecutable createFlatHiveTableByLivyStep(String hiveInitStatements,
        String jobWorkingDir, String cubeName, IJoinedFlatTableDesc flatDesc) {
    // from Hive to Hive
    final String dropTableHql = JoinedFlatTable.generateDropTableStatement(flatDesc);
    final String createTableHql = JoinedFlatTable.generateCreateTableStatement(flatDesc, jobWorkingDir);
    String insertDataHqls = JoinedFlatTable.generateInsertDataStatement(flatDesc);

    CreateFlatHiveTableByLivyStep step = new CreateFlatHiveTableByLivyStep();
    step.setInitStatement(hiveInitStatements);
    step.setCreateTableStatement(dropTableHql + createTableHql + insertDataHqls);
    CubingExecutableUtil.setCubeName(cubeName, step.getParams());
    step.setName(ExecutableConstants.STEP_NAME_CREATE_FLAT_HIVE_TABLE);
    return step;
}
Example #29
Source File: HiveInputBase.java From kylin with Apache License 2.0
protected static AbstractExecutable createFlatHiveTableStep(String hiveInitStatements, String jobWorkingDir,
        String cubeName, IJoinedFlatTableDesc flatDesc) {
    // from Hive to Hive
    final String dropTableHql = JoinedFlatTable.generateDropTableStatement(flatDesc);
    final String createTableHql = JoinedFlatTable.generateCreateTableStatement(flatDesc, jobWorkingDir);
    String insertDataHqls = JoinedFlatTable.generateInsertDataStatement(flatDesc);

    CreateFlatHiveTableStep step = new CreateFlatHiveTableStep();
    step.setInitStatement(hiveInitStatements);
    step.setCreateTableStatement(dropTableHql + createTableHql + insertDataHqls);
    CubingExecutableUtil.setCubeName(cubeName, step.getParams());
    step.setName(ExecutableConstants.STEP_NAME_CREATE_FLAT_HIVE_TABLE);
    return step;
}
Example #30
Source File: CubeController.java From Kylin with Apache License 2.0
/**
 * Get hive SQL of the cube
 *
 * @param cubeName Cube Name
 * @return
 * @throws UnknownHostException
 * @throws IOException
 */
@RequestMapping(value = "/{cubeName}/segs/{segmentName}/sql", method = { RequestMethod.GET })
@ResponseBody
public GeneralResponse getSql(@PathVariable String cubeName, @PathVariable String segmentName) {
    CubeInstance cube = cubeService.getCubeManager().getCube(cubeName);
    CubeDesc cubeDesc = cube.getDescriptor();
    CubeSegment cubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.READY);
    CubeJoinedFlatTableDesc flatTableDesc = new CubeJoinedFlatTableDesc(cubeDesc, cubeSegment);
    String sql = JoinedFlatTable.generateSelectDataStatement(flatTableDesc);

    GeneralResponse response = new GeneralResponse();
    response.setProperty("sql", sql);
    return response;
}