Java Code Examples for org.apache.kylin.job.JoinedFlatTable#generateHiveInitStatements()

The following examples show how to use org.apache.kylin.job.JoinedFlatTable#generateHiveInitStatements(). You can vote up the examples you find useful or vote down the ones you don't, and follow the links above each example to the original project or source file. You may also check out the related API usage in the sidebar.
Example 1
Source File: HiveInputBase.java — from kylin-on-parquet-v2, Apache License 2.0 (6 votes)
/**
 * Adds the job step that creates the intermediate (flat) Hive table for the cube.
 * For the Spark engine the table can be created via Livy or via Spark SQL,
 * depending on configuration; every other engine falls back to a plain Hive step.
 */
protected void addStepPhase1_DoCreateFlatTable(DefaultChainedExecutable jobFlow) {
    final String cubeName = CubingExecutableUtil.getCubeName(jobFlow.getParams());
    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);
    final String jobWorkingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);

    final KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
    CubeInstance cubeInstance = CubeManager.getInstance(kylinConfig).getCube(cubeName);

    final boolean isSpark = cubeInstance.getEngineType() == IEngineAware.ID_SPARK;
    // Flattened from the original nested if/else; dead commented-out code removed.
    if (isSpark && kylinConfig.isLivyEnabled()) {
        jobFlow.addTask(createFlatHiveTableByLivyStep(hiveInitStatements, jobWorkingDir, cubeName, flatDesc));
    } else if (isSpark && kylinConfig.isSparCreateHiveTableViaSparkEnable()) {
        jobFlow.addTask(createFlatHiveTableBySparkSql(hiveInitStatements, jobWorkingDir, cubeName, flatDesc));
    } else {
        jobFlow.addTask(createFlatHiveTableStep(hiveInitStatements, jobWorkingDir, cubeName, flatDesc));
    }
}
 
Example 2
Source File: HiveInputBase.java — from kylin, Apache License 2.0 (6 votes)
@Override
public void addStepPhase_ReplaceFlatTableGlobalColumnValue(DefaultChainedExecutable jobFlow) {
    // Resolve the segment-level config and the Hive objects the dictionary steps operate on.
    final KylinConfig dictConfig = flatDesc.getSegment().getConfig();
    final String cubeName = CubingExecutableUtil.getCubeName(jobFlow.getParams());
    final String globalDictTable = MRHiveDictUtil.globalDictTableName(flatDesc, cubeName);
    final String globalDictDatabase = dictConfig.getMrHiveDictDB();
    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);

    String[] dictColumns = dictConfig.getMrHiveDictColumnsExcludeRefColumns();
    Map<String, String> dictRef = dictConfig.getMrHiveDictRefColumns();

    if (Objects.nonNull(dictColumns) && dictColumns.length > 0) {
        // Merge the freshly built values into the global dictionary table, then
        // register each column so the replace step below picks it up as well.
        jobFlow.addTask(createHiveGlobalDictMergeGlobalDict(flatDesc, hiveInitStatements, cubeName,
                dictColumns, globalDictDatabase, globalDictTable));
        for (String column : dictColumns) {
            dictRef.put(column, "");
        }
    }

    // Replace raw column values in the flat table with their dictionary-encoded ids.
    if (!dictRef.isEmpty()) {
        jobFlow.addTask(createMrHiveGlobalDictReplaceStep(flatDesc, hiveInitStatements, cubeName, dictRef,
                flatTableDatabase, globalDictDatabase, globalDictTable, dictConfig.getMrHiveDictTableSuffix(),
                jobFlow.getId()));
    }
}
 
Example 3
Source File: HiveInputBase.java — from kylin, Apache License 2.0 (6 votes)
/**
 * Adds the job step that creates the intermediate (flat) Hive table for the cube.
 * For the Spark engine the table can be created via Livy or via Spark SQL,
 * depending on configuration; every other engine falls back to a plain Hive step.
 */
protected void addStepPhase1_DoCreateFlatTable(DefaultChainedExecutable jobFlow) {
    final String cubeName = CubingExecutableUtil.getCubeName(jobFlow.getParams());
    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);
    final String jobWorkingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);

    final KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
    CubeInstance cubeInstance = CubeManager.getInstance(kylinConfig).getCube(cubeName);

    final boolean isSpark = cubeInstance.getEngineType() == IEngineAware.ID_SPARK;
    // Flattened from the original nested if/else; dead commented-out code removed.
    if (isSpark && kylinConfig.isLivyEnabled()) {
        jobFlow.addTask(createFlatHiveTableByLivyStep(hiveInitStatements, jobWorkingDir, cubeName, flatDesc));
    } else if (isSpark && kylinConfig.isSparCreateHiveTableViaSparkEnable()) {
        jobFlow.addTask(createFlatHiveTableBySparkSql(hiveInitStatements, jobWorkingDir, cubeName, flatDesc));
    } else {
        jobFlow.addTask(createFlatHiveTableStep(hiveInitStatements, jobWorkingDir, cubeName, flatDesc));
    }
}
 
Example 4
Source File: HiveInputBase.java — from kylin-on-parquet-v2, Apache License 2.0 (5 votes)
/**
 * Phase-1 entry point: creates the flat table, optionally builds the MR-Hive
 * global dictionary, then redistributes the flat table and materializes
 * Hive-view lookup tables.
 */
@Override
public void addStepPhase1_CreateFlatTable(DefaultChainedExecutable jobFlow) {
    final String cubeName = CubingExecutableUtil.getCubeName(jobFlow.getParams());
    CubeInstance cubeInstance = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()).getCube(cubeName);
    final KylinConfig cubeConfig = cubeInstance.getConfig();

    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);

    // create flat table first
    addStepPhase1_DoCreateFlatTable(jobFlow);

    // create global dict
    KylinConfig dictConfig = (flatDesc.getSegment()).getConfig();
    String[] mrHiveDictColumns = dictConfig.getMrHiveDictColumns();
    // Fix: guard against a null column list (the original dereferenced .length
    // directly; the sibling variant of this method checks Objects.nonNull first).
    if (mrHiveDictColumns != null && mrHiveDictColumns.length > 0) {
        String globalDictDatabase = dictConfig.getMrHiveDictDB();
        if (null == globalDictDatabase) {
            throw new IllegalArgumentException("Mr-Hive Global dict database is null.");
        }
        String globalDictTable = cubeName + dictConfig.getMrHiveDictTableSuffix();
        addStepPhase1_DoCreateMrHiveGlobalDict(jobFlow, mrHiveDictColumns, globalDictDatabase, globalDictTable);
    }

    // then count and redistribute; Livy is only used for the Spark engine
    if (cubeConfig.isHiveRedistributeEnabled()) {
        final KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
        if (kylinConfig.isLivyEnabled() && cubeInstance.getEngineType() == IEngineAware.ID_SPARK) {
            jobFlow.addTask(createRedistributeFlatHiveTableByLivyStep(hiveInitStatements, cubeName, flatDesc,
                    cubeInstance.getDescriptor()));
        } else {
            jobFlow.addTask(createRedistributeFlatHiveTableStep(hiveInitStatements, cubeName, flatDesc,
                    cubeInstance.getDescriptor()));
        }
    }

    // special for hive
    addStepPhase1_DoMaterializeLookupTable(jobFlow);
}
 
Example 5
Source File: HiveInputBase.java — from kylin-on-parquet-v2, Apache License 2.0 (5 votes)
/**
 * Adds the three chained MR-Hive global-dictionary steps for the given
 * columns: extract distinct values, build the dictionary, then replace the
 * flat table's raw values with dictionary ids.
 */
protected void addStepPhase1_DoCreateMrHiveGlobalDict(DefaultChainedExecutable jobFlow,
        String[] mrHiveDictColumns, String globalDictDatabase, String globalDictTable) {
    final String cubeName = CubingExecutableUtil.getCubeName(jobFlow.getParams());
    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);
    final String jobWorkingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);

    jobFlow.addTask(createMrHiveGlobalDictExtractStep(flatDesc, hiveInitStatements, jobWorkingDir, cubeName,
            mrHiveDictColumns, globalDictDatabase, globalDictTable));
    // NOTE(review): the build and replace steps receive hdfsWorkingDir while the
    // extract step receives the job-scoped jobWorkingDir — confirm this is intentional.
    jobFlow.addTask(createMrHIveGlobalDictBuildStep(flatDesc, hiveInitStatements, hdfsWorkingDir, cubeName,
            mrHiveDictColumns, flatTableDatabase, globalDictDatabase, globalDictTable));
    jobFlow.addTask(createMrHiveGlobalDictReplaceStep(flatDesc, hiveInitStatements, hdfsWorkingDir, cubeName,
            mrHiveDictColumns, flatTableDatabase, globalDictDatabase, globalDictTable));
}
 
Example 6
Source File: HiveInputBase.java — from kylin-on-parquet-v2, Apache License 2.0 (5 votes)
protected void addStepPhase1_DoMaterializeLookupTable(DefaultChainedExecutable jobFlow) {
    // Materialize Hive-view lookup tables so downstream steps read plain tables.
    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);
    final String jobWorkingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);

    final AbstractExecutable materializeTask = createLookupHiveViewMaterializationStep(hiveInitStatements,
            jobWorkingDir, flatDesc, hiveViewIntermediateTables, jobFlow.getId());
    // The factory returns null when there is no Hive view to materialize.
    if (materializeTask != null) {
        jobFlow.addTask(materializeTask);
    }
}
 
Example 7
Source File: HiveFlinkInput.java — from kylin-on-parquet-v2, Apache License 2.0 (5 votes)
protected void addStepPhase1_DoMaterializeLookupTable(DefaultChainedExecutable jobFlow) {
    // Flink input: materialize lookup tables backed by Hive views, if any exist.
    final String initStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);
    final String workingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);

    AbstractExecutable step = createLookupHiveViewMaterializationStep(initStatements, workingDir, flatDesc,
            hiveViewIntermediateTables, jobFlow.getId());
    if (step == null) {
        // Nothing to materialize — no view-backed lookup tables in this model.
        return;
    }
    jobFlow.addTask(step);
}
 
Example 8
Source File: JdbcHiveInputBase.java — from kylin-on-parquet-v2, Apache License 2.0 (5 votes)
@Override
protected void addStepPhase1_DoCreateFlatTable(DefaultChainedExecutable jobFlow) {
    // JDBC source: sqoop the source rows into the working dir, then expose
    // those files as the Hive flat table.
    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);
    final String cube = CubingExecutableUtil.getCubeName(jobFlow.getParams());
    final String workingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);

    jobFlow.addTask(createSqoopToFlatHiveStep(workingDir, cube));
    jobFlow.addTask(createFlatHiveTableFromFiles(hiveInitStatements, workingDir));
}
 
Example 9
Source File: KafkaInputBase.java — from kylin-on-parquet-v2, Apache License 2.0 (5 votes)
/**
 * Builds a single Hive step that recreates the streaming fact table and the
 * joined flat table, and records both as intermediate artifacts for cleanup.
 */
protected static AbstractExecutable createFlatTable(final String hiveTableDatabase,
                                                    final String baseLocation, final String cubeName,
                                                    final StreamCubeFactTableDesc streamFactDesc, final List<String> intermediateTables,
                                                    final List<String> intermediatePaths) {
    final IJoinedFlatTableDesc flatTableDesc = streamFactDesc.getFlatTableDesc();

    final String initStatements = JoinedFlatTable.generateHiveInitStatements(hiveTableDatabase);

    // The streaming fact table is stored as sequence files.
    final String recreateFactTableHql = JoinedFlatTable.generateDropTableStatement(streamFactDesc)
            + JoinedFlatTable.generateCreateTableStatement(streamFactDesc, baseLocation,
                    JoinedFlatTable.SEQUENCEFILE);
    final String recreateFlatTableHql = JoinedFlatTable.generateDropTableStatement(flatTableDesc)
            + JoinedFlatTable.generateCreateTableStatement(flatTableDesc, baseLocation);

    // Point the INSERT at the streaming fact table instead of the model's root fact table.
    final String insertDataHql = JoinedFlatTable.generateInsertDataStatement(flatTableDesc).replace(
            quoteTableIdentity(flatTableDesc.getDataModel().getRootFactTable(), null) + " ",
            quoteTableIdentity(hiveTableDatabase, streamFactDesc.getTableName(), null) + " ");

    final CreateFlatHiveTableStep step = new CreateFlatHiveTableStep();
    CubingExecutableUtil.setCubeName(cubeName, step.getParams());
    step.setInitStatement(initStatements);
    step.setCreateTableStatement(recreateFactTableHql + recreateFlatTableHql + insertDataHql);
    step.setName(ExecutableConstants.STEP_NAME_CREATE_FLAT_HIVE_TABLE);

    // Record intermediate tables/paths so the cleanup phase can drop them later.
    intermediateTables.add(flatTableDesc.getTableName());
    intermediateTables.add(streamFactDesc.getTableName());
    intermediatePaths.add(baseLocation + "/" + flatTableDesc.getTableName());
    intermediatePaths.add(baseLocation + "/" + streamFactDesc.getTableName());
    return step;
}
 
Example 10
Source File: HiveInputBase.java — from kylin, Apache License 2.0 (5 votes)
@Override
public void addStepPhase1_CreateFlatTable(DefaultChainedExecutable jobFlow) {
    // Phase 1: create the flat table, optionally build the Hive global
    // dictionary, redistribute the flat table, and materialize lookup views.
    final String cubeName = CubingExecutableUtil.getCubeName(jobFlow.getParams());
    final CubeInstance cube = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()).getCube(cubeName);
    final KylinConfig cubeConfig = cube.getConfig();
    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);

    // Step 1: the flat table itself.
    addStepPhase1_DoCreateFlatTable(jobFlow);

    // Step 2: Hive global dictionary, only when dictionary columns are configured.
    final KylinConfig dictConfig = flatDesc.getSegment().getConfig();
    final String[] dictColumns = dictConfig.getMrHiveDictColumnsExcludeRefColumns();
    if (Objects.nonNull(dictColumns) && dictColumns.length > 0 && !"".equals(dictColumns[0])) {
        addStepPhase1_DoCreateMrHiveGlobalDict(jobFlow, dictColumns);
    }

    // Step 3: count rows and redistribute when enabled; Livy applies only to the Spark engine.
    if (cubeConfig.isHiveRedistributeEnabled()) {
        final KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
        final boolean useLivy = kylinConfig.isLivyEnabled() && cube.getEngineType() == IEngineAware.ID_SPARK;
        if (useLivy) {
            jobFlow.addTask(createRedistributeFlatHiveTableByLivyStep(hiveInitStatements, cubeName, flatDesc,
                    cube.getDescriptor()));
        } else {
            jobFlow.addTask(createRedistributeFlatHiveTableStep(hiveInitStatements, cubeName, flatDesc,
                    cube.getDescriptor()));
        }
    }

    // Step 4: materialize Hive-view lookup tables.
    addStepPhase1_DoMaterializeLookupTable(jobFlow);
}
 
Example 11
Source File: HiveInputBase.java — from kylin, Apache License 2.0 (5 votes)
/**
 * Adds the Hive global-dictionary extract step for the given columns.
 * Per the original author's note, this phase is responsible for:
 * 1. creating the three related dictionary tables,
 * 2. inserting distinct values into the distinct-value table, and
 * 3. calculating statistics for the dictionary.
 * NOTE(review): only a single extract step is added here — presumably that
 * step performs all three actions; confirm against createMrHiveGlobalDictExtractStep.
 */
protected void addStepPhase1_DoCreateMrHiveGlobalDict(DefaultChainedExecutable jobFlow, String[] mrHiveDictColumns) {
    final String cubeName = CubingExecutableUtil.getCubeName(jobFlow.getParams());
    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);

    jobFlow.addTask(createMrHiveGlobalDictExtractStep(flatDesc, hiveInitStatements, cubeName,
            mrHiveDictColumns, jobFlow.getId()));

}
 
Example 12
Source File: HiveInputBase.java — from kylin, Apache License 2.0 (5 votes)
protected void addStepPhase1_DoMaterializeLookupTable(DefaultChainedExecutable jobFlow) {
    // Turn any Hive-view-backed lookup tables into real tables before cubing.
    final String initStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);
    final String workingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);

    final AbstractExecutable materializeStep = createLookupHiveViewMaterializationStep(initStatements,
            workingDir, flatDesc, hiveViewIntermediateTables, jobFlow.getId());
    // Null means the model has no view-backed lookup tables; skip the step.
    if (materializeStep != null) {
        jobFlow.addTask(materializeStep);
    }
}
 
Example 13
Source File: HiveFlinkInput.java — from kylin, Apache License 2.0 (5 votes)
protected void addStepPhase1_DoMaterializeLookupTable(DefaultChainedExecutable jobFlow) {
    // Flink input: materialize lookup tables backed by Hive views, if any exist.
    final String initStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);
    final String workingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);

    AbstractExecutable step = createLookupHiveViewMaterializationStep(initStatements, workingDir, flatDesc,
            hiveViewIntermediateTables, jobFlow.getId());
    if (step == null) {
        // Nothing to materialize — no view-backed lookup tables in this model.
        return;
    }
    jobFlow.addTask(step);
}
 
Example 14
Source File: JdbcHiveInputBase.java — from kylin, Apache License 2.0 (5 votes)
@Override
protected void addStepPhase1_DoCreateFlatTable(DefaultChainedExecutable jobFlow) {
    // JDBC source: import the source rows with Sqoop first, then create the
    // Hive flat table directly on top of the imported files.
    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);
    final String cube = CubingExecutableUtil.getCubeName(jobFlow.getParams());
    final String workingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);

    jobFlow.addTask(createSqoopToFlatHiveStep(workingDir, cube));
    jobFlow.addTask(createFlatHiveTableFromFiles(hiveInitStatements, workingDir));
}
 
Example 15
Source File: KafkaInputBase.java — from kylin, Apache License 2.0 (5 votes)
/**
 * Builds one Hive step that recreates the streaming fact table and the joined
 * flat table, registering both as intermediate artifacts for later cleanup.
 */
protected static AbstractExecutable createFlatTable(final String hiveTableDatabase,
                                                    final String baseLocation, final String cubeName,
                                                    final StreamCubeFactTableDesc streamFactDesc, final List<String> intermediateTables,
                                                    final List<String> intermediatePaths) {
    final IJoinedFlatTableDesc joinedFlatDesc = streamFactDesc.getFlatTableDesc();

    final String initStatements = JoinedFlatTable.generateHiveInitStatements(hiveTableDatabase);

    // Drop-and-recreate DDL for the streaming fact table (sequence-file format).
    final String factTableDdl = JoinedFlatTable.generateDropTableStatement(streamFactDesc)
            + JoinedFlatTable.generateCreateTableStatement(streamFactDesc, baseLocation,
                    JoinedFlatTable.SEQUENCEFILE);
    // Drop-and-recreate DDL for the joined flat table.
    final String flatTableDdl = JoinedFlatTable.generateDropTableStatement(joinedFlatDesc)
            + JoinedFlatTable.generateCreateTableStatement(joinedFlatDesc, baseLocation);

    // Redirect the INSERT from the model's root fact table to the streaming fact table.
    final String insertHql = JoinedFlatTable.generateInsertDataStatement(joinedFlatDesc).replace(
            quoteTableIdentity(joinedFlatDesc.getDataModel().getRootFactTable(), null) + " ",
            quoteTableIdentity(hiveTableDatabase, streamFactDesc.getTableName(), null) + " ");

    final CreateFlatHiveTableStep step = new CreateFlatHiveTableStep();
    CubingExecutableUtil.setCubeName(cubeName, step.getParams());
    step.setInitStatement(initStatements);
    step.setCreateTableStatement(factTableDdl + flatTableDdl + insertHql);
    step.setName(ExecutableConstants.STEP_NAME_CREATE_FLAT_HIVE_TABLE);

    // Track both tables and their HDFS locations for the cleanup phase.
    intermediateTables.add(joinedFlatDesc.getTableName());
    intermediateTables.add(streamFactDesc.getTableName());
    intermediatePaths.add(baseLocation + "/" + joinedFlatDesc.getTableName());
    intermediatePaths.add(baseLocation + "/" + streamFactDesc.getTableName());
    return step;
}