Java Code Examples for org.apache.kylin.job.execution.DefaultChainedExecutable#addTask()

The following examples show how to use org.apache.kylin.job.execution.DefaultChainedExecutable#addTask(). The examples are extracted from open source projects; the source file and project are noted above each example.
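Before the project examples, here is a minimal sketch of the pattern they all share: build a DefaultChainedExecutable, append subtasks with addTask() in the order they should run, then submit the chain so a scheduler can execute it. This is an illustration rather than code from any of the projects below: NoOpStep, the job name, and the submitter are hypothetical placeholders, and the ExecutableManager lookup assumes the usual KylinConfig singleton.

import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.job.exception.ExecuteException;
import org.apache.kylin.job.execution.AbstractExecutable;
import org.apache.kylin.job.execution.DefaultChainedExecutable;
import org.apache.kylin.job.execution.ExecutableContext;
import org.apache.kylin.job.execution.ExecutableManager;
import org.apache.kylin.job.execution.ExecuteResult;

public class AddTaskSketch {

    // Hypothetical subtask for illustration: an AbstractExecutable whose
    // doWork() immediately reports success.
    static class NoOpStep extends AbstractExecutable {
        @Override
        protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
            return new ExecuteResult(ExecuteResult.State.SUCCEED, "no-op done");
        }
    }

    public static void main(String[] args) {
        // addTask() appends subtasks to the chain; they run sequentially,
        // and the chained job derives its overall state from theirs.
        DefaultChainedExecutable job = new DefaultChainedExecutable();
        job.setName("Sketch: two-step chained job"); // placeholder metadata
        job.setSubmitter("ADMIN");                   // placeholder submitter
        job.addTask(new NoOpStep());
        job.addTask(new NoOpStep());

        // Submit the job so a running scheduler picks it up, as in the
        // DefaultSchedulerTest examples below.
        ExecutableManager.getInstance(KylinConfig.getInstanceFromEnv()).addJob(job);
    }
}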
Example 1
Source File: DefaultSchedulerTest.java    From kylin-on-parquet-v2 with Apache License 2.0
@Test
public void testRetryableException() throws Exception {
    DefaultChainedExecutable job = new DefaultChainedExecutable();
    BaseTestExecutable task = new ErrorTestExecutable();
    job.addTask(task);

    System.setProperty("kylin.job.retry", "3");

    //don't retry on DefaultChainedExecutable, only retry on subtasks
    Assert.assertFalse(job.needRetry(1, new Exception("")));
    Assert.assertTrue(task.needRetry(1, new Exception("")));
    Assert.assertFalse(task.needRetry(1, null));
    Assert.assertFalse(task.needRetry(4, new Exception("")));

    System.setProperty("kylin.job.retry-exception-classes", "java.io.FileNotFoundException");

    Assert.assertTrue(task.needRetry(1, new FileNotFoundException()));
    Assert.assertFalse(task.needRetry(1, new Exception("")));
}
 
Example 2
Source File: DefaultSchedulerTest.java    From kylin-on-parquet-v2 with Apache License 2.0
@Test
public void testSchedulerRestart() throws Exception {
    logger.info("testSchedulerRestart");

    DefaultChainedExecutable job = new DefaultChainedExecutable();
    BaseTestExecutable task1 = new FiveSecondSucceedTestExecutable();
    job.addTask(task1);
    execMgr.addJob(job);

    //sleep 3s to make sure FiveSecondSucceedTestExecutable is running
    Thread.sleep(3000);
    //simulate the scheduler failing for some reason
    scheduler.shutdown();
    //restart
    startScheduler();

    waitForJobFinish(job.getId(), MAX_WAIT_TIME);
    Assert.assertEquals(ExecutableState.SUCCEED, execMgr.getOutput(job.getId()).getState());
    Assert.assertEquals(ExecutableState.SUCCEED, execMgr.getOutput(task1.getId()).getState());
}
 
Example 3
Source File: HBaseJobSteps.java    From kylin-on-parquet-v2 with Apache License 2.0
public void addCubingGarbageCollectionSteps(DefaultChainedExecutable jobFlow) {
    String jobId = jobFlow.getId();

    List<String> toDeletePaths = new ArrayList<>();
    toDeletePaths.add(getFactDistinctColumnsPath(jobId));
    toDeletePaths.add(getHFilePath(jobId));
    toDeletePaths.add(getShrunkenDictionaryPath(jobId));

    CubeSegment oldSegment = ((CubeInstance)seg.getRealization()).getOriginalSegmentToRefresh(seg);

    // refresh segment
    if (oldSegment != null && KylinConfig.getInstanceFromEnv().cleanStorageAfterDelOperation()) {
        // delete old hdfs job
        toDeletePaths.addAll(getRefreshingHDFSPaths());

        // drop old htables
        MergeGCStep hBaseGCStep = createHBaseGCStep(getRefreshingHTables());
        jobFlow.addTask(hBaseGCStep);
    }

    HDFSPathGarbageCollectionStep step = createHDFSPathGCStep(toDeletePaths, jobId);
    jobFlow.addTask(step);
}
 
Example 4
Source File: HBaseLookupMRSteps.java    From kylin with Apache License 2.0
private void addLookupTableConvertToHFilesStep(DefaultChainedExecutable jobFlow, String tableName, String snapshotID) {
    MapReduceExecutable createHFilesStep = new MapReduceExecutable();
    createHFilesStep
            .setName(ExecutableConstants.STEP_NAME_MATERIALIZE_LOOKUP_TABLE_CONVERT_HFILE + ":" + tableName);
    StringBuilder cmd = new StringBuilder();

    appendMapReduceParameters(cmd);
    JobBuilderSupport.appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, cube.getName());
    JobBuilderSupport.appendExecCmdParameters(cmd, BatchConstants.ARG_OUTPUT,
            getLookupTableHFilePath(tableName, jobFlow.getId()));
    JobBuilderSupport.appendExecCmdParameters(cmd, BatchConstants.ARG_TABLE_NAME, tableName);
    JobBuilderSupport.appendExecCmdParameters(cmd, BatchConstants.ARG_CUBING_JOB_ID, jobFlow.getId());
    JobBuilderSupport.appendExecCmdParameters(cmd, BatchConstants.ARG_LOOKUP_SNAPSHOT_ID, snapshotID);
    JobBuilderSupport.appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME,
            "Kylin_LookupTable_HFile_Generator_" + tableName + "_Step");

    createHFilesStep.setMapReduceParams(cmd.toString());
    createHFilesStep.setMapReduceJobClass(LookupTableToHFileJob.class);
    createHFilesStep.setCounterSaveAs(BatchConstants.LOOKUP_EXT_SNAPSHOT_SRC_RECORD_CNT_PFX + tableName);

    jobFlow.addTask(createHFilesStep);
}
 
Example 5
Source File: KafkaInputBase.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
public void addStepPhase1_CreateFlatTable(DefaultChainedExecutable jobFlow) {

    boolean onlyOneTable = cubeDesc.getModel().getLookupTables().isEmpty();
    final String baseLocation = getJobWorkingDir(jobFlow);
    if (onlyOneTable) {
        // directly use flat table location
        final String intermediateFactTable = flatDesc.getTableName();
        final String tableLocation = baseLocation + "/" + intermediateFactTable;
        jobFlow.addTask(createSaveKafkaDataStep(jobFlow.getId(), tableLocation, seg));
        intermediatePaths.add(tableLocation);
    } else {
        // sink stream data as a mock fact table, and then join it with dimension tables
        final StreamCubeFactTableDesc streamFactDesc = new StreamCubeFactTableDesc(cubeDesc, seg, flatDesc);
        jobFlow.addTask(createSaveKafkaDataStep(jobFlow.getId(), baseLocation + "/" + streamFactDesc.getTableName(), seg));
        jobFlow.addTask(createFlatTable(hiveTableDatabase, baseLocation, cubeName,
                streamFactDesc, intermediateTables, intermediatePaths));
    }
}
 
Example 6
Source File: DefaultSchedulerTest.java    From kylin with Apache License 2.0
@Test
public void testSchedulerStop() throws Exception {
    logger.info("testSchedulerStop");

    thrown.expect(RuntimeException.class);
    thrown.expectMessage("too long wait time");

    DefaultChainedExecutable job = new DefaultChainedExecutable();
    BaseTestExecutable task1 = new FiveSecondSucceedTestExecutable();
    job.addTask(task1);
    execMgr.addJob(job);

    //sleep 3s to make sure FiveSecondSucceedTestExecutable is running
    Thread.sleep(3000);
    //simulate the scheduler failing for some reason
    scheduler.shutdown();

    waitForJobFinish(job.getId(), 6000);
}
 
Example 7
Source File: DefaultSchedulerTest.java    From kylin with Apache License 2.0
@Test
public void testSchedulerRestart() throws Exception {
    logger.info("testSchedulerRestart");

    DefaultChainedExecutable job = new DefaultChainedExecutable();
    BaseTestExecutable task1 = new FiveSecondSucceedTestExecutable();
    job.addTask(task1);
    execMgr.addJob(job);

    //sleep 3s to make sure FiveSecondSucceedTestExecutable is running
    Thread.sleep(3000);
    //simulate the scheduler failing for some reason
    scheduler.shutdown();
    //restart
    startScheduler();

    waitForJobFinish(job.getId(), MAX_WAIT_TIME);
    Assert.assertEquals(ExecutableState.SUCCEED, execMgr.getOutput(job.getId()).getState());
    Assert.assertEquals(ExecutableState.SUCCEED, execMgr.getOutput(task1.getId()).getState());
}
 
Example 8
Source File: HBaseJobSteps.java    From kylin with Apache License 2.0
public void addMergingGarbageCollectionSteps(DefaultChainedExecutable jobFlow) {
    String jobId = jobFlow.getId();

    MergeGCStep hBaseGCStep = createHBaseGCStep(getMergingHTables());
    jobFlow.addTask(hBaseGCStep);

    List<String> toDeletePaths = new ArrayList<>();
    toDeletePaths.addAll(getMergingHDFSPaths());
    toDeletePaths.add(getHFilePath(jobId));

    HDFSPathGarbageCollectionStep step = createHDFSPathGCStep(toDeletePaths, jobId);

    jobFlow.addTask(step);
}
 
Example 9
Source File: HiveInputBase.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
public void addStepPhase1_CreateFlatTable(DefaultChainedExecutable jobFlow) {
    final String cubeName = CubingExecutableUtil.getCubeName(jobFlow.getParams());
    CubeInstance cubeInstance = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()).getCube(cubeName);
    final KylinConfig cubeConfig = cubeInstance.getConfig();

    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);

    // create flat table first
    addStepPhase1_DoCreateFlatTable(jobFlow);

    // create global dict
    KylinConfig dictConfig = flatDesc.getSegment().getConfig();
    String[] mrHiveDictColumns = dictConfig.getMrHiveDictColumns();
    if (mrHiveDictColumns.length > 0) {
        String globalDictDatabase = dictConfig.getMrHiveDictDB();
        if (null == globalDictDatabase) {
            throw new IllegalArgumentException("Mr-Hive Global dict database is null.");
        }
        String globalDictTable = cubeName + dictConfig.getMrHiveDictTableSuffix();
        addStepPhase1_DoCreateMrHiveGlobalDict(jobFlow, mrHiveDictColumns, globalDictDatabase, globalDictTable);
    }

    // then count and redistribute
    if (cubeConfig.isHiveRedistributeEnabled()) {
        final KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
        if (kylinConfig.isLivyEnabled() && cubeInstance.getEngineType() == IEngineAware.ID_SPARK) {
            jobFlow.addTask(createRedistributeFlatHiveTableByLivyStep(hiveInitStatements, cubeName, flatDesc,
                    cubeInstance.getDescriptor()));
        } else {
            jobFlow.addTask(createRedistributeFlatHiveTableStep(hiveInitStatements, cubeName, flatDesc,
                    cubeInstance.getDescriptor()));
        }
    }

    // special for hive
    addStepPhase1_DoMaterializeLookupTable(jobFlow);
}
 
Example 10
Source File: HBaseLookupMRSteps.java    From kylin with Apache License 2.0
private void addUpdateSnapshotQueryCacheStep(DefaultChainedExecutable jobFlow, String tableName, String snapshotID) {
    UpdateSnapshotCacheForQueryServersStep updateSnapshotCacheStep = new UpdateSnapshotCacheForQueryServersStep();
    updateSnapshotCacheStep.setName(ExecutableConstants.STEP_NAME_LOOKUP_SNAPSHOT_CACHE_UPDATE + ":" + tableName);

    LookupExecutableUtil.setProjectName(cube.getProject(), updateSnapshotCacheStep.getParams());
    LookupExecutableUtil.setLookupTableName(tableName, updateSnapshotCacheStep.getParams());
    LookupExecutableUtil.setLookupSnapshotID(snapshotID, updateSnapshotCacheStep.getParams());
    jobFlow.addTask(updateSnapshotCacheStep);
}
 
Example 11
Source File: ITDistributedSchedulerBaseTest.java    From kylin with Apache License 2.0
@Test
public void testSchedulerLock() throws Exception {
    if (!lock(jobLock1, jobId1)) {
        throw new JobException("fail to get the lock");
    }
    DefaultChainedExecutable job = new DefaultChainedExecutable();
    job.setId(jobId1);
    AbstractExecutable task1 = new SucceedTestExecutable();
    AbstractExecutable task2 = new SucceedTestExecutable();
    AbstractExecutable task3 = new SucceedTestExecutable();
    job.addTask(task1);
    job.addTask(task2);
    job.addTask(task3);
    execMgr.addJob(job);

    Assert.assertEquals(serverName1, getServerName(jobId1));

    waitForJobFinish(job.getId());

    Assert.assertEquals(ExecutableState.SUCCEED, execMgr.getOutput(task1.getId()).getState());
    Assert.assertEquals(ExecutableState.SUCCEED, execMgr.getOutput(task2.getId()).getState());
    Assert.assertEquals(ExecutableState.SUCCEED, execMgr.getOutput(task3.getId()).getState());
    Assert.assertEquals(ExecutableState.SUCCEED, execMgr.getOutput(job.getId()).getState());
    
    Thread.sleep(5000);

    Assert.assertNull(getServerName(jobId1));
}
 
Example 12
Source File: CubeService.java    From Kylin with Apache License 2.0
/**
 * Generate cardinality for a table. This triggers a Hadoop job,
 * and the result is merged into the table's exd info.
 *
 * @param tableName the table to calculate cardinality for
 * @param submitter the user submitting the job
 */
public void calculateCardinality(String tableName, String submitter) {
    String[] dbTableName = HadoopUtil.parseHiveTableName(tableName);
    tableName = dbTableName[0] + "." + dbTableName[1];
    TableDesc table = getMetadataManager().getTableDesc(tableName);
    final Map<String, String> tableExd = getMetadataManager().getTableDescExd(tableName);
    if (tableExd == null || table == null) {
        IllegalArgumentException e = new IllegalArgumentException("Cannot find table descriptor " + tableName);
        logger.error("Cannot find table descriptor " + tableName, e);
        throw e;
    }

    DefaultChainedExecutable job = new DefaultChainedExecutable();
    job.setName("Hive Column Cardinality calculation for table '" + tableName + "'");
    job.setSubmitter(submitter);

    String outPath = HiveColumnCardinalityJob.OUTPUT_PATH + "/" + tableName;
    String param = "-table " + tableName + " -output " + outPath;

    HadoopShellExecutable step1 = new HadoopShellExecutable();

    step1.setJobClass(HiveColumnCardinalityJob.class);
    step1.setJobParams(param);

    job.addTask(step1);

    HadoopShellExecutable step2 = new HadoopShellExecutable();

    step2.setJobClass(HiveColumnCardinalityUpdateJob.class);
    step2.setJobParams(param);
    job.addTask(step2);

    getExecutableManager().addJob(job);
}
 
Example 13
Source File: DefaultSchedulerTest.java    From Kylin with Apache License 2.0
@Test
public void testSucceed() throws Exception {
    DefaultChainedExecutable job = new DefaultChainedExecutable();
    BaseTestExecutable task1 = new SucceedTestExecutable();
    BaseTestExecutable task2 = new SucceedTestExecutable();
    job.addTask(task1);
    job.addTask(task2);
    jobService.addJob(job);
    waitForJobFinish(job.getId());
    assertEquals(ExecutableState.SUCCEED, jobService.getOutput(job.getId()).getState());
    assertEquals(ExecutableState.SUCCEED, jobService.getOutput(task1.getId()).getState());
    assertEquals(ExecutableState.SUCCEED, jobService.getOutput(task2.getId()).getState());
}
 
Example 14
Source File: HiveInputBase.java    From kylin with Apache License 2.0 5 votes vote down vote up
protected void addStepPhase1_DoMaterializeLookupTable(DefaultChainedExecutable jobFlow) {
    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);
    final String jobWorkingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);

    AbstractExecutable task = createLookupHiveViewMaterializationStep(hiveInitStatements, jobWorkingDir,
            flatDesc, hiveViewIntermediateTables, jobFlow.getId());
    if (task != null) {
        jobFlow.addTask(task);
    }
}
 
Example 15
Source File: ITDistributedSchedulerBaseTest.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
@Test
public void testSchedulerLock() throws Exception {
    if (!lock(jobLock1, jobId1)) {
        throw new JobException("fail to get the lock");
    }
    DefaultChainedExecutable job = new DefaultChainedExecutable();
    job.setId(jobId1);
    AbstractExecutable task1 = new SucceedTestExecutable();
    AbstractExecutable task2 = new SucceedTestExecutable();
    AbstractExecutable task3 = new SucceedTestExecutable();
    job.addTask(task1);
    job.addTask(task2);
    job.addTask(task3);
    execMgr.addJob(job);

    Assert.assertEquals(serverName1, getServerName(jobId1));

    waitForJobFinish(job.getId());

    Assert.assertEquals(ExecutableState.SUCCEED, execMgr.getOutput(task1.getId()).getState());
    Assert.assertEquals(ExecutableState.SUCCEED, execMgr.getOutput(task2.getId()).getState());
    Assert.assertEquals(ExecutableState.SUCCEED, execMgr.getOutput(task3.getId()).getState());
    Assert.assertEquals(ExecutableState.SUCCEED, execMgr.getOutput(job.getId()).getState());
    
    Thread.sleep(5000);

    Assert.assertNull(getServerName(jobId1));
}
 
Example 16
Source File: DefaultSchedulerTest.java    From kylin with Apache License 2.0
@Test
public void testSucceed() throws Exception {
    logger.info("testSucceed");
    DefaultChainedExecutable job = new DefaultChainedExecutable();
    BaseTestExecutable task1 = new SucceedTestExecutable();
    BaseTestExecutable task2 = new SucceedTestExecutable();
    job.addTask(task1);
    job.addTask(task2);
    execMgr.addJob(job);
    waitForJobFinish(job.getId(), MAX_WAIT_TIME);
    Assert.assertEquals(ExecutableState.SUCCEED, execMgr.getOutput(job.getId()).getState());
    Assert.assertEquals(ExecutableState.SUCCEED, execMgr.getOutput(task1.getId()).getState());
    Assert.assertEquals(ExecutableState.SUCCEED, execMgr.getOutput(task2.getId()).getState());
}
 
Example 17
Source File: DefaultSchedulerTest.java    From Kylin with Apache License 2.0
@Test
public void testDiscard() throws Exception {
    DefaultChainedExecutable job = new DefaultChainedExecutable();
    BaseTestExecutable task1 = new SelfStopExecutable();
    job.addTask(task1);
    jobService.addJob(job);
    waitForJobStatus(job.getId(), ExecutableState.RUNNING, 500);
    jobService.discardJob(job.getId());
    waitForJobFinish(job.getId());
    assertEquals(ExecutableState.DISCARDED, jobService.getOutput(job.getId()).getState());
    assertEquals(ExecutableState.DISCARDED, jobService.getOutput(task1.getId()).getState());
    Thread.sleep(5000);
    System.out.println(job);
}
 
Example 18
Source File: HiveInputBase.java    From kylin with Apache License 2.0 5 votes vote down vote up
/**
 * 1. Create three related tables
 * 2. Insert distinct values into the distinct value table
 * 3. Calculate statistics for the dictionary
 */
protected void addStepPhase1_DoCreateMrHiveGlobalDict(DefaultChainedExecutable jobFlow, String[] mrHiveDictColumns) {
    final String cubeName = CubingExecutableUtil.getCubeName(jobFlow.getParams());
    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);

    jobFlow.addTask(createMrHiveGlobalDictExtractStep(flatDesc, hiveInitStatements, cubeName,
            mrHiveDictColumns, jobFlow.getId()));
}
 
Example 19
Source File: DefaultSchedulerTest.java    From kylin with Apache License 2.0 5 votes vote down vote up
@Test
public void testDiscard() throws Exception {
    logger.info("testDiscard");
    DefaultChainedExecutable job = new DefaultChainedExecutable();
    SelfStopExecutable task1 = new SelfStopExecutable();
    job.addTask(task1);
    execMgr.addJob(job);
    Thread.sleep(1100); // give time to launch job/task1 
    waitForJobStatus(job.getId(), ExecutableState.RUNNING, 500);
    execMgr.discardJob(job.getId());
    waitForJobFinish(job.getId(), MAX_WAIT_TIME);
    Assert.assertEquals(ExecutableState.DISCARDED, execMgr.getOutput(job.getId()).getState());
    Assert.assertEquals(ExecutableState.DISCARDED, execMgr.getOutput(task1.getId()).getState());
    task1.waitForDoWork();
}
 
Example 20
Source File: HiveInputBase.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
protected void addStepPhase1_DoCreateMrHiveGlobalDict(DefaultChainedExecutable jobFlow,
        String[] mrHiveDictColumns, String globalDictDatabase, String globalDictTable) {
    final String cubeName = CubingExecutableUtil.getCubeName(jobFlow.getParams());
    final String hiveInitStatements = JoinedFlatTable.generateHiveInitStatements(flatTableDatabase);
    final String jobWorkingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);

    jobFlow.addTask(createMrHiveGlobalDictExtractStep(flatDesc, hiveInitStatements, jobWorkingDir, cubeName,
            mrHiveDictColumns, globalDictDatabase, globalDictTable));
    jobFlow.addTask(createMrHIveGlobalDictBuildStep(flatDesc, hiveInitStatements, hdfsWorkingDir, cubeName,
            mrHiveDictColumns, flatTableDatabase, globalDictDatabase, globalDictTable));
    jobFlow.addTask(createMrHiveGlobalDictReplaceStep(flatDesc, hiveInitStatements, hdfsWorkingDir, cubeName,
            mrHiveDictColumns, flatTableDatabase, globalDictDatabase, globalDictTable));
}