Java Code Examples for org.apache.kylin.cube.CubeSegment#getCubeDesc()
The following examples show how to use org.apache.kylin.cube.CubeSegment#getCubeDesc().
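Before the project examples, here is a minimal usage sketch. It assumes a cube named sample_cube exists in the Kylin metadata store; that name and the class name GetCubeDescSketch are placeholders, not part of any project below. getCubeDesc() returns the CubeDesc that defines the cube's dimensions, measures, and row key layout; every segment of a cube shares the same descriptor.

import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.model.CubeDesc;

public class GetCubeDescSketch {
    public static void main(String[] args) {
        // Look up a cube by name; "sample_cube" is a hypothetical cube.
        KylinConfig config = KylinConfig.getInstanceFromEnv();
        CubeInstance cube = CubeManager.getInstance(config).getCube("sample_cube");

        // Every segment of the cube returns the same CubeDesc.
        for (CubeSegment segment : cube.getSegments()) {
            CubeDesc cubeDesc = segment.getCubeDesc();
            System.out.println(segment.getName() + " -> " + cubeDesc.getName());
        }
    }
}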
Example 1
Source File: CubeStatsReader.java From kylin-on-parquet-v2 with Apache License 2.0
private static Map<Long, Double> getCuboidSizeMapFromRowCount(CubeSegment cubeSegment, Map<Long, Long> rowCountMap,
        long sourceRowCount, boolean origin) {
    final CubeDesc cubeDesc = cubeSegment.getCubeDesc();
    final List<Integer> rowkeyColumnSize = Lists.newArrayList();
    final Cuboid baseCuboid = Cuboid.getBaseCuboid(cubeDesc);
    final List<TblColRef> columnList = baseCuboid.getColumns();
    final CubeDimEncMap dimEncMap = cubeSegment.getDimensionEncodingMap();
    final Long baseCuboidRowCount = rowCountMap.get(baseCuboid.getId());

    for (int i = 0; i < columnList.size(); i++) {
        rowkeyColumnSize.add(dimEncMap.get(columnList.get(i)).getLengthOfEncoding());
    }

    Map<Long, Double> sizeMap = Maps.newHashMap();
    for (Map.Entry<Long, Long> entry : rowCountMap.entrySet()) {
        sizeMap.put(entry.getKey(), estimateCuboidStorageSize(cubeSegment, entry.getKey(), entry.getValue(),
                baseCuboid.getId(), baseCuboidRowCount, rowkeyColumnSize, sourceRowCount));
    }

    if (!origin && cubeSegment.getConfig().enableJobCuboidSizeOptimize()) {
        optimizeSizeMap(sizeMap, cubeSegment);
    }

    return sizeMap;
}
Example 2
Source File: CubeStatsReader.java From kylin with Apache License 2.0
private static Map<Long, Double> getCuboidSizeMapFromRowCount(CubeSegment cubeSegment, Map<Long, Long> rowCountMap,
        long sourceRowCount, boolean origin) {
    final CubeDesc cubeDesc = cubeSegment.getCubeDesc();
    final List<Integer> rowkeyColumnSize = Lists.newArrayList();
    final Cuboid baseCuboid = Cuboid.getBaseCuboid(cubeDesc);
    final List<TblColRef> columnList = baseCuboid.getColumns();
    final CubeDimEncMap dimEncMap = cubeSegment.getDimensionEncodingMap();
    final Long baseCuboidRowCount = rowCountMap.get(baseCuboid.getId());

    for (int i = 0; i < columnList.size(); i++) {
        rowkeyColumnSize.add(dimEncMap.get(columnList.get(i)).getLengthOfEncoding());
    }

    Map<Long, Double> sizeMap = Maps.newHashMap();
    for (Map.Entry<Long, Long> entry : rowCountMap.entrySet()) {
        sizeMap.put(entry.getKey(), estimateCuboidStorageSize(cubeSegment, entry.getKey(), entry.getValue(),
                baseCuboid.getId(), baseCuboidRowCount, rowkeyColumnSize, sourceRowCount));
    }

    if (!origin && cubeSegment.getConfig().enableJobCuboidSizeOptimize()) {
        optimizeSizeMap(sizeMap, cubeSegment);
    }

    return sizeMap;
}
Example 3
Source File: CubingJobBuilder.java From Kylin with Apache License 2.0
Pair<AbstractExecutable, AbstractExecutable> addCubingSteps(CubeSegment seg, String cuboidRootPath, CubingJob result) {
    final int groupRowkeyColumnsCount = seg.getCubeDesc().getRowkey().getNCuboidBuildLevels();
    final int totalRowkeyColumnsCount = seg.getCubeDesc().getRowkey().getRowKeyColumns().length;

    final String jobId = result.getId();
    final CubeJoinedFlatTableDesc intermediateTableDesc = new CubeJoinedFlatTableDesc(seg.getCubeDesc(), seg);
    final String intermediateHiveTableName = getIntermediateHiveTableName(intermediateTableDesc, jobId);
    final String intermediateHiveTableLocation = getIntermediateHiveTableLocation(intermediateTableDesc, jobId);
    final String factDistinctColumnsPath = getFactDistinctColumnsPath(seg, jobId);
    final String[] cuboidOutputTempPath = getCuboidOutputPaths(cuboidRootPath, totalRowkeyColumnsCount,
            groupRowkeyColumnsCount);

    final AbstractExecutable intermediateHiveTableStep = createIntermediateHiveTableStep(intermediateTableDesc, jobId);
    result.addTask(intermediateHiveTableStep);
    result.addTask(createFactDistinctColumnsStep(seg, intermediateHiveTableName, jobId));
    result.addTask(createBuildDictionaryStep(seg, factDistinctColumnsPath));

    // base cuboid step
    final MapReduceExecutable baseCuboidStep = createBaseCuboidStep(seg, intermediateHiveTableLocation,
            cuboidOutputTempPath);
    result.addTask(baseCuboidStep);

    // n dim cuboid steps
    for (int i = 1; i <= groupRowkeyColumnsCount; i++) {
        int dimNum = totalRowkeyColumnsCount - i;
        result.addTask(createNDimensionCuboidStep(seg, cuboidOutputTempPath, dimNum, totalRowkeyColumnsCount));
    }

    return new Pair<AbstractExecutable, AbstractExecutable>(intermediateHiveTableStep, baseCuboidStep);
}
Example 4
Source File: RowKeySplitter.java From kylin with Apache License 2.0
public RowKeySplitter(CubeSegment cubeSeg, int splitLen, int bytesLen) {
    this.enableSharding = cubeSeg.isEnableSharding();
    this.cubeDesc = cubeSeg.getCubeDesc();
    IDimensionEncodingMap dimEncoding = new CubeDimEncMap(cubeSeg);

    for (RowKeyColDesc rowKeyColDesc : cubeDesc.getRowkey().getRowKeyColumns()) {
        dimEncoding.get(rowKeyColDesc.getColRef());
    }

    this.colIO = new RowKeyColumnIO(dimEncoding);

    this.splitBuffers = new ByteArray[splitLen];
    this.splitOffsets = new int[splitLen];
    this.bufferSize = 0;
}
Example 5
Source File: RowKeySplitter.java From kylin-on-parquet-v2 with Apache License 2.0
public RowKeySplitter(CubeSegment cubeSeg, int splitLen, int bytesLen) {
    this.enableSharding = cubeSeg.isEnableSharding();
    this.cubeDesc = cubeSeg.getCubeDesc();
    IDimensionEncodingMap dimEncoding = new CubeDimEncMap(cubeSeg);

    for (RowKeyColDesc rowKeyColDesc : cubeDesc.getRowkey().getRowKeyColumns()) {
        dimEncoding.get(rowKeyColDesc.getColRef());
    }

    this.colIO = new RowKeyColumnIO(dimEncoding);

    this.splitBuffers = new ByteArray[splitLen];
    this.splitOffsets = new int[splitLen];
    this.bufferSize = 0;
}
Example 6
Source File: KafkaInputBase.java From kylin-on-parquet-v2 with Apache License 2.0
public BaseBatchCubingInputSide(CubeSegment seg, IJoinedFlatTableDesc flatDesc) {
    this.conf = new JobEngineConfig(KylinConfig.getInstanceFromEnv());
    this.config = seg.getConfig();
    this.flatDesc = flatDesc;
    this.hiveTableDatabase = config.getHiveDatabaseForIntermediateTable();
    this.seg = seg;
    this.cubeDesc = seg.getCubeDesc();
    this.cubeName = seg.getCubeInstance().getName();
}
Example 7
Source File: CuboidSchedulerUtil.java From kylin with Apache License 2.0
public static CuboidScheduler getCuboidScheduler(CubeSegment segment, Set<Long> cuboidSet) {
    try {
        Map<Long, Long> cuboidsWithRowCnt = CuboidStatsReaderUtil.readCuboidStatsFromSegment(cuboidSet, segment);
        Comparator<Long> comparator = cuboidsWithRowCnt == null ? Cuboid.cuboidSelectComparator
                : new TreeCuboidScheduler.CuboidCostComparator(cuboidsWithRowCnt);
        return new TreeCuboidScheduler(segment.getCubeDesc(), Lists.newArrayList(cuboidSet), comparator);
    } catch (IOException e) {
        throw new RuntimeException("Failed to read cube stats for segment " + segment + " due to " + e);
    }
}
Example 8
Source File: RowKeyDecoder.java From Kylin with Apache License 2.0
public RowKeyDecoder(CubeSegment cubeSegment) {
    this.cubeDesc = cubeSegment.getCubeDesc();
    this.rowKeySplitter = new RowKeySplitter(cubeSegment, 65, 255);
    this.colIO = new RowKeyColumnIO(cubeSegment);
    this.values = new ArrayList<String>();
}
Example 9
Source File: CubeDimEncMap.java From kylin with Apache License 2.0
public CubeDimEncMap(CubeSegment seg) {
    this.cubeDesc = seg.getCubeDesc();
    this.seg = seg;
    this.dictionaryMap = null;
}
Example 10
Source File: RowKeyDecoder.java From kylin with Apache License 2.0
public RowKeyDecoder(CubeSegment cubeSegment) {
    this.cubeDesc = cubeSegment.getCubeDesc();
    this.rowKeySplitter = new RowKeySplitter(cubeSegment);
    this.colIO = new RowKeyColumnIO(cubeSegment.getDimensionEncodingMap());
    this.values = new ArrayList<String>();
}
Example 11
Source File: CubeJoinedFlatTableDesc.java From kylin with Apache License 2.0
public CubeJoinedFlatTableDesc(CubeSegment cubeSegment, boolean includingDerived) {
    this(cubeSegment.getCubeDesc(), cubeSegment, includingDerived);
}
Example 12
Source File: CubeJoinedFlatTableDesc.java From kylin with Apache License 2.0
public CubeJoinedFlatTableDesc(CubeSegment cubeSegment) {
    this(cubeSegment.getCubeDesc(), cubeSegment, false);
}
Example 13
Source File: CreateHTableJob.java From kylin with Apache License 2.0
public static byte[][] getRegionSplitsFromCuboidStatistics(final Map<Long, Double> cubeSizeMap,
        final KylinConfig kylinConfig, final CubeSegment cubeSegment, final Path hfileSplitsOutputFolder)
        throws IOException {

    final CubeDesc cubeDesc = cubeSegment.getCubeDesc();
    float cut = cubeDesc.getConfig().getKylinHBaseRegionCut();
    logger.info("Cut for HBase region is {} GB", cut);

    double totalSizeInM = 0;
    for (Double cuboidSize : cubeSizeMap.values()) {
        totalSizeInM += cuboidSize;
    }

    List<Long> allCuboids = Lists.newArrayList();
    allCuboids.addAll(cubeSizeMap.keySet());
    Collections.sort(allCuboids);

    int nRegion = Math.round((float) (totalSizeInM / (cut * 1024L)));
    nRegion = Math.max(kylinConfig.getHBaseRegionCountMin(), nRegion);
    nRegion = Math.min(kylinConfig.getHBaseRegionCountMax(), nRegion);

    if (cubeSegment.isEnableSharding()) {
        // use prime nRegions to help random sharding
        int original = nRegion;
        if (nRegion == 0) {
            nRegion = 1;
        }
        if (nRegion > Short.MAX_VALUE) {
            logger.info("Too many regions! reduce to {}", Short.MAX_VALUE);
            nRegion = Short.MAX_VALUE;
        }
        if (nRegion != original) {
            logger.info("Region count is adjusted from {} to {} to help random sharding", original, nRegion);
        }
    }

    int mbPerRegion = (int) (totalSizeInM / nRegion);
    mbPerRegion = Math.max(1, mbPerRegion);

    logger.info("Total size {} M (estimated)", totalSizeInM);
    logger.info("Expecting {} regions.", nRegion);
    logger.info("Expecting {} MB per region.", mbPerRegion);

    if (cubeSegment.isEnableSharding()) {
        // each cuboid will be split into different number of shards
        HashMap<Long, Short> cuboidShards = Maps.newHashMap();

        // each shard/region may be split into multiple hfiles;
        // array index: region ID; map key: cuboidID, map value: cuboid size in the region
        List<HashMap<Long, Double>> innerRegionSplits = Lists.newArrayList();
        for (int i = 0; i < nRegion; i++) {
            innerRegionSplits.add(new HashMap<Long, Double>());
        }

        double[] regionSizes = new double[nRegion];
        for (long cuboidId : allCuboids) {
            double estimatedSize = cubeSizeMap.get(cuboidId);
            double magic = 23;
            int shardNum = (int) (estimatedSize * magic / mbPerRegion + 1);
            if (shardNum < 1) {
                shardNum = 1;
            }

            if (shardNum > nRegion) {
                logger.debug(String.format(Locale.ROOT,
                        "Cuboid %d 's estimated size %.2f MB will generate %d regions, " + "reduce to %d", cuboidId,
                        estimatedSize, shardNum, nRegion));
                shardNum = nRegion;
            } else {
                logger.debug(String.format(Locale.ROOT,
                        "Cuboid %d 's estimated size %.2f MB will generate %d regions", cuboidId, estimatedSize,
                        shardNum));
            }

            cuboidShards.put(cuboidId, (short) shardNum);
            short startShard = ShardingHash.getShard(cuboidId, nRegion);
            for (short i = startShard; i < startShard + shardNum; ++i) {
                short j = (short) (i % nRegion);
                regionSizes[j] = regionSizes[j] + estimatedSize / shardNum;
                innerRegionSplits.get(j).put(cuboidId, estimatedSize / shardNum);
            }
        }

        for (int i = 0; i < nRegion; ++i) {
            logger.debug("Region {}'s estimated size is {} MB, accounting for {} percent", i, regionSizes[i],
                    100.0 * regionSizes[i] / totalSizeInM);
        }

        CuboidShardUtil.saveCuboidShards(cubeSegment, cuboidShards, nRegion);
        saveHFileSplits(innerRegionSplits, mbPerRegion, hfileSplitsOutputFolder, kylinConfig);
        return getSplitsByRegionCount(nRegion);
    } else {
        throw new IllegalStateException("Not supported");
    }
}
Example 14
Source File: MapReduceUtil.java From kylin with Apache License 2.0
/**
 * @param cuboidScheduler a caller-specified scheduler allows more flexibility
 */
public static int getLayeredCubingReduceTaskNum(CubeSegment cubeSegment, CuboidScheduler cuboidScheduler,
        double totalMapInputMB, int level)
        throws ClassNotFoundException, IOException, InterruptedException, JobException {
    CubeDesc cubeDesc = cubeSegment.getCubeDesc();
    KylinConfig kylinConfig = cubeDesc.getConfig();

    double perReduceInputMB = kylinConfig.getDefaultHadoopJobReducerInputMB();
    double reduceCountRatio = kylinConfig.getDefaultHadoopJobReducerCountRatio();
    logger.info("Having per reduce MB " + perReduceInputMB + ", reduce count ratio " + reduceCountRatio + ", level "
            + level);

    CubeStatsReader cubeStatsReader = new CubeStatsReader(cubeSegment, cuboidScheduler, kylinConfig);

    double parentLayerSizeEst, currentLayerSizeEst, adjustedCurrentLayerSizeEst;

    if (level == -1) {
        // merge case
        double estimatedSize = cubeStatsReader.estimateCubeSize();
        adjustedCurrentLayerSizeEst = estimatedSize > totalMapInputMB ? totalMapInputMB : estimatedSize;
        logger.debug("estimated size {}, input size {}, adjustedCurrentLayerSizeEst: {}", estimatedSize,
                totalMapInputMB, adjustedCurrentLayerSizeEst);
    } else if (level == 0) {
        // base cuboid case TODO: the estimation could be very WRONG because it has no correction
        adjustedCurrentLayerSizeEst = cubeStatsReader.estimateLayerSize(0);
        logger.debug("adjustedCurrentLayerSizeEst: {}", adjustedCurrentLayerSizeEst);
    } else {
        parentLayerSizeEst = cubeStatsReader.estimateLayerSize(level - 1);
        currentLayerSizeEst = cubeStatsReader.estimateLayerSize(level);
        adjustedCurrentLayerSizeEst = totalMapInputMB / parentLayerSizeEst * currentLayerSizeEst;
        logger.debug(
                "totalMapInputMB: {}, parentLayerSizeEst: {}, currentLayerSizeEst: {}, adjustedCurrentLayerSizeEst: {}",
                totalMapInputMB, parentLayerSizeEst, currentLayerSizeEst, adjustedCurrentLayerSizeEst);
    }

    // number of reduce tasks
    int numReduceTasks = (int) Math.round(adjustedCurrentLayerSizeEst / perReduceInputMB * reduceCountRatio + 0.99);

    // adjust reducer number for cube which has DISTINCT_COUNT measures for better performance
    if (cubeDesc.hasMemoryHungryMeasures()) {
        logger.debug("Multiply reducer num by 4 to boost performance for memory hungry measures");
        numReduceTasks = numReduceTasks * 4;
    }

    // at least 1 reducer by default
    numReduceTasks = Math.max(kylinConfig.getHadoopJobMinReducerNumber(), numReduceTasks);
    // no more than 500 reducer by default
    numReduceTasks = Math.min(kylinConfig.getHadoopJobMaxReducerNumber(), numReduceTasks);

    return numReduceTasks;
}
Example 15
Source File: CubeDimEncMap.java From kylin-on-parquet-v2 with Apache License 2.0
public CubeDimEncMap(CubeSegment seg) {
    this.cubeDesc = seg.getCubeDesc();
    this.seg = seg;
    this.dictionaryMap = null;
}
Example 16
Source File: RowKeyDecoder.java From kylin-on-parquet-v2 with Apache License 2.0
public RowKeyDecoder(CubeSegment cubeSegment) {
    this.cubeDesc = cubeSegment.getCubeDesc();
    this.rowKeySplitter = new RowKeySplitter(cubeSegment);
    this.colIO = new RowKeyColumnIO(cubeSegment.getDimensionEncodingMap());
    this.values = new ArrayList<String>();
}
Example 17
Source File: CubeJoinedFlatTableDesc.java From kylin-on-parquet-v2 with Apache License 2.0
public CubeJoinedFlatTableDesc(CubeSegment cubeSegment, boolean includingDerived) {
    this(cubeSegment.getCubeDesc(), cubeSegment, includingDerived);
}
Example 18
Source File: CubeJoinedFlatTableDesc.java From kylin-on-parquet-v2 with Apache License 2.0
public CubeJoinedFlatTableDesc(CubeSegment cubeSegment) {
    this(cubeSegment.getCubeDesc(), cubeSegment, false);
}
Example 19
Source File: CreateHTableJob.java From kylin-on-parquet-v2 with Apache License 2.0
public static byte[][] getRegionSplitsFromCuboidStatistics(final Map<Long, Double> cubeSizeMap,
        final KylinConfig kylinConfig, final CubeSegment cubeSegment, final Path hfileSplitsOutputFolder)
        throws IOException {

    final CubeDesc cubeDesc = cubeSegment.getCubeDesc();
    float cut = cubeDesc.getConfig().getKylinHBaseRegionCut();
    logger.info("Cut for HBase region is {} GB", cut);

    double totalSizeInM = 0;
    for (Double cuboidSize : cubeSizeMap.values()) {
        totalSizeInM += cuboidSize;
    }

    List<Long> allCuboids = Lists.newArrayList();
    allCuboids.addAll(cubeSizeMap.keySet());
    Collections.sort(allCuboids);

    int nRegion = Math.round((float) (totalSizeInM / (cut * 1024L)));
    nRegion = Math.max(kylinConfig.getHBaseRegionCountMin(), nRegion);
    nRegion = Math.min(kylinConfig.getHBaseRegionCountMax(), nRegion);

    if (cubeSegment.isEnableSharding()) {
        // use prime nRegions to help random sharding
        int original = nRegion;
        if (nRegion == 0) {
            nRegion = 1;
        }
        if (nRegion > Short.MAX_VALUE) {
            logger.info("Too many regions! reduce to {}", Short.MAX_VALUE);
            nRegion = Short.MAX_VALUE;
        }
        if (nRegion != original) {
            logger.info("Region count is adjusted from {} to {} to help random sharding", original, nRegion);
        }
    }

    int mbPerRegion = (int) (totalSizeInM / nRegion);
    mbPerRegion = Math.max(1, mbPerRegion);

    logger.info("Total size {} M (estimated)", totalSizeInM);
    logger.info("Expecting {} regions.", nRegion);
    logger.info("Expecting {} MB per region.", mbPerRegion);

    if (cubeSegment.isEnableSharding()) {
        // each cuboid will be split into different number of shards
        HashMap<Long, Short> cuboidShards = Maps.newHashMap();

        // each shard/region may be split into multiple hfiles;
        // array index: region ID; map key: cuboidID, map value: cuboid size in the region
        List<HashMap<Long, Double>> innerRegionSplits = Lists.newArrayList();
        for (int i = 0; i < nRegion; i++) {
            innerRegionSplits.add(new HashMap<Long, Double>());
        }

        double[] regionSizes = new double[nRegion];
        for (long cuboidId : allCuboids) {
            double estimatedSize = cubeSizeMap.get(cuboidId);
            double magic = 23;
            int shardNum = (int) (estimatedSize * magic / mbPerRegion + 1);
            if (shardNum < 1) {
                shardNum = 1;
            }

            if (shardNum > nRegion) {
                logger.debug(String.format(Locale.ROOT,
                        "Cuboid %d 's estimated size %.2f MB will generate %d regions, " + "reduce to %d", cuboidId,
                        estimatedSize, shardNum, nRegion));
                shardNum = nRegion;
            } else {
                logger.debug(String.format(Locale.ROOT,
                        "Cuboid %d 's estimated size %.2f MB will generate %d regions", cuboidId, estimatedSize,
                        shardNum));
            }

            cuboidShards.put(cuboidId, (short) shardNum);
            short startShard = ShardingHash.getShard(cuboidId, nRegion);
            for (short i = startShard; i < startShard + shardNum; ++i) {
                short j = (short) (i % nRegion);
                regionSizes[j] = regionSizes[j] + estimatedSize / shardNum;
                innerRegionSplits.get(j).put(cuboidId, estimatedSize / shardNum);
            }
        }

        for (int i = 0; i < nRegion; ++i) {
            logger.debug("Region {}'s estimated size is {} MB, accounting for {} percent", i, regionSizes[i],
                    100.0 * regionSizes[i] / totalSizeInM);
        }

        CuboidShardUtil.saveCuboidShards(cubeSegment, cuboidShards, nRegion);
        saveHFileSplits(innerRegionSplits, mbPerRegion, hfileSplitsOutputFolder, kylinConfig);
        return getSplitsByRegionCount(nRegion);
    } else {
        throw new IllegalStateException("Not supported");
    }
}
Example 20
Source File: MapReduceUtil.java From kylin-on-parquet-v2 with Apache License 2.0
/**
 * @param cuboidScheduler a caller-specified scheduler allows more flexibility
 */
public static int getLayeredCubingReduceTaskNum(CubeSegment cubeSegment, CuboidScheduler cuboidScheduler,
        double totalMapInputMB, int level)
        throws ClassNotFoundException, IOException, InterruptedException, JobException {
    CubeDesc cubeDesc = cubeSegment.getCubeDesc();
    KylinConfig kylinConfig = cubeDesc.getConfig();

    double perReduceInputMB = kylinConfig.getDefaultHadoopJobReducerInputMB();
    double reduceCountRatio = kylinConfig.getDefaultHadoopJobReducerCountRatio();
    logger.info("Having per reduce MB " + perReduceInputMB + ", reduce count ratio " + reduceCountRatio + ", level "
            + level);

    CubeStatsReader cubeStatsReader = new CubeStatsReader(cubeSegment, cuboidScheduler, kylinConfig);

    double parentLayerSizeEst, currentLayerSizeEst, adjustedCurrentLayerSizeEst;

    if (level == -1) {
        // merge case
        double estimatedSize = cubeStatsReader.estimateCubeSize();
        adjustedCurrentLayerSizeEst = estimatedSize > totalMapInputMB ? totalMapInputMB : estimatedSize;
        logger.debug("estimated size {}, input size {}, adjustedCurrentLayerSizeEst: {}", estimatedSize,
                totalMapInputMB, adjustedCurrentLayerSizeEst);
    } else if (level == 0) {
        // base cuboid case TODO: the estimation could be very WRONG because it has no correction
        adjustedCurrentLayerSizeEst = cubeStatsReader.estimateLayerSize(0);
        logger.debug("adjustedCurrentLayerSizeEst: {}", adjustedCurrentLayerSizeEst);
    } else {
        parentLayerSizeEst = cubeStatsReader.estimateLayerSize(level - 1);
        currentLayerSizeEst = cubeStatsReader.estimateLayerSize(level);
        adjustedCurrentLayerSizeEst = totalMapInputMB / parentLayerSizeEst * currentLayerSizeEst;
        logger.debug(
                "totalMapInputMB: {}, parentLayerSizeEst: {}, currentLayerSizeEst: {}, adjustedCurrentLayerSizeEst: {}",
                totalMapInputMB, parentLayerSizeEst, currentLayerSizeEst, adjustedCurrentLayerSizeEst);
    }

    // number of reduce tasks
    int numReduceTasks = (int) Math.round(adjustedCurrentLayerSizeEst / perReduceInputMB * reduceCountRatio + 0.99);

    // adjust reducer number for cube which has DISTINCT_COUNT measures for better performance
    if (cubeDesc.hasMemoryHungryMeasures()) {
        logger.debug("Multiply reducer num by 4 to boost performance for memory hungry measures");
        numReduceTasks = numReduceTasks * 4;
    }

    // at least 1 reducer by default
    numReduceTasks = Math.max(kylinConfig.getHadoopJobMinReducerNumber(), numReduceTasks);
    // no more than 500 reducer by default
    numReduceTasks = Math.min(kylinConfig.getHadoopJobMaxReducerNumber(), numReduceTasks);

    return numReduceTasks;
}