org.apache.kylin.cube.cuboid.Cuboid Java Examples
The following examples show how to use org.apache.kylin.cube.cuboid.Cuboid. Each example is taken from an open-source project; the source file and project are noted above each snippet.
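A cuboid in Kylin is identified by a long whose bits mark which rowkey dimensions it retains; the base cuboid has every dimension bit set. As a quick orientation before the examples, here is a minimal standalone sketch of that convention (plain Java; the class and variable names are illustrative, not Kylin API):

public class CuboidIdDemo {
    public static void main(String[] args) {
        // Base cuboid for a 9-column rowkey: all nine dimension bits set.
        long baseCuboidId = (1L << 9) - 1;
        System.out.println(baseCuboidId);                        // 511 (the value asserted in the tests below)
        System.out.println(Long.toBinaryString(baseCuboidId));   // 111111111
        // Dropping a dimension clears its bit, producing a child cuboid id.
        long childCuboidId = baseCuboidId & ~(1L << 3);
        System.out.println(Long.toBinaryString(childCuboidId));  // 111110111
    }
}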
Example #1
Source File: CubeStatsReader.java From kylin with Apache License 2.0
private static void printOneCuboidInfo(long parent, long cuboidID, Map<Long, Long> cuboidRows,
        Map<Long, Double> cuboidSizes, int dimensionCount, int depth, PrintWriter out) {
    StringBuffer sb = new StringBuffer();
    for (int i = 0; i < depth; i++) {
        sb.append(" ");
    }
    String cuboidName = Cuboid.getDisplayName(cuboidID, dimensionCount);
    sb.append("|---- Cuboid ").append(cuboidName);

    long rowCount = cuboidRows.get(cuboidID);
    double size = cuboidSizes.get(cuboidID);
    sb.append(", est row: ").append(rowCount).append(", est MB: ").append(formatDouble(size));

    if (parent != -1) {
        sb.append(", shrink: ").append(formatDouble(100.0 * cuboidRows.get(cuboidID) / cuboidRows.get(parent)))
                .append("%");
    }

    out.println(sb.toString());
}
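The shrink figure is the child's estimated row count as a percentage of its parent's (100.0 * child / parent in the code above). A tiny worked example with hypothetical row counts:

public class ShrinkDemo {
    public static void main(String[] args) {
        long parentRows = 10_000L; // hypothetical estimate for the parent cuboid
        long childRows = 1_000L;   // hypothetical estimate for the child cuboid
        System.out.printf("shrink: %.2f%%%n", 100.0 * childRows / parentRows); // shrink: 10.00%
    }
}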
Example #2
Source File: SequentialCubeTupleIterator.java From kylin-on-parquet-v2 with Apache License 2.0
public SequentialCubeTupleIterator(List<CubeSegmentScanner> scanners, Cuboid cuboid,
        Set<TblColRef> selectedDimensions, List<TblColRef> rtGroups, Set<TblColRef> groups, //
        Set<FunctionDesc> selectedMetrics, TupleInfo returnTupleInfo, StorageContext context, SQLDigest sqlDigest) {
    this.context = context;
    this.scanners = scanners;

    Set<TblColRef> selectedDims = Sets.newHashSet(selectedDimensions);
    selectedDims.addAll(rtGroups);

    segmentCubeTupleIterators = Lists.newArrayList();
    for (CubeSegmentScanner scanner : scanners) {
        segmentCubeTupleIterators.add(new SegmentCubeTupleIterator(scanner, cuboid, selectedDims, selectedMetrics,
                returnTupleInfo, context));
    }

    if (context.mergeSortPartitionResults() && !sqlDigest.isRawQuery) {
        //query with limit
        logger.info("Using SortedIteratorMergerWithLimit to merge segment results");
        Iterator<Iterator<ITuple>> transformed = (Iterator<Iterator<ITuple>>) (Iterator<?>) segmentCubeTupleIterators
                .iterator();
        tupleIterator = new SortedIteratorMergerWithLimit<ITuple>(transformed, context.getFinalPushDownLimit(),
                getTupleDimensionComparator(cuboid, groups, returnTupleInfo)).getIterator();
    } else {
        //normal case
        logger.info("Using Iterators.concat to merge segment results");
        tupleIterator = Iterators.concat(segmentCubeTupleIterators.iterator());
    }
}
Example #3
Source File: SegmentCubeTupleIterator.java From kylin-on-parquet-v2 with Apache License 2.0
public SegmentCubeTupleIterator(CubeSegmentScanner scanner, Cuboid cuboid, Set<TblColRef> selectedDimensions, //
        Set<FunctionDesc> selectedMetrics, TupleInfo returnTupleInfo, StorageContext context) {
    this.scanner = scanner;
    this.cuboid = cuboid;
    this.selectedDimensions = selectedDimensions;
    this.selectedMetrics = selectedMetrics;
    this.tupleInfo = returnTupleInfo;
    this.tuple = new Tuple(returnTupleInfo);
    this.context = context;

    CuboidToGridTableMapping mapping = context.getMapping();
    int[] gtDimsIdx = mapping.getDimIndexes(selectedDimensions);
    int[] gtMetricsIdx = mapping.getMetricsIndexes(selectedMetrics);
    // gtColIdx = gtDimsIdx + gtMetricsIdx
    int[] gtColIdx = new int[gtDimsIdx.length + gtMetricsIdx.length];
    System.arraycopy(gtDimsIdx, 0, gtColIdx, 0, gtDimsIdx.length);
    System.arraycopy(gtMetricsIdx, 0, gtColIdx, gtDimsIdx.length, gtMetricsIdx.length);

    this.gtValues = getGTValuesIterator(scanner.iterator(), scanner.getScanRequest(), gtDimsIdx, gtMetricsIdx);
    this.cubeTupleConverter = ((GTCubeStorageQueryBase) context.getStorageQuery()).newCubeTupleConverter(
            scanner.cubeSeg, cuboid, selectedDimensions, selectedMetrics, gtColIdx, tupleInfo);
}
Example #4
Source File: ObserverEnabler.java From Kylin with Apache License 2.0
public static ResultScanner scanWithCoprocessorIfBeneficial(CubeSegment segment, Cuboid cuboid,
        TupleFilter tupleFiler, //
        Collection<TblColRef> groupBy, Collection<RowValueDecoder> rowValueDecoders, StorageContext context,
        HTableInterface table, Scan scan) throws IOException {

    if (context.isCoprocessorEnabled() == false) {
        return table.getScanner(scan);
    }

    CoprocessorRowType type = CoprocessorRowType.fromCuboid(segment, cuboid);
    CoprocessorFilter filter = CoprocessorFilter.fromFilter(segment, tupleFiler);
    CoprocessorProjector projector = CoprocessorProjector.makeForObserver(segment, cuboid, groupBy);
    ObserverAggregators aggrs = ObserverAggregators.fromValueDecoders(rowValueDecoders);

    if (DEBUG_LOCAL_COPROCESSOR) {
        RegionScanner innerScanner = new RegionScannerAdapter(table.getScanner(scan));
        AggregationScanner aggrScanner = new AggregationScanner(type, filter, projector, aggrs, innerScanner);
        return new ResultScannerAdapter(aggrScanner);
    } else {
        scan.setAttribute(AggregateRegionObserver.COPROCESSOR_ENABLE, new byte[] { 0x01 });
        scan.setAttribute(AggregateRegionObserver.TYPE, CoprocessorRowType.serialize(type));
        scan.setAttribute(AggregateRegionObserver.PROJECTOR, CoprocessorProjector.serialize(projector));
        scan.setAttribute(AggregateRegionObserver.AGGREGATORS, ObserverAggregators.serialize(aggrs));
        scan.setAttribute(AggregateRegionObserver.FILTER, CoprocessorFilter.serialize(filter));
        return table.getScanner(scan);
    }
}
Example #5
Source File: CubeStatsReader.java From kylin with Apache License 2.0
private static Map<Long, Double> getCuboidSizeMapFromRowCount(CubeSegment cubeSegment, Map<Long, Long> rowCountMap,
        long sourceRowCount, boolean origin) {
    final CubeDesc cubeDesc = cubeSegment.getCubeDesc();
    final List<Integer> rowkeyColumnSize = Lists.newArrayList();
    final Cuboid baseCuboid = Cuboid.getBaseCuboid(cubeDesc);
    final List<TblColRef> columnList = baseCuboid.getColumns();
    final CubeDimEncMap dimEncMap = cubeSegment.getDimensionEncodingMap();
    final Long baseCuboidRowCount = rowCountMap.get(baseCuboid.getId());

    for (int i = 0; i < columnList.size(); i++) {
        rowkeyColumnSize.add(dimEncMap.get(columnList.get(i)).getLengthOfEncoding());
    }

    Map<Long, Double> sizeMap = Maps.newHashMap();
    for (Map.Entry<Long, Long> entry : rowCountMap.entrySet()) {
        sizeMap.put(entry.getKey(), estimateCuboidStorageSize(cubeSegment, entry.getKey(), entry.getValue(),
                baseCuboid.getId(), baseCuboidRowCount, rowkeyColumnSize, sourceRowCount));
    }

    if (origin == false && cubeSegment.getConfig().enableJobCuboidSizeOptimize()) {
        optimizeSizeMap(sizeMap, cubeSegment);
    }

    return sizeMap;
}
Example #6
Source File: StreamingSearchContext.java From kylin-on-parquet-v2 with Apache License 2.0
private void calculateHitCuboid() {
    long basicCuboid = Cuboid.getBaseCuboidId(cubeDesc);
    this.setBasicCuboid(basicCuboid);
    if (!cubeDesc.getConfig().isStreamingBuildAdditionalCuboids()) {
        this.setHitCuboid(basicCuboid);
        return;
    }
    long targetCuboidID = identifyCuboid(dimensions);
    Set<Long> mandatoryCuboids = getMandatoryCuboids();
    for (long cuboidID : mandatoryCuboids) {
        if ((targetCuboidID & ~cuboidID) == 0) {
            this.setHitCuboid(cuboidID);
            return;
        }
    }
    this.setHitCuboid(basicCuboid);
}
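The test (targetCuboidID & ~cuboidID) == 0 succeeds exactly when every dimension bit of the target is also set in the mandatory cuboid, i.e. the cuboid can answer the query. A standalone restatement of that subset check (illustrative names, not Kylin API):

public class CuboidCoverDemo {
    // A cuboid can serve a query when the query's dimension bits
    // are a subset of the cuboid's bits.
    static boolean covers(long cuboidId, long targetCuboidId) {
        return (targetCuboidId & ~cuboidId) == 0;
    }

    public static void main(String[] args) {
        System.out.println(covers(0b1101L, 0b0101L)); // true: bits {0,2} are within {0,2,3}
        System.out.println(covers(0b1001L, 0b0101L)); // false: bit 2 missing from the cuboid
    }
}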
Example #7
Source File: FlinkCubingByLayer.java From kylin with Apache License 2.0
@Override
public void mapPartition(Iterable<Tuple2<ByteArray, Object[]>> iterable,
        Collector<Tuple2<ByteArray, Object[]>> collector) throws Exception {
    for (Tuple2<ByteArray, Object[]> item : iterable) {
        byte[] key = item.f0.array();
        long cuboidId = rowKeySplitter.parseCuboid(key);
        final List<Long> myChildren = cubeSegment.getCuboidScheduler().getSpanningCuboid(cuboidId);

        // if still empty or null
        if (myChildren == null || myChildren.size() == 0) {
            continue;
        }
        rowKeySplitter.split(key);
        final Cuboid parentCuboid = Cuboid.findForMandatory(cubeDesc, cuboidId);

        for (Long child : myChildren) {
            Cuboid childCuboid = Cuboid.findForMandatory(cubeDesc, child);
            ByteArray result = ndCuboidBuilder.buildKey2(parentCuboid, childCuboid,
                    rowKeySplitter.getSplitBuffers());
            collector.collect(new Tuple2<>(result, item.f1));
        }
    }
}
Example #8
Source File: GTCubeStorageQueryBase.java From kylin-on-parquet-v2 with Apache License 2.0
private void enableStreamAggregateIfBeneficial(Cuboid cuboid, Set<TblColRef> groupsD, StorageContext context) {
    CubeDesc cubeDesc = cuboid.getCubeDesc();
    boolean enabled = cubeDesc.getConfig().isStreamAggregateEnabled();

    Set<TblColRef> shardByInGroups = Sets.newHashSet();
    for (TblColRef col : cubeDesc.getShardByColumns()) {
        if (groupsD.contains(col)) {
            shardByInGroups.add(col);
        }
    }
    if (!shardByInGroups.isEmpty()) {
        enabled = false;
        logger.debug("Aggregate partition results is not beneficial because shard by columns in groupD: {}",
                shardByInGroups);
    }

    if (!context.isNeedStorageAggregation()) {
        enabled = false;
        logger.debug("Aggregate partition results is not beneficial because no storage aggregation");
    }

    if (enabled) {
        context.enableStreamAggregate();
    }
}
Example #9
Source File: RowKeyEncoder.java From kylin with Apache License 2.0
public RowKeyEncoder(CubeSegment cubeSeg, Cuboid cuboid) {
    super(cubeSeg, cuboid);
    enableSharding = cubeSeg.isEnableSharding();
    headerLength = cubeSeg.getRowKeyPreambleSize();
    Set<TblColRef> shardByColumns = cubeSeg.getCubeDesc().getShardByColumns();
    if (shardByColumns.size() > 1) {
        throw new IllegalStateException("Does not support multiple UHC now");
    }
    colIO = new RowKeyColumnIO(cubeSeg.getDimensionEncodingMap());
    for (TblColRef column : cuboid.getColumns()) {
        if (shardByColumns.contains(column)) {
            uhcOffset = bodyLength;
            uhcLength = colIO.getColumnLength(column);
        }
        bodyLength += colIO.getColumnLength(column);
    }
}
Example #10
Source File: GTCubeStorageQueryRequest.java From kylin-on-parquet-v2 with Apache License 2.0
public GTCubeStorageQueryRequest(Cuboid cuboid, Set<TblColRef> dimensions, //
        Set<TblColRef> groups, List<TblColRef> dynGroups, List<TupleExpression> dynGroupExprs, //
        Set<TblColRef> filterCols, Set<FunctionDesc> metrics, List<DynamicFunctionDesc> dynFuncs, //
        TupleFilter filter, TupleFilter havingFilter, StorageContext context) {
    this.cuboid = cuboid;
    this.dimensions = dimensions;
    this.groups = groups;
    this.dynGroups = dynGroups;
    this.dynGroupExprs = dynGroupExprs;
    this.filterCols = filterCols;
    this.metrics = metrics;
    this.dynFuncs = dynFuncs;
    this.filter = filter;
    this.havingFilter = havingFilter;
    this.context = context;
}
Example #11
Source File: CubeSizeEstimationCLI.java From Kylin with Apache License 2.0
public static long estimatedCubeSize(String cubeName, long[] cardinality) {
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    CubeManager cubeManager = CubeManager.getInstance(config);
    CubeInstance cubeInstance = cubeManager.getCube(cubeName);
    CubeDesc cubeDesc = cubeInstance.getDescriptor();

    CuboidScheduler scheduler = new CuboidScheduler(cubeDesc);
    long baseCuboid = Cuboid.getBaseCuboidId(cubeDesc);
    LinkedList<Long> cuboidQueue = new LinkedList<Long>();
    cuboidQueue.push(baseCuboid);

    long totalSpace = 0;
    while (!cuboidQueue.isEmpty()) {
        long cuboidID = cuboidQueue.pop();
        Collection<Long> spanningCuboid = scheduler.getSpanningCuboid(cuboidID);
        for (Long sc : spanningCuboid) {
            cuboidQueue.push(sc);
        }
        totalSpace += estimateCuboidSpace(cuboidID, cardinality, cubeDesc);
    }
    return totalSpace;
}
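Kylin's scheduler assigns each cuboid a single spanning parent, so the spanning relation forms a tree rooted at the base cuboid, and the push/pop loop above is a depth-first walk that visits each cuboid once. A minimal standalone analogue of that traversal (illustrative names, not Kylin API):

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.List;
import java.util.function.LongFunction;

public class CuboidWalkDemo {
    // Depth-first walk over a cuboid tree, mirroring the loop in estimatedCubeSize;
    // returns how many cuboids were visited.
    static long walk(long root, LongFunction<List<Long>> children) {
        long visited = 0;
        Deque<Long> stack = new ArrayDeque<>();
        stack.push(root);
        while (!stack.isEmpty()) {
            long id = stack.pop();
            visited++;
            for (Long child : children.apply(id)) {
                stack.push(child);
            }
        }
        return visited;
    }

    public static void main(String[] args) {
        // Toy tree: base cuboid 0b111 spans 0b011 and 0b101; those span nothing.
        LongFunction<List<Long>> children = id -> id == 0b111L ? List.of(0b011L, 0b101L) : List.of();
        System.out.println(walk(0b111L, children)); // 3
    }
}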
Example #12
Source File: CubeManager.java From kylin-on-parquet-v2 with Apache License 2.0
public CubeInstance dropCube(String cubeName, boolean deleteDesc) throws IOException {
    try (AutoLock lock = cubeMapLock.lockForWrite()) {
        logger.info("Dropping cube '{}'", cubeName);
        // load projects before remove cube from project

        // delete cube instance and cube desc
        CubeInstance cube = getCube(cubeName);

        // remove cube and update cache
        crud.delete(cube);
        Cuboid.clearCache(cube);

        if (deleteDesc && cube.getDescriptor() != null) {
            CubeDescManager.getInstance(config).removeCubeDesc(cube.getDescriptor());
        }

        // delete cube from project
        ProjectManager.getInstance(config).removeRealizationsFromProjects(RealizationType.CUBE, cubeName);

        return cube;
    }
}
Example #13
Source File: SegmentCubeTupleIterator.java From kylin with Apache License 2.0
public SegmentCubeTupleIterator(CubeSegmentScanner scanner, Cuboid cuboid, Set<TblColRef> selectedDimensions, //
        Set<FunctionDesc> selectedMetrics, TupleInfo returnTupleInfo, StorageContext context) {
    this.scanner = scanner;
    this.cuboid = cuboid;
    this.selectedDimensions = selectedDimensions;
    this.selectedMetrics = selectedMetrics;
    this.tupleInfo = returnTupleInfo;
    this.tuple = new Tuple(returnTupleInfo);
    this.context = context;

    CuboidToGridTableMapping mapping = context.getMapping();
    int[] gtDimsIdx = mapping.getDimIndexes(selectedDimensions);
    int[] gtMetricsIdx = mapping.getMetricsIndexes(selectedMetrics);
    // gtColIdx = gtDimsIdx + gtMetricsIdx
    int[] gtColIdx = new int[gtDimsIdx.length + gtMetricsIdx.length];
    System.arraycopy(gtDimsIdx, 0, gtColIdx, 0, gtDimsIdx.length);
    System.arraycopy(gtMetricsIdx, 0, gtColIdx, gtDimsIdx.length, gtMetricsIdx.length);

    this.gtValues = getGTValuesIterator(scanner.iterator(), scanner.getScanRequest(), gtDimsIdx, gtMetricsIdx);
    this.cubeTupleConverter = ((GTCubeStorageQueryBase) context.getStorageQuery()).newCubeTupleConverter(
            scanner.cubeSeg, cuboid, selectedDimensions, selectedMetrics, gtColIdx, tupleInfo);
}
Example #14
Source File: GTCubeStorageQueryBase.java From kylin with Apache License 2.0
private void enableStreamAggregateIfBeneficial(Cuboid cuboid, Set<TblColRef> groupsD, StorageContext context) {
    CubeDesc cubeDesc = cuboid.getCubeDesc();
    boolean enabled = cubeDesc.getConfig().isStreamAggregateEnabled();

    Set<TblColRef> shardByInGroups = Sets.newHashSet();
    for (TblColRef col : cubeDesc.getShardByColumns()) {
        if (groupsD.contains(col)) {
            shardByInGroups.add(col);
        }
    }
    if (!shardByInGroups.isEmpty()) {
        enabled = false;
        logger.debug("Aggregate partition results is not beneficial because shard by columns in groupD: {}",
                shardByInGroups);
    }

    if (!context.isNeedStorageAggregation()) {
        enabled = false;
        logger.debug("Aggregate partition results is not beneficial because no storage aggregation");
    }

    if (enabled) {
        context.enableStreamAggregate();
    }
}
Example #15
Source File: NDCuboidBuilder.java From kylin with Apache License 2.0
private void buildKeyInternal(Cuboid parentCuboid, Cuboid childCuboid, ByteArray[] splitBuffers,
        ByteArray newKeyBodyBuf) {
    RowKeyEncoder rowkeyEncoder = rowKeyEncoderProvider.getRowkeyEncoder(childCuboid);

    // rowkey columns
    long mask = Long.highestOneBit(parentCuboid.getId());
    long parentCuboidId = parentCuboid.getId();
    long childCuboidId = childCuboid.getId();
    long parentCuboidIdActualLength = (long) Long.SIZE - Long.numberOfLeadingZeros(parentCuboid.getId());
    int index = rowKeySplitter.getBodySplitOffset(); // skip shard and cuboidId
    int offset = RowConstants.ROWKEY_SHARDID_LEN + RowConstants.ROWKEY_CUBOIDID_LEN; // skip shard and cuboidId
    for (int i = 0; i < parentCuboidIdActualLength; i++) {
        if ((mask & parentCuboidId) > 0) { // if this bit position equals 1
            if ((mask & childCuboidId) > 0) { // if the child cuboid has this column
                System.arraycopy(splitBuffers[index].array(), splitBuffers[index].offset(), newKeyBodyBuf.array(),
                        offset, splitBuffers[index].length());
                offset += splitBuffers[index].length();
            }
            index++;
        }
        mask = mask >> 1;
    }

    rowkeyEncoder.fillHeader(newKeyBodyBuf.array());
}
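The mask loop scans the parent's dimension bits from the highest set bit downward: each bit the parent has advances the split-buffer index, and only bits also present in the child get copied into the new key. The same projection over plain strings (a sketch; names are illustrative, not Kylin API):

import java.util.ArrayList;
import java.util.List;

public class MaskProjectionDemo {
    // Keeps the parent's rowkey parts whose dimension bit is also set in the child cuboid.
    static List<String> project(long parentId, long childId, List<String> parentParts) {
        List<String> childParts = new ArrayList<>();
        long mask = Long.highestOneBit(parentId);
        int index = 0;
        while (mask != 0) {
            if ((mask & parentId) != 0) {      // the parent has this dimension
                if ((mask & childId) != 0) {   // the child keeps it
                    childParts.add(parentParts.get(index));
                }
                index++;
            }
            mask >>= 1;
        }
        return childParts;
    }

    public static void main(String[] args) {
        // Parent 0b111 carries parts [a, b, c] (highest bit first); child 0b101 keeps a and c.
        System.out.println(project(0b111L, 0b101L, List.of("a", "b", "c"))); // [a, c]
    }
}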
Example #16
Source File: NDCuboidBuilder.java From kylin-on-parquet-v2 with Apache License 2.0
private void buildKeyInternal(Cuboid parentCuboid, Cuboid childCuboid, ByteArray[] splitBuffers,
        ByteArray newKeyBodyBuf) {
    RowKeyEncoder rowkeyEncoder = rowKeyEncoderProvider.getRowkeyEncoder(childCuboid);

    // rowkey columns
    long mask = Long.highestOneBit(parentCuboid.getId());
    long parentCuboidId = parentCuboid.getId();
    long childCuboidId = childCuboid.getId();
    long parentCuboidIdActualLength = (long) Long.SIZE - Long.numberOfLeadingZeros(parentCuboid.getId());
    int index = rowKeySplitter.getBodySplitOffset(); // skip shard and cuboidId
    int offset = RowConstants.ROWKEY_SHARDID_LEN + RowConstants.ROWKEY_CUBOIDID_LEN; // skip shard and cuboidId
    for (int i = 0; i < parentCuboidIdActualLength; i++) {
        if ((mask & parentCuboidId) > 0) { // if this bit position equals 1
            if ((mask & childCuboidId) > 0) { // if the child cuboid has this column
                System.arraycopy(splitBuffers[index].array(), splitBuffers[index].offset(), newKeyBodyBuf.array(),
                        offset, splitBuffers[index].length());
                offset += splitBuffers[index].length();
            }
            index++;
        }
        mask = mask >> 1;
    }

    rowkeyEncoder.fillHeader(newKeyBodyBuf.array());
}
Example #17
Source File: CoprocessorProjector.java From kylin-on-parquet-v2 with Apache License 2.0
public static CoprocessorProjector makeForObserver(final CubeSegment cubeSegment, final Cuboid cuboid,
        final Collection<TblColRef> dimensionColumns) {

    RowKeyEncoder rowKeyMaskEncoder = new RowKeyEncoder(cubeSegment, cuboid) {
        @Override
        public void fillHeader(byte[] bytes) {
            Arrays.fill(bytes, 0, this.getHeaderLength(), (byte) 0xff);
        }

        @Override
        protected void fillColumnValue(TblColRef column, int columnLen, String valueStr, byte[] outputValue,
                int outputValueOffset) {
            byte bits = dimensionColumns.contains(column) ? (byte) 0xff : 0x00;
            Arrays.fill(outputValue, outputValueOffset, outputValueOffset + columnLen, bits);
        }
    };

    byte[] mask = rowKeyMaskEncoder.encode(new String[cuboid.getColumns().size()]);
    return new CoprocessorProjector(mask, dimensionColumns.size() != 0);
}
Example #18
Source File: AggregationGroup.java From kylin-on-parquet-v2 with Apache License 2.0
public int getBuildLevel() {
    int ret = 1; //base cuboid => partial cube root
    if (this.getPartialCubeFullMask() == Cuboid.getBaseCuboidId(cubeDesc)) {
        ret -= 1; //if partial cube's root is base cuboid, then one round less agg
    }

    ret += getNormalDims().size();
    for (HierarchyMask hierarchyMask : this.hierarchyMasks) {
        ret += hierarchyMask.allMasks.length;
    }
    for (Long joint : joints) {
        if ((joint & this.getHierarchyDimsMask()) == 0) {
            ret += 1;
        }
    }

    return ret;
}
Example #19
Source File: RowKeyEncoderProvider.java From kylin-on-parquet-v2 with Apache License 2.0
public RowKeyEncoder getRowkeyEncoder(Cuboid cuboid) {
    RowKeyEncoder rowKeyEncoder = rowKeyEncoders.get(cuboid.getId());
    if (rowKeyEncoder == null) {
        rowKeyEncoder = new RowKeyEncoder(cubeSegment, cuboid);
        rowKeyEncoders.put(cuboid.getId(), rowKeyEncoder);
    }
    return rowKeyEncoder;
}
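getRowkeyEncoder memoizes one encoder per cuboid id, which pays off because encoder construction walks the cuboid's columns (see Example #9). The same lookup-or-create pattern expressed generically with computeIfAbsent (a sketch, not the Kylin class):

import java.util.HashMap;
import java.util.Map;
import java.util.function.LongFunction;

public class EncoderCacheDemo<E> {
    private final Map<Long, E> cache = new HashMap<>();
    private final LongFunction<E> factory;

    EncoderCacheDemo(LongFunction<E> factory) {
        this.factory = factory;
    }

    // Lookup-or-create, as in getRowkeyEncoder above.
    E get(long cuboidId) {
        return cache.computeIfAbsent(cuboidId, id -> factory.apply(id));
    }

    public static void main(String[] args) {
        EncoderCacheDemo<String> demo = new EncoderCacheDemo<>(id -> "encoder-" + id);
        System.out.println(demo.get(511L));                   // encoder-511
        System.out.println(demo.get(511L) == demo.get(511L)); // true: cached instance reused
    }
}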
Example #20
Source File: HadoopFileStorageQuery.java From kylin-on-parquet-v2 with Apache License 2.0
public GTCubeStorageQueryRequest getStorageQueryRequest(StorageContext context, SQLDigest sqlDigest,
        TupleInfo returnTupleInfo) {
    context.setStorageQuery(this);

    //cope with queries with no aggregations
    RawQueryLastHacker.hackNoAggregations(sqlDigest, cubeDesc, returnTupleInfo);

    // Customized measure taking effect: e.g. allow custom measures to help raw queries
    notifyBeforeStorageQuery(sqlDigest);

    Collection<TblColRef> groups = sqlDigest.groupbyColumns;
    TupleFilter filter = sqlDigest.filter;

    // build dimension & metrics
    Set<TblColRef> dimensions = new LinkedHashSet<>();
    Set<FunctionDesc> metrics = new LinkedHashSet<>();
    buildDimensionsAndMetrics(sqlDigest, dimensions, metrics);

    // all dimensions = groups + other(like filter) dimensions
    Set<TblColRef> otherDims = Sets.newHashSet(dimensions);
    otherDims.removeAll(groups);

    // expand derived (xxxD means contains host columns only, derived columns were translated)
    Set<TblColRef> derivedPostAggregation = Sets.newHashSet();
    Set<TblColRef> groupsD = expandDerived(groups, derivedPostAggregation);
    Set<TblColRef> otherDimsD = expandDerived(otherDims, derivedPostAggregation);
    otherDimsD.removeAll(groupsD);

    // identify cuboid
    Set<TblColRef> dimensionsD = new LinkedHashSet<>();
    dimensionsD.addAll(groupsD);
    dimensionsD.addAll(otherDimsD);
    Cuboid cuboid = findCuboid(cubeInstance, dimensionsD, metrics);
    context.setCuboid(cuboid);

    return new GTCubeStorageQueryRequest(cuboid, dimensionsD, groupsD, null, null, null, metrics, null, null, null,
            context);
}
Example #21
Source File: NDCuboidMapper.java From Kylin with Apache License 2.0
@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
    long cuboidId = rowKeySplitter.split(key.getBytes(), key.getLength());
    Cuboid parentCuboid = Cuboid.findById(cubeDesc, cuboidId);

    Collection<Long> myChildren = cuboidScheduler.getSpanningCuboid(cuboidId);

    // if still empty or null
    if (myChildren == null || myChildren.size() == 0) {
        context.getCounter(BatchConstants.MAPREDUCE_COUTNER_GROUP_NAME, "Skipped records").increment(1L);
        skipCounter++;
        if (skipCounter % BatchConstants.COUNTER_MAX == 0) {
            logger.info("Skipped " + skipCounter + " records!");
        }
        return;
    }

    context.getCounter(BatchConstants.MAPREDUCE_COUTNER_GROUP_NAME, "Processed records").increment(1L);

    handleCounter++;
    if (handleCounter % BatchConstants.COUNTER_MAX == 0) {
        logger.info("Handled " + handleCounter + " records!");
    }

    for (Long child : myChildren) {
        Cuboid childCuboid = Cuboid.findById(cubeDesc, child);
        int keyLength = buildKey(parentCuboid, childCuboid, rowKeySplitter.getSplitBuffers());
        outputKey.set(keyBuf, 0, keyLength);
        context.write(outputKey, value);
    }
}
Example #22
Source File: BaseCuboidMapper.java From Kylin with Apache License 2.0
@Override
protected void setup(Context context) throws IOException {
    super.publishConfiguration(context.getConfiguration());

    cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
    segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME);
    intermediateTableRowDelimiter = context.getConfiguration().get(
            BatchConstants.CFG_CUBE_INTERMEDIATE_TABLE_ROW_DELIMITER,
            Character.toString(BatchConstants.INTERMEDIATE_TABLE_ROW_DELIMITER));
    if (Bytes.toBytes(intermediateTableRowDelimiter).length > 1) {
        throw new RuntimeException("Expected delimiter byte length is 1, but got "
                + Bytes.toBytes(intermediateTableRowDelimiter).length);
    }
    byteRowDelimiter = Bytes.toBytes(intermediateTableRowDelimiter)[0];

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());

    cube = CubeManager.getInstance(config).getCube(cubeName);
    cubeDesc = cube.getDescriptor();
    cubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);

    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
    baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);

    intermediateTableDesc = new CubeJoinedFlatTableDesc(cube.getDescriptor(), cubeSegment);

    bytesSplitter = new BytesSplitter(200, 4096);
    rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid);

    measureCodec = new MeasureCodec(cubeDesc.getMeasures());
    measures = new Object[cubeDesc.getMeasures().size()];

    int colCount = cubeDesc.getRowkey().getRowKeyColumns().length;
    keyBytesBuf = new byte[colCount][];

    initNullBytes();
}
Example #23
Source File: RowKeyEncoderTest.java From Kylin with Apache License 2.0
@Test
public void testEncodeWithSlr2() throws Exception {
    CubeInstance cube = CubeManager.getInstance(getTestConfig()).getCube("TEST_KYLIN_CUBE_WITH_SLR_READY");
    // CubeSegment seg = cube.getTheOnlySegment();
    CubeDesc cubeDesc = cube.getDescriptor();
    // String data =
    // "1234567892013-08-18Abbigliamento e accessoriDonna: AccessoriSciarpFoulard e ScialliAuctionItalyRegular";
    byte[][] data = new byte[9][];
    data[0] = Bytes.toBytes("123456789");
    data[1] = null;
    data[2] = null;
    data[3] = null;
    data[4] = null;
    data[5] = null;
    data[6] = null;
    data[7] = null;
    data[8] = null;

    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
    Cuboid baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);
    AbstractRowKeyEncoder rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cube.getFirstSegment(), baseCuboid);

    byte[] encodedKey = rowKeyEncoder.encode(data);
    assertEquals(48, encodedKey.length);
    byte[] sellerId = Arrays.copyOfRange(encodedKey, 8, 26);
    byte[] cuboidId = Arrays.copyOfRange(encodedKey, 0, 8);
    byte[] rest = Arrays.copyOfRange(encodedKey, 26, encodedKey.length);
    assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
    // base cuboid for a 9-column rowkey: 2^9 - 1 = 511
    assertEquals(511, Bytes.toLong(cuboidId));
    assertArrayEquals(new byte[] { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1 }, rest);
}
Example #24
Source File: InMemCubeBuilder.java From kylin with Apache License 2.0
private GridTable newGridTableByCuboidID(long cuboidID) throws IOException {
    GTInfo info = CubeGridTable.newGTInfo(Cuboid.findForMandatory(cubeDesc, cuboidID),
            new CubeDimEncMap(cubeDesc, dictionaryMap));

    // Below several store implementation are very similar in performance. The ConcurrentDiskStore is the simplest.
    // MemDiskStore store = new MemDiskStore(info, memBudget == null ? MemoryBudgetController.ZERO_BUDGET : memBudget);
    // MemDiskStore store = new MemDiskStore(info, MemoryBudgetController.ZERO_BUDGET);
    IGTStore store = new ConcurrentDiskStore(info);

    GridTable gridTable = new GridTable(info, store);
    return gridTable;
}
Example #25
Source File: CubeGridTable.java From kylin-on-parquet-v2 with Apache License 2.0
public static GTInfo newGTInfo(Cuboid cuboid, IDimensionEncodingMap dimEncMap, CuboidToGridTableMapping mapping) {
    GTInfo.Builder builder = GTInfo.builder();
    builder.setTableName("Cuboid " + cuboid.getId());
    builder.setCodeSystem(
            new CubeCodeSystem(mapping.getDimensionEncodings(dimEncMap), mapping.getDependentMetricsMap()));
    builder.setColumns(mapping.getDataTypes());
    builder.setPrimaryKey(mapping.getPrimaryKey());
    builder.enableColumnBlock(mapping.getColumnBlocks());
    if (mapping instanceof CuboidToGridTableMappingExt) {
        builder.enableDynamicDims(((CuboidToGridTableMappingExt) mapping).getDynamicDims());
    }
    return builder.build();
}
Example #26
Source File: CubeGridTable.java From kylin-on-parquet-v2 with Apache License 2.0
public static GTInfo newGTInfo(Cuboid cuboid, IDimensionEncodingMap dimEncMap) {
    CuboidToGridTableMapping mapping = new CuboidToGridTableMapping(cuboid);
    GTInfo.Builder builder = GTInfo.builder();
    builder.setTableName("Cuboid " + cuboid.getId());
    builder.setCodeSystem(
            new CubeCodeSystem(mapping.getDimensionEncodings(dimEncMap), mapping.getDependentMetricsMap()));
    builder.setColumns(mapping.getDataTypes());
    builder.setPrimaryKey(mapping.getPrimaryKey());
    builder.enableColumnBlock(mapping.getColumnBlocks());
    return builder.build();
}
Example #27
Source File: CoprocessorRowType.java From kylin with Apache License 2.0
public static CoprocessorRowType fromCuboid(CubeSegment seg, Cuboid cuboid) {
    List<TblColRef> colList = cuboid.getColumns();
    TblColRef[] cols = colList.toArray(new TblColRef[colList.size()]);
    RowKeyColumnIO colIO = new RowKeyColumnIO(seg.getDimensionEncodingMap());
    int[] colSizes = new int[cols.length];
    for (int i = 0; i < cols.length; i++) {
        colSizes[i] = colIO.getColumnLength(cols[i]);
    }
    return new CoprocessorRowType(cols, colSizes, seg.getRowKeyPreambleSize());
}
Example #28
Source File: RowKeyEncoderTest.java From kylin with Apache License 2.0
@Ignore
@Test
public void testEncodeWithSlr() throws Exception {
    CubeInstance cube = CubeManager.getInstance(getTestConfig()).getCube("TEST_KYLIN_CUBE_WITH_SLR_READY");
    // CubeSegment seg = cube.getTheOnlySegment();
    CubeDesc cubeDesc = cube.getDescriptor();
    // String data =
    // "1234567892013-08-18Abbigliamento e accessoriDonna: AccessoriSciarpFoulard e ScialliAuctionItalyRegular";
    String[] data = new String[9];
    data[0] = "123456789";
    data[1] = "2012-12-15";
    data[2] = "11848";
    data[3] = "Health & Beauty";
    data[4] = "Fragrances";
    data[5] = "Women";
    data[6] = "FP-GTC";
    data[7] = "0";
    data[8] = "15";

    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
    Cuboid baseCuboid = Cuboid.findForMandatory(cubeDesc, baseCuboidId);
    RowKeyEncoder rowKeyEncoder = new RowKeyEncoder(cube.getFirstSegment(), baseCuboid);

    byte[] encodedKey = rowKeyEncoder.encode(data);
    assertEquals(43 + rowKeyEncoder.getHeaderLength(), encodedKey.length);
    byte[] shard = Arrays.copyOfRange(encodedKey, 0, RowConstants.ROWKEY_SHARDID_LEN);
    @SuppressWarnings("unused")
    byte[] sellerId = Arrays.copyOfRange(encodedKey, rowKeyEncoder.getHeaderLength(),
            4 + rowKeyEncoder.getHeaderLength());
    byte[] cuboidId = Arrays.copyOfRange(encodedKey, RowConstants.ROWKEY_SHARDID_LEN,
            rowKeyEncoder.getHeaderLength());
    byte[] rest = Arrays.copyOfRange(encodedKey, 4 + rowKeyEncoder.getHeaderLength(), encodedKey.length);
    assertEquals(0, Bytes.toShort(shard));
    // assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
    assertEquals(511, Bytes.toLong(cuboidId));
    assertArrayEquals(new byte[] { 11, 55, -13, 49, 49, 56, 52, 56, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 22, 34,
            121, 70, 80, 45, 71, 84, 67, 9, 9, 9, 9, 9, 9, 0, 10, 5 }, rest);
}
Example #29
Source File: CubeJoinedFlatTableEnrich.java From kylin-on-parquet-v2 with Apache License 2.0
private void parseCubeDesc() {
    Cuboid baseCuboid = Cuboid.getBaseCuboid(cubeDesc);

    // build index for rowkey columns
    List<TblColRef> cuboidColumns = baseCuboid.getColumns();
    int rowkeyColCount = cubeDesc.getRowkey().getRowKeyColumns().length;
    rowKeyColumnIndexes = new int[rowkeyColCount];
    for (int i = 0; i < rowkeyColCount; i++) {
        TblColRef col = cuboidColumns.get(i);
        rowKeyColumnIndexes[i] = flatDesc.getColumnIndex(col);
    }

    List<MeasureDesc> measures = cubeDesc.getMeasures();
    int measureSize = measures.size();
    measureColumnIndexes = new int[measureSize][];
    for (int i = 0; i < measureSize; i++) {
        FunctionDesc func = measures.get(i).getFunction();
        List<TblColRef> colRefs = func.getParameter().getColRefs();
        if (colRefs == null) {
            measureColumnIndexes[i] = null;
        } else {
            measureColumnIndexes[i] = new int[colRefs.size()];
            for (int j = 0; j < colRefs.size(); j++) {
                TblColRef c = colRefs.get(j);
                measureColumnIndexes[i][j] = flatDesc.getColumnIndex(c);
            }
        }
    }
}
Example #30
Source File: InMemCubeBuilder2.java From kylin-on-parquet-v2 with Apache License 2.0
private GridTable newGridTableByCuboidID(long cuboidID) throws IOException {
    GTInfo info = CubeGridTable.newGTInfo(Cuboid.findForMandatory(cubeDesc, cuboidID),
            new CubeDimEncMap(cubeDesc, dictionaryMap));

    // Below several store implementation are very similar in performance. The ConcurrentDiskStore is the simplest.
    // MemDiskStore store = new MemDiskStore(info, memBudget == null ? MemoryBudgetController.ZERO_BUDGET : memBudget);
    // MemDiskStore store = new MemDiskStore(info, MemoryBudgetController.ZERO_BUDGET);
    IGTStore store = new ConcurrentDiskStore(info);

    GridTable gridTable = new GridTable(info, store);
    return gridTable;
}