org.apache.kylin.cube.cuboid.Cuboid Java Examples
The following examples show how to use org.apache.kylin.cube.cuboid.Cuboid. Each example is taken from an open-source project; the source file and project are noted above each snippet.
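A cuboid in Kylin is identified by a long whose bits mark which rowkey dimensions it retains; the base cuboid has every dimension bit set. As a quick orientation before the examples, here is a minimal standalone sketch of that convention (plain Java; the class and variable names are illustrative, not Kylin API):

public class CuboidIdDemo {
    public static void main(String[] args) {
        // Base cuboid for a 9-column rowkey: all nine dimension bits set.
        long baseCuboidId = (1L << 9) - 1;
        System.out.println(baseCuboidId);                        // 511 (the value asserted in the tests below)
        System.out.println(Long.toBinaryString(baseCuboidId));   // 111111111
        // Dropping a dimension clears its bit, producing a child cuboid id.
        long childCuboidId = baseCuboidId & ~(1L << 3);
        System.out.println(Long.toBinaryString(childCuboidId));  // 111110111
    }
}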
Example #1
Source File: CubeStatsReader.java From kylin with Apache License 2.0
private static void printOneCuboidInfo(long parent, long cuboidID, Map<Long, Long> cuboidRows,
        Map<Long, Double> cuboidSizes, int dimensionCount, int depth, PrintWriter out) {
    StringBuffer sb = new StringBuffer();
    for (int i = 0; i < depth; i++) {
        sb.append(" ");
    }
    String cuboidName = Cuboid.getDisplayName(cuboidID, dimensionCount);
    sb.append("|---- Cuboid ").append(cuboidName);

    long rowCount = cuboidRows.get(cuboidID);
    double size = cuboidSizes.get(cuboidID);
    sb.append(", est row: ").append(rowCount).append(", est MB: ").append(formatDouble(size));

    if (parent != -1) {
        sb.append(", shrink: ").append(formatDouble(100.0 * cuboidRows.get(cuboidID) / cuboidRows.get(parent)))
                .append("%");
    }

    out.println(sb.toString());
}
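The shrink figure is the child's estimated row count as a percentage of its parent's (100.0 * child / parent in the code above). A tiny worked example with hypothetical row counts:

public class ShrinkDemo {
    public static void main(String[] args) {
        long parentRows = 10_000L; // hypothetical estimate for the parent cuboid
        long childRows = 1_000L;   // hypothetical estimate for the child cuboid
        System.out.printf("shrink: %.2f%%%n", 100.0 * childRows / parentRows); // shrink: 10.00%
    }
}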
Example #2
Source File: SequentialCubeTupleIterator.java From kylin-on-parquet-v2 with Apache License 2.0
public SequentialCubeTupleIterator(List<CubeSegmentScanner> scanners, Cuboid cuboid,
        Set<TblColRef> selectedDimensions, List<TblColRef> rtGroups, Set<TblColRef> groups, //
        Set<FunctionDesc> selectedMetrics, TupleInfo returnTupleInfo, StorageContext context, SQLDigest sqlDigest) {
    this.context = context;
    this.scanners = scanners;

    Set<TblColRef> selectedDims = Sets.newHashSet(selectedDimensions);
    selectedDims.addAll(rtGroups);

    segmentCubeTupleIterators = Lists.newArrayList();
    for (CubeSegmentScanner scanner : scanners) {
        segmentCubeTupleIterators.add(new SegmentCubeTupleIterator(scanner, cuboid, selectedDims, selectedMetrics,
                returnTupleInfo, context));
    }

    if (context.mergeSortPartitionResults() && !sqlDigest.isRawQuery) {
        //query with limit
        logger.info("Using SortedIteratorMergerWithLimit to merge segment results");
        Iterator<Iterator<ITuple>> transformed = (Iterator<Iterator<ITuple>>) (Iterator<?>) segmentCubeTupleIterators
                .iterator();
        tupleIterator = new SortedIteratorMergerWithLimit<ITuple>(transformed, context.getFinalPushDownLimit(),
                getTupleDimensionComparator(cuboid, groups, returnTupleInfo)).getIterator();
    } else {
        //normal case
        logger.info("Using Iterators.concat to merge segment results");
        tupleIterator = Iterators.concat(segmentCubeTupleIterators.iterator());
    }
}
Example #3
Source File: SegmentCubeTupleIterator.java From kylin-on-parquet-v2 with Apache License 2.0
public SegmentCubeTupleIterator(CubeSegmentScanner scanner, Cuboid cuboid, Set<TblColRef> selectedDimensions, //
        Set<FunctionDesc> selectedMetrics, TupleInfo returnTupleInfo, StorageContext context) {
    this.scanner = scanner;
    this.cuboid = cuboid;
    this.selectedDimensions = selectedDimensions;
    this.selectedMetrics = selectedMetrics;
    this.tupleInfo = returnTupleInfo;
    this.tuple = new Tuple(returnTupleInfo);
    this.context = context;

    CuboidToGridTableMapping mapping = context.getMapping();
    int[] gtDimsIdx = mapping.getDimIndexes(selectedDimensions);
    int[] gtMetricsIdx = mapping.getMetricsIndexes(selectedMetrics);
    // gtColIdx = gtDimsIdx + gtMetricsIdx
    int[] gtColIdx = new int[gtDimsIdx.length + gtMetricsIdx.length];
    System.arraycopy(gtDimsIdx, 0, gtColIdx, 0, gtDimsIdx.length);
    System.arraycopy(gtMetricsIdx, 0, gtColIdx, gtDimsIdx.length, gtMetricsIdx.length);

    this.gtValues = getGTValuesIterator(scanner.iterator(), scanner.getScanRequest(), gtDimsIdx, gtMetricsIdx);
    this.cubeTupleConverter = ((GTCubeStorageQueryBase) context.getStorageQuery()).newCubeTupleConverter(
            scanner.cubeSeg, cuboid, selectedDimensions, selectedMetrics, gtColIdx, tupleInfo);
}
Example #4
Source File: ObserverEnabler.java From Kylin with Apache License 2.0
public static ResultScanner scanWithCoprocessorIfBeneficial(CubeSegment segment, Cuboid cuboid,
        TupleFilter tupleFiler, //
        Collection<TblColRef> groupBy, Collection<RowValueDecoder> rowValueDecoders, StorageContext context,
        HTableInterface table, Scan scan) throws IOException {

    if (context.isCoprocessorEnabled() == false) {
        return table.getScanner(scan);
    }

    CoprocessorRowType type = CoprocessorRowType.fromCuboid(segment, cuboid);
    CoprocessorFilter filter = CoprocessorFilter.fromFilter(segment, tupleFiler);
    CoprocessorProjector projector = CoprocessorProjector.makeForObserver(segment, cuboid, groupBy);
    ObserverAggregators aggrs = ObserverAggregators.fromValueDecoders(rowValueDecoders);

    if (DEBUG_LOCAL_COPROCESSOR) {
        RegionScanner innerScanner = new RegionScannerAdapter(table.getScanner(scan));
        AggregationScanner aggrScanner = new AggregationScanner(type, filter, projector, aggrs, innerScanner);
        return new ResultScannerAdapter(aggrScanner);
    } else {
        scan.setAttribute(AggregateRegionObserver.COPROCESSOR_ENABLE, new byte[] { 0x01 });
        scan.setAttribute(AggregateRegionObserver.TYPE, CoprocessorRowType.serialize(type));
        scan.setAttribute(AggregateRegionObserver.PROJECTOR, CoprocessorProjector.serialize(projector));
        scan.setAttribute(AggregateRegionObserver.AGGREGATORS, ObserverAggregators.serialize(aggrs));
        scan.setAttribute(AggregateRegionObserver.FILTER, CoprocessorFilter.serialize(filter));
        return table.getScanner(scan);
    }
}
Example #5
Source File: CubeStatsReader.java From kylin with Apache License 2.0
private static Map<Long, Double> getCuboidSizeMapFromRowCount(CubeSegment cubeSegment, Map<Long, Long> rowCountMap,
        long sourceRowCount, boolean origin) {
    final CubeDesc cubeDesc = cubeSegment.getCubeDesc();
    final List<Integer> rowkeyColumnSize = Lists.newArrayList();
    final Cuboid baseCuboid = Cuboid.getBaseCuboid(cubeDesc);
    final List<TblColRef> columnList = baseCuboid.getColumns();
    final CubeDimEncMap dimEncMap = cubeSegment.getDimensionEncodingMap();
    final Long baseCuboidRowCount = rowCountMap.get(baseCuboid.getId());

    for (int i = 0; i < columnList.size(); i++) {
        rowkeyColumnSize.add(dimEncMap.get(columnList.get(i)).getLengthOfEncoding());
    }

    Map<Long, Double> sizeMap = Maps.newHashMap();
    for (Map.Entry<Long, Long> entry : rowCountMap.entrySet()) {
        sizeMap.put(entry.getKey(), estimateCuboidStorageSize(cubeSegment, entry.getKey(), entry.getValue(),
                baseCuboid.getId(), baseCuboidRowCount, rowkeyColumnSize, sourceRowCount));
    }

    if (origin == false && cubeSegment.getConfig().enableJobCuboidSizeOptimize()) {
        optimizeSizeMap(sizeMap, cubeSegment);
    }

    return sizeMap;
}
Example #6
Source File: StreamingSearchContext.java From kylin-on-parquet-v2 with Apache License 2.0
private void calculateHitCuboid() {
    long basicCuboid = Cuboid.getBaseCuboidId(cubeDesc);
    this.setBasicCuboid(basicCuboid);
    if (!cubeDesc.getConfig().isStreamingBuildAdditionalCuboids()) {
        this.setHitCuboid(basicCuboid);
        return;
    }
    long targetCuboidID = identifyCuboid(dimensions);
    Set<Long> mandatoryCuboids = getMandatoryCuboids();
    for (long cuboidID : mandatoryCuboids) {
        if ((targetCuboidID & ~cuboidID) == 0) {
            this.setHitCuboid(cuboidID);
            return;
        }
    }
    this.setHitCuboid(basicCuboid);
}
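The test (targetCuboidID & ~cuboidID) == 0 succeeds exactly when every dimension bit of the target is also set in the mandatory cuboid, i.e. the cuboid can answer the query. A standalone restatement of that subset check (illustrative names, not Kylin API):

public class CuboidCoverDemo {
    // A cuboid can serve a query when the query's dimension bits
    // are a subset of the cuboid's bits.
    static boolean covers(long cuboidId, long targetCuboidId) {
        return (targetCuboidId & ~cuboidId) == 0;
    }

    public static void main(String[] args) {
        System.out.println(covers(0b1101L, 0b0101L)); // true: bits {0,2} are within {0,2,3}
        System.out.println(covers(0b1001L, 0b0101L)); // false: bit 2 missing from the cuboid
    }
}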
Example #7
Source File: FlinkCubingByLayer.java From kylin with Apache License 2.0
@Override
public void mapPartition(Iterable<Tuple2<ByteArray, Object[]>> iterable,
        Collector<Tuple2<ByteArray, Object[]>> collector) throws Exception {
    for (Tuple2<ByteArray, Object[]> item : iterable) {
        byte[] key = item.f0.array();
        long cuboidId = rowKeySplitter.parseCuboid(key);
        final List<Long> myChildren = cubeSegment.getCuboidScheduler().getSpanningCuboid(cuboidId);

        // if still empty or null
        if (myChildren == null || myChildren.size() == 0) {
            continue;
        }
        rowKeySplitter.split(key);
        final Cuboid parentCuboid = Cuboid.findForMandatory(cubeDesc, cuboidId);

        for (Long child : myChildren) {
            Cuboid childCuboid = Cuboid.findForMandatory(cubeDesc, child);
            ByteArray result = ndCuboidBuilder.buildKey2(parentCuboid, childCuboid,
                    rowKeySplitter.getSplitBuffers());
            collector.collect(new Tuple2<>(result, item.f1));
        }
    }
}
Example #8
Source File: GTCubeStorageQueryBase.java From kylin-on-parquet-v2 with Apache License 2.0
private void enableStreamAggregateIfBeneficial(Cuboid cuboid, Set<TblColRef> groupsD, StorageContext context) {
    CubeDesc cubeDesc = cuboid.getCubeDesc();
    boolean enabled = cubeDesc.getConfig().isStreamAggregateEnabled();

    Set<TblColRef> shardByInGroups = Sets.newHashSet();
    for (TblColRef col : cubeDesc.getShardByColumns()) {
        if (groupsD.contains(col)) {
            shardByInGroups.add(col);
        }
    }
    if (!shardByInGroups.isEmpty()) {
        enabled = false;
        logger.debug("Aggregate partition results is not beneficial because shard by columns in groupD: {}",
                shardByInGroups);
    }

    if (!context.isNeedStorageAggregation()) {
        enabled = false;
        logger.debug("Aggregate partition results is not beneficial because no storage aggregation");
    }

    if (enabled) {
        context.enableStreamAggregate();
    }
}
Example #9
Source File: RowKeyEncoder.java From kylin with Apache License 2.0
public RowKeyEncoder(CubeSegment cubeSeg, Cuboid cuboid) {
    super(cubeSeg, cuboid);
    enableSharding = cubeSeg.isEnableSharding();
    headerLength = cubeSeg.getRowKeyPreambleSize();
    Set<TblColRef> shardByColumns = cubeSeg.getCubeDesc().getShardByColumns();
    if (shardByColumns.size() > 1) {
        throw new IllegalStateException("Does not support multiple UHC now");
    }
    colIO = new RowKeyColumnIO(cubeSeg.getDimensionEncodingMap());
    for (TblColRef column : cuboid.getColumns()) {
        if (shardByColumns.contains(column)) {
            uhcOffset = bodyLength;
            uhcLength = colIO.getColumnLength(column);
        }
        bodyLength += colIO.getColumnLength(column);
    }
}
Example #10
Source File: GTCubeStorageQueryRequest.java From kylin-on-parquet-v2 with Apache License 2.0
public GTCubeStorageQueryRequest(Cuboid cuboid, Set<TblColRef> dimensions, //
        Set<TblColRef> groups, List<TblColRef> dynGroups, List<TupleExpression> dynGroupExprs, //
        Set<TblColRef> filterCols, Set<FunctionDesc> metrics, List<DynamicFunctionDesc> dynFuncs, //
        TupleFilter filter, TupleFilter havingFilter, StorageContext context) {
    this.cuboid = cuboid;
    this.dimensions = dimensions;
    this.groups = groups;
    this.dynGroups = dynGroups;
    this.dynGroupExprs = dynGroupExprs;
    this.filterCols = filterCols;
    this.metrics = metrics;
    this.dynFuncs = dynFuncs;
    this.filter = filter;
    this.havingFilter = havingFilter;
    this.context = context;
}
Example #11
Source File: CubeSizeEstimationCLI.java From Kylin with Apache License 2.0
public static long estimatedCubeSize(String cubeName, long[] cardinality) {
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    CubeManager cubeManager = CubeManager.getInstance(config);
    CubeInstance cubeInstance = cubeManager.getCube(cubeName);
    CubeDesc cubeDesc = cubeInstance.getDescriptor();

    CuboidScheduler scheduler = new CuboidScheduler(cubeDesc);
    long baseCuboid = Cuboid.getBaseCuboidId(cubeDesc);
    LinkedList<Long> cuboidQueue = new LinkedList<Long>();
    cuboidQueue.push(baseCuboid);

    long totalSpace = 0;
    while (!cuboidQueue.isEmpty()) {
        long cuboidID = cuboidQueue.pop();
        Collection<Long> spanningCuboid = scheduler.getSpanningCuboid(cuboidID);
        for (Long sc : spanningCuboid) {
            cuboidQueue.push(sc);
        }
        totalSpace += estimateCuboidSpace(cuboidID, cardinality, cubeDesc);
    }
    return totalSpace;
}
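Kylin's scheduler assigns each cuboid a single spanning parent, so the spanning relation forms a tree rooted at the base cuboid, and the push/pop loop above is a depth-first walk that visits each cuboid once. A minimal standalone analogue of that traversal (illustrative names, not Kylin API):

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.List;
import java.util.function.LongFunction;

public class CuboidWalkDemo {
    // Depth-first walk over a cuboid tree, mirroring the loop in estimatedCubeSize;
    // returns how many cuboids were visited.
    static long walk(long root, LongFunction<List<Long>> children) {
        long visited = 0;
        Deque<Long> stack = new ArrayDeque<>();
        stack.push(root);
        while (!stack.isEmpty()) {
            long id = stack.pop();
            visited++;
            for (Long child : children.apply(id)) {
                stack.push(child);
            }
        }
        return visited;
    }

    public static void main(String[] args) {
        // Toy tree: base cuboid 0b111 spans 0b011 and 0b101; those span nothing.
        LongFunction<List<Long>> children = id -> id == 0b111L ? List.of(0b011L, 0b101L) : List.of();
        System.out.println(walk(0b111L, children)); // 3
    }
}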
Example #12
Source File: CubeManager.java From kylin-on-parquet-v2 with Apache License 2.0
public CubeInstance dropCube(String cubeName, boolean deleteDesc) throws IOException {
    try (AutoLock lock = cubeMapLock.lockForWrite()) {
        logger.info("Dropping cube '{}'", cubeName);
        // load projects before remove cube from project

        // delete cube instance and cube desc
        CubeInstance cube = getCube(cubeName);

        // remove cube and update cache
        crud.delete(cube);
        Cuboid.clearCache(cube);

        if (deleteDesc && cube.getDescriptor() != null) {
            CubeDescManager.getInstance(config).removeCubeDesc(cube.getDescriptor());
        }

        // delete cube from project
        ProjectManager.getInstance(config).removeRealizationsFromProjects(RealizationType.CUBE, cubeName);

        return cube;
    }
}
Example #13
Source File: SegmentCubeTupleIterator.java From kylin with Apache License 2.0
public SegmentCubeTupleIterator(CubeSegmentScanner scanner, Cuboid cuboid, Set<TblColRef> selectedDimensions, //
        Set<FunctionDesc> selectedMetrics, TupleInfo returnTupleInfo, StorageContext context) {
    this.scanner = scanner;
    this.cuboid = cuboid;
    this.selectedDimensions = selectedDimensions;
    this.selectedMetrics = selectedMetrics;
    this.tupleInfo = returnTupleInfo;
    this.tuple = new Tuple(returnTupleInfo);
    this.context = context;

    CuboidToGridTableMapping mapping = context.getMapping();
    int[] gtDimsIdx = mapping.getDimIndexes(selectedDimensions);
    int[] gtMetricsIdx = mapping.getMetricsIndexes(selectedMetrics);
    // gtColIdx = gtDimsIdx + gtMetricsIdx
    int[] gtColIdx = new int[gtDimsIdx.length + gtMetricsIdx.length];
    System.arraycopy(gtDimsIdx, 0, gtColIdx, 0, gtDimsIdx.length);
    System.arraycopy(gtMetricsIdx, 0, gtColIdx, gtDimsIdx.length, gtMetricsIdx.length);

    this.gtValues = getGTValuesIterator(scanner.iterator(), scanner.getScanRequest(), gtDimsIdx, gtMetricsIdx);
    this.cubeTupleConverter = ((GTCubeStorageQueryBase) context.getStorageQuery()).newCubeTupleConverter(
            scanner.cubeSeg, cuboid, selectedDimensions, selectedMetrics, gtColIdx, tupleInfo);
}
Example #14
Source File: GTCubeStorageQueryBase.java From kylin with Apache License 2.0
private void enableStreamAggregateIfBeneficial(Cuboid cuboid, Set<TblColRef> groupsD, StorageContext context) {
    CubeDesc cubeDesc = cuboid.getCubeDesc();
    boolean enabled = cubeDesc.getConfig().isStreamAggregateEnabled();

    Set<TblColRef> shardByInGroups = Sets.newHashSet();
    for (TblColRef col : cubeDesc.getShardByColumns()) {
        if (groupsD.contains(col)) {
            shardByInGroups.add(col);
        }
    }
    if (!shardByInGroups.isEmpty()) {
        enabled = false;
        logger.debug("Aggregate partition results is not beneficial because shard by columns in groupD: {}",
                shardByInGroups);
    }

    if (!context.isNeedStorageAggregation()) {
        enabled = false;
        logger.debug("Aggregate partition results is not beneficial because no storage aggregation");
    }

    if (enabled) {
        context.enableStreamAggregate();
    }
}
Example #15
Source File: NDCuboidBuilder.java From kylin with Apache License 2.0
private void buildKeyInternal(Cuboid parentCuboid, Cuboid childCuboid, ByteArray[] splitBuffers,
        ByteArray newKeyBodyBuf) {
    RowKeyEncoder rowkeyEncoder = rowKeyEncoderProvider.getRowkeyEncoder(childCuboid);

    // rowkey columns
    long mask = Long.highestOneBit(parentCuboid.getId());
    long parentCuboidId = parentCuboid.getId();
    long childCuboidId = childCuboid.getId();
    long parentCuboidIdActualLength = (long) Long.SIZE - Long.numberOfLeadingZeros(parentCuboid.getId());
    int index = rowKeySplitter.getBodySplitOffset(); // skip shard and cuboidId
    int offset = RowConstants.ROWKEY_SHARDID_LEN + RowConstants.ROWKEY_CUBOIDID_LEN; // skip shard and cuboidId
    for (int i = 0; i < parentCuboidIdActualLength; i++) {
        if ((mask & parentCuboidId) > 0) { // if this bit position equals 1
            if ((mask & childCuboidId) > 0) { // if the child cuboid has this column
                System.arraycopy(splitBuffers[index].array(), splitBuffers[index].offset(), newKeyBodyBuf.array(),
                        offset, splitBuffers[index].length());
                offset += splitBuffers[index].length();
            }
            index++;
        }
        mask = mask >> 1;
    }

    rowkeyEncoder.fillHeader(newKeyBodyBuf.array());
}
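The mask loop scans the parent's dimension bits from the highest set bit downward: each bit the parent has advances the split-buffer index, and only bits also present in the child get copied into the new key. The same projection over plain strings (a sketch; names are illustrative, not Kylin API):

import java.util.ArrayList;
import java.util.List;

public class MaskProjectionDemo {
    // Keeps the parent's rowkey parts whose dimension bit is also set in the child cuboid.
    static List<String> project(long parentId, long childId, List<String> parentParts) {
        List<String> childParts = new ArrayList<>();
        long mask = Long.highestOneBit(parentId);
        int index = 0;
        while (mask != 0) {
            if ((mask & parentId) != 0) {      // the parent has this dimension
                if ((mask & childId) != 0) {   // the child keeps it
                    childParts.add(parentParts.get(index));
                }
                index++;
            }
            mask >>= 1;
        }
        return childParts;
    }

    public static void main(String[] args) {
        // Parent 0b111 carries parts [a, b, c] (highest bit first); child 0b101 keeps a and c.
        System.out.println(project(0b111L, 0b101L, List.of("a", "b", "c"))); // [a, c]
    }
}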
Example #16
Source File: NDCuboidBuilder.java From kylin-on-parquet-v2 with Apache License 2.0
private void buildKeyInternal(Cuboid parentCuboid, Cuboid childCuboid, ByteArray[] splitBuffers,
        ByteArray newKeyBodyBuf) {
    RowKeyEncoder rowkeyEncoder = rowKeyEncoderProvider.getRowkeyEncoder(childCuboid);

    // rowkey columns
    long mask = Long.highestOneBit(parentCuboid.getId());
    long parentCuboidId = parentCuboid.getId();
    long childCuboidId = childCuboid.getId();
    long parentCuboidIdActualLength = (long) Long.SIZE - Long.numberOfLeadingZeros(parentCuboid.getId());
    int index = rowKeySplitter.getBodySplitOffset(); // skip shard and cuboidId
    int offset = RowConstants.ROWKEY_SHARDID_LEN + RowConstants.ROWKEY_CUBOIDID_LEN; // skip shard and cuboidId
    for (int i = 0; i < parentCuboidIdActualLength; i++) {
        if ((mask & parentCuboidId) > 0) { // if this bit position equals 1
            if ((mask & childCuboidId) > 0) { // if the child cuboid has this column
                System.arraycopy(splitBuffers[index].array(), splitBuffers[index].offset(), newKeyBodyBuf.array(),
                        offset, splitBuffers[index].length());
                offset += splitBuffers[index].length();
            }
            index++;
        }
        mask = mask >> 1;
    }

    rowkeyEncoder.fillHeader(newKeyBodyBuf.array());
}
Example #17
Source File: CoprocessorProjector.java From kylin-on-parquet-v2 with Apache License 2.0
public static CoprocessorProjector makeForObserver(final CubeSegment cubeSegment, final Cuboid cuboid,
        final Collection<TblColRef> dimensionColumns) {

    RowKeyEncoder rowKeyMaskEncoder = new RowKeyEncoder(cubeSegment, cuboid) {
        @Override
        public void fillHeader(byte[] bytes) {
            Arrays.fill(bytes, 0, this.getHeaderLength(), (byte) 0xff);
        }

        @Override
        protected void fillColumnValue(TblColRef column, int columnLen, String valueStr, byte[] outputValue,
                int outputValueOffset) {
            byte bits = dimensionColumns.contains(column) ? (byte) 0xff : 0x00;
            Arrays.fill(outputValue, outputValueOffset, outputValueOffset + columnLen, bits);
        }
    };

    byte[] mask = rowKeyMaskEncoder.encode(new String[cuboid.getColumns().size()]);
    return new CoprocessorProjector(mask, dimensionColumns.size() != 0);
}
Example #18
Source File: AggregationGroup.java From kylin-on-parquet-v2 with Apache License 2.0
public int getBuildLevel() {
    int ret = 1; //base cuboid => partial cube root
    if (this.getPartialCubeFullMask() == Cuboid.getBaseCuboidId(cubeDesc)) {
        ret -= 1; //if partial cube's root is base cuboid, then one round less agg
    }

    ret += getNormalDims().size();
    for (HierarchyMask hierarchyMask : this.hierarchyMasks) {
        ret += hierarchyMask.allMasks.length;
    }
    for (Long joint : joints) {
        if ((joint & this.getHierarchyDimsMask()) == 0) {
            ret += 1;
        }
    }

    return ret;
}
Example #19
Source File: RowKeyEncoderProvider.java From kylin-on-parquet-v2 with Apache License 2.0
public RowKeyEncoder getRowkeyEncoder(Cuboid cuboid) {
    RowKeyEncoder rowKeyEncoder = rowKeyEncoders.get(cuboid.getId());
    if (rowKeyEncoder == null) {
        rowKeyEncoder = new RowKeyEncoder(cubeSegment, cuboid);
        rowKeyEncoders.put(cuboid.getId(), rowKeyEncoder);
    }
    return rowKeyEncoder;
}
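getRowkeyEncoder memoizes one encoder per cuboid id, which pays off because encoder construction walks the cuboid's columns (see Example #9). The same lookup-or-create pattern expressed generically with computeIfAbsent (a sketch, not the Kylin class):

import java.util.HashMap;
import java.util.Map;
import java.util.function.LongFunction;

public class EncoderCacheDemo<E> {
    private final Map<Long, E> cache = new HashMap<>();
    private final LongFunction<E> factory;

    EncoderCacheDemo(LongFunction<E> factory) {
        this.factory = factory;
    }

    // Lookup-or-create, as in getRowkeyEncoder above.
    E get(long cuboidId) {
        return cache.computeIfAbsent(cuboidId, id -> factory.apply(id));
    }

    public static void main(String[] args) {
        EncoderCacheDemo<String> demo = new EncoderCacheDemo<>(id -> "encoder-" + id);
        System.out.println(demo.get(511L));                   // encoder-511
        System.out.println(demo.get(511L) == demo.get(511L)); // true: cached instance reused
    }
}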
Example #20
Source File: HadoopFileStorageQuery.java From kylin-on-parquet-v2 with Apache License 2.0
public GTCubeStorageQueryRequest getStorageQueryRequest(StorageContext context, SQLDigest sqlDigest,
        TupleInfo returnTupleInfo) {
    context.setStorageQuery(this);

    //cope with queries with no aggregations
    RawQueryLastHacker.hackNoAggregations(sqlDigest, cubeDesc, returnTupleInfo);

    // Customized measure taking effect: e.g. allow custom measures to help raw queries
    notifyBeforeStorageQuery(sqlDigest);

    Collection<TblColRef> groups = sqlDigest.groupbyColumns;
    TupleFilter filter = sqlDigest.filter;

    // build dimension & metrics
    Set<TblColRef> dimensions = new LinkedHashSet<>();
    Set<FunctionDesc> metrics = new LinkedHashSet<>();
    buildDimensionsAndMetrics(sqlDigest, dimensions, metrics);

    // all dimensions = groups + other(like filter) dimensions
    Set<TblColRef> otherDims = Sets.newHashSet(dimensions);
    otherDims.removeAll(groups);

    // expand derived (xxxD means contains host columns only, derived columns were translated)
    Set<TblColRef> derivedPostAggregation = Sets.newHashSet();
    Set<TblColRef> groupsD = expandDerived(groups, derivedPostAggregation);
    Set<TblColRef> otherDimsD = expandDerived(otherDims, derivedPostAggregation);
    otherDimsD.removeAll(groupsD);

    // identify cuboid
    Set<TblColRef> dimensionsD = new LinkedHashSet<>();
    dimensionsD.addAll(groupsD);
    dimensionsD.addAll(otherDimsD);
    Cuboid cuboid = findCuboid(cubeInstance, dimensionsD, metrics);
    context.setCuboid(cuboid);

    return new GTCubeStorageQueryRequest(cuboid, dimensionsD, groupsD, null, null, null, metrics, null, null, null,
            context);
}
Example #21
Source File: NDCuboidMapper.java From Kylin with Apache License 2.0
@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
    long cuboidId = rowKeySplitter.split(key.getBytes(), key.getLength());
    Cuboid parentCuboid = Cuboid.findById(cubeDesc, cuboidId);

    Collection<Long> myChildren = cuboidScheduler.getSpanningCuboid(cuboidId);

    // if still empty or null
    if (myChildren == null || myChildren.size() == 0) {
        context.getCounter(BatchConstants.MAPREDUCE_COUTNER_GROUP_NAME, "Skipped records").increment(1L);
        skipCounter++;
        if (skipCounter % BatchConstants.COUNTER_MAX == 0) {
            logger.info("Skipped " + skipCounter + " records!");
        }
        return;
    }

    context.getCounter(BatchConstants.MAPREDUCE_COUTNER_GROUP_NAME, "Processed records").increment(1L);

    handleCounter++;
    if (handleCounter % BatchConstants.COUNTER_MAX == 0) {
        logger.info("Handled " + handleCounter + " records!");
    }

    for (Long child : myChildren) {
        Cuboid childCuboid = Cuboid.findById(cubeDesc, child);
        int keyLength = buildKey(parentCuboid, childCuboid, rowKeySplitter.getSplitBuffers());
        outputKey.set(keyBuf, 0, keyLength);
        context.write(outputKey, value);
    }
}
Example #22
Source File: BaseCuboidMapper.java From Kylin with Apache License 2.0
@Override
protected void setup(Context context) throws IOException {
    super.publishConfiguration(context.getConfiguration());

    cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
    segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME);
    intermediateTableRowDelimiter = context.getConfiguration().get(
            BatchConstants.CFG_CUBE_INTERMEDIATE_TABLE_ROW_DELIMITER,
            Character.toString(BatchConstants.INTERMEDIATE_TABLE_ROW_DELIMITER));
    if (Bytes.toBytes(intermediateTableRowDelimiter).length > 1) {
        throw new RuntimeException("Expected delimiter byte length is 1, but got "
                + Bytes.toBytes(intermediateTableRowDelimiter).length);
    }
    byteRowDelimiter = Bytes.toBytes(intermediateTableRowDelimiter)[0];

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());

    cube = CubeManager.getInstance(config).getCube(cubeName);
    cubeDesc = cube.getDescriptor();
    cubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);

    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
    baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);

    intermediateTableDesc = new CubeJoinedFlatTableDesc(cube.getDescriptor(), cubeSegment);

    bytesSplitter = new BytesSplitter(200, 4096);
    rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid);

    measureCodec = new MeasureCodec(cubeDesc.getMeasures());
    measures = new Object[cubeDesc.getMeasures().size()];

    int colCount = cubeDesc.getRowkey().getRowKeyColumns().length;
    keyBytesBuf = new byte[colCount][];

    initNullBytes();
}
Example #23
Source File: RowKeyEncoderTest.java From Kylin with Apache License 2.0
@Test
public void testEncodeWithSlr2() throws Exception {
    CubeInstance cube = CubeManager.getInstance(getTestConfig()).getCube("TEST_KYLIN_CUBE_WITH_SLR_READY");
    // CubeSegment seg = cube.getTheOnlySegment();
    CubeDesc cubeDesc = cube.getDescriptor();
    // String data =
    // "1234567892013-08-18Abbigliamento e accessoriDonna: AccessoriSciarpFoulard e ScialliAuctionItalyRegular";
    byte[][] data = new byte[9][];
    data[0] = Bytes.toBytes("123456789");
    data[1] = null;
    data[2] = null;
    data[3] = null;
    data[4] = null;
    data[5] = null;
    data[6] = null;
    data[7] = null;
    data[8] = null;

    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
    Cuboid baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);
    AbstractRowKeyEncoder rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cube.getFirstSegment(), baseCuboid);

    byte[] encodedKey = rowKeyEncoder.encode(data);
    assertEquals(48, encodedKey.length);
    byte[] sellerId = Arrays.copyOfRange(encodedKey, 8, 26);
    byte[] cuboidId = Arrays.copyOfRange(encodedKey, 0, 8);
    byte[] rest = Arrays.copyOfRange(encodedKey, 26, encodedKey.length);
    assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
    // base cuboid for a 9-column rowkey: 2^9 - 1 = 511
    assertEquals(511, Bytes.toLong(cuboidId));
    assertArrayEquals(new byte[] { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1 }, rest);
}
Example #24
Source File: InMemCubeBuilder.java From kylin with Apache License 2.0
private GridTable newGridTableByCuboidID(long cuboidID) throws IOException {
    GTInfo info = CubeGridTable.newGTInfo(Cuboid.findForMandatory(cubeDesc, cuboidID),
            new CubeDimEncMap(cubeDesc, dictionaryMap));

    // Below several store implementation are very similar in performance. The ConcurrentDiskStore is the simplest.
    // MemDiskStore store = new MemDiskStore(info, memBudget == null ? MemoryBudgetController.ZERO_BUDGET : memBudget);
    // MemDiskStore store = new MemDiskStore(info, MemoryBudgetController.ZERO_BUDGET);
    IGTStore store = new ConcurrentDiskStore(info);

    GridTable gridTable = new GridTable(info, store);
    return gridTable;
}
Example #25
Source File: CubeGridTable.java From kylin-on-parquet-v2 with Apache License 2.0
public static GTInfo newGTInfo(Cuboid cuboid, IDimensionEncodingMap dimEncMap, CuboidToGridTableMapping mapping) {
    GTInfo.Builder builder = GTInfo.builder();
    builder.setTableName("Cuboid " + cuboid.getId());
    builder.setCodeSystem(
            new CubeCodeSystem(mapping.getDimensionEncodings(dimEncMap), mapping.getDependentMetricsMap()));
    builder.setColumns(mapping.getDataTypes());
    builder.setPrimaryKey(mapping.getPrimaryKey());
    builder.enableColumnBlock(mapping.getColumnBlocks());
    if (mapping instanceof CuboidToGridTableMappingExt) {
        builder.enableDynamicDims(((CuboidToGridTableMappingExt) mapping).getDynamicDims());
    }
    return builder.build();
}
Example #26
Source File: CubeGridTable.java From kylin-on-parquet-v2 with Apache License 2.0
public static GTInfo newGTInfo(Cuboid cuboid, IDimensionEncodingMap dimEncMap) {
    CuboidToGridTableMapping mapping = new CuboidToGridTableMapping(cuboid);
    GTInfo.Builder builder = GTInfo.builder();
    builder.setTableName("Cuboid " + cuboid.getId());
    builder.setCodeSystem(
            new CubeCodeSystem(mapping.getDimensionEncodings(dimEncMap), mapping.getDependentMetricsMap()));
    builder.setColumns(mapping.getDataTypes());
    builder.setPrimaryKey(mapping.getPrimaryKey());
    builder.enableColumnBlock(mapping.getColumnBlocks());
    return builder.build();
}
Example #27
Source File: CoprocessorRowType.java From kylin with Apache License 2.0
public static CoprocessorRowType fromCuboid(CubeSegment seg, Cuboid cuboid) {
    List<TblColRef> colList = cuboid.getColumns();
    TblColRef[] cols = colList.toArray(new TblColRef[colList.size()]);
    RowKeyColumnIO colIO = new RowKeyColumnIO(seg.getDimensionEncodingMap());
    int[] colSizes = new int[cols.length];
    for (int i = 0; i < cols.length; i++) {
        colSizes[i] = colIO.getColumnLength(cols[i]);
    }
    return new CoprocessorRowType(cols, colSizes, seg.getRowKeyPreambleSize());
}
Example #28
Source File: RowKeyEncoderTest.java From kylin with Apache License 2.0
@Ignore
@Test
public void testEncodeWithSlr() throws Exception {
    CubeInstance cube = CubeManager.getInstance(getTestConfig()).getCube("TEST_KYLIN_CUBE_WITH_SLR_READY");
    // CubeSegment seg = cube.getTheOnlySegment();
    CubeDesc cubeDesc = cube.getDescriptor();
    // String data =
    // "1234567892013-08-18Abbigliamento e accessoriDonna: AccessoriSciarpFoulard e ScialliAuctionItalyRegular";
    String[] data = new String[9];
    data[0] = "123456789";
    data[1] = "2012-12-15";
    data[2] = "11848";
    data[3] = "Health & Beauty";
    data[4] = "Fragrances";
    data[5] = "Women";
    data[6] = "FP-GTC";
    data[7] = "0";
    data[8] = "15";

    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
    Cuboid baseCuboid = Cuboid.findForMandatory(cubeDesc, baseCuboidId);
    RowKeyEncoder rowKeyEncoder = new RowKeyEncoder(cube.getFirstSegment(), baseCuboid);

    byte[] encodedKey = rowKeyEncoder.encode(data);
    assertEquals(43 + rowKeyEncoder.getHeaderLength(), encodedKey.length);
    byte[] shard = Arrays.copyOfRange(encodedKey, 0, RowConstants.ROWKEY_SHARDID_LEN);
    @SuppressWarnings("unused")
    byte[] sellerId = Arrays.copyOfRange(encodedKey, rowKeyEncoder.getHeaderLength(),
            4 + rowKeyEncoder.getHeaderLength());
    byte[] cuboidId = Arrays.copyOfRange(encodedKey, RowConstants.ROWKEY_SHARDID_LEN,
            rowKeyEncoder.getHeaderLength());
    byte[] rest = Arrays.copyOfRange(encodedKey, 4 + rowKeyEncoder.getHeaderLength(), encodedKey.length);
    assertEquals(0, Bytes.toShort(shard));
    // assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
    assertEquals(511, Bytes.toLong(cuboidId));
    assertArrayEquals(new byte[] { 11, 55, -13, 49, 49, 56, 52, 56, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 22, 34,
            121, 70, 80, 45, 71, 84, 67, 9, 9, 9, 9, 9, 9, 0, 10, 5 }, rest);
}
Example #29
Source File: CubeJoinedFlatTableEnrich.java From kylin-on-parquet-v2 with Apache License 2.0
private void parseCubeDesc() {
    Cuboid baseCuboid = Cuboid.getBaseCuboid(cubeDesc);

    // build index for rowkey columns
    List<TblColRef> cuboidColumns = baseCuboid.getColumns();
    int rowkeyColCount = cubeDesc.getRowkey().getRowKeyColumns().length;
    rowKeyColumnIndexes = new int[rowkeyColCount];
    for (int i = 0; i < rowkeyColCount; i++) {
        TblColRef col = cuboidColumns.get(i);
        rowKeyColumnIndexes[i] = flatDesc.getColumnIndex(col);
    }

    List<MeasureDesc> measures = cubeDesc.getMeasures();
    int measureSize = measures.size();
    measureColumnIndexes = new int[measureSize][];
    for (int i = 0; i < measureSize; i++) {
        FunctionDesc func = measures.get(i).getFunction();
        List<TblColRef> colRefs = func.getParameter().getColRefs();
        if (colRefs == null) {
            measureColumnIndexes[i] = null;
        } else {
            measureColumnIndexes[i] = new int[colRefs.size()];
            for (int j = 0; j < colRefs.size(); j++) {
                TblColRef c = colRefs.get(j);
                measureColumnIndexes[i][j] = flatDesc.getColumnIndex(c);
            }
        }
    }
}
Example #30
Source File: InMemCubeBuilder2.java From kylin-on-parquet-v2 with Apache License 2.0
private GridTable newGridTableByCuboidID(long cuboidID) throws IOException {
    GTInfo info = CubeGridTable.newGTInfo(Cuboid.findForMandatory(cubeDesc, cuboidID),
            new CubeDimEncMap(cubeDesc, dictionaryMap));

    // Below several store implementation are very similar in performance. The ConcurrentDiskStore is the simplest.
    // MemDiskStore store = new MemDiskStore(info, memBudget == null ? MemoryBudgetController.ZERO_BUDGET : memBudget);
    // MemDiskStore store = new MemDiskStore(info, MemoryBudgetController.ZERO_BUDGET);
    IGTStore store = new ConcurrentDiskStore(info);

    GridTable gridTable = new GridTable(info, store);
    return gridTable;
}