Java Code Examples for org.apache.kylin.cube.CubeSegment#getTSRange()
The following examples show how to use org.apache.kylin.cube.CubeSegment#getTSRange().
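Before the examples, a minimal sketch of the call itself may help: getTSRange() returns the segment's time range as a SegmentRange.TSRange, whose endpoints carry an epoch-millisecond value (v) and open-endpoint flags (isMin / isMax). The cube name below is hypothetical.

KylinConfig config = KylinConfig.getInstanceFromEnv();
CubeInstance cube = CubeManager.getInstance(config).getCube("sample_cube"); // hypothetical cube name
CubeSegment segment = cube.getLatestReadySegment();
if (segment != null) {
    SegmentRange.TSRange tsRange = segment.getTSRange();
    if (!tsRange.start.isMin && !tsRange.end.isMax) {
        long startMillis = tsRange.start.v; // inclusive lower bound, epoch millis
        long endMillis = tsRange.end.v;     // exclusive upper bound, epoch millis
    }
}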
Example 1
Source File: SegmentPruner.java From kylin with Apache License 2.0
public static DimensionRangeInfo tryDeduceRangeFromPartitionCol(CubeSegment seg, TblColRef col) {
    DataModelDesc model = seg.getModel();
    PartitionDesc part = model.getPartitionDesc();

    if (!part.isPartitioned())
        return null;
    if (!col.equals(part.getPartitionDateColumnRef()))
        return null;

    // deduce the dim range from TSRange
    TSRange tsRange = seg.getTSRange();
    if (tsRange.start.isMin || tsRange.end.isMax)
        return null; // DimensionRangeInfo cannot express infinite

    String min = tsRangeToStr(tsRange.start.v, part);
    String max = tsRangeToStr(tsRange.end.v - 1, part); // note the -1, end side is exclusive
    return new DimensionRangeInfo(min, max);
}
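The -1 above reflects TSRange's half-open convention: start is inclusive, end is exclusive. A small sketch with hypothetical timestamp literals:

// A segment covering exactly 2012-01-01 UTC, expressed in epoch millis:
SegmentRange.TSRange range = new SegmentRange.TSRange(1325376000000L, 1325462400000L);
long lastContainedMillis = range.end.v - 1; // the last timestamp the segment actually contains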
Example 2
Source File: BuildCubeWithEngine.java From kylin with Apache License 2.0
private void checkNormalSegRangeInfo(CubeSegment segment) throws IOException {
    if (segment != null && segment.getModel().getPartitionDesc().isPartitioned()) {
        segment = cubeManager.getCube(segment.getCubeDesc().getName()).getSegmentById(segment.getUuid());
        TblColRef colRef = segment.getModel().getPartitionDesc().getPartitionDateColumnRef();
        DimensionRangeInfo dmRangeInfo = segment.getDimensionRangeInfoMap().get(colRef.getIdentity());
        if (dmRangeInfo != null) {
            long min_v = DateFormat.stringToMillis(dmRangeInfo.getMin());
            long max_v = DateFormat.stringToMillis(dmRangeInfo.getMax());
            long ts_range_start = segment.getTSRange().start.v;
            long ts_range_end = segment.getTSRange().end.v;
            if (!(ts_range_start <= min_v && max_v <= ts_range_end - 1)) {
                throw new RuntimeException(String.format(Locale.ROOT,
                        "Build cube failed, wrong partition column min/max value."
                                + " Segment: %s, min value: %s, TsRange.start: %s, max value: %s, TsRange.end: %s",
                        segment, min_v, ts_range_start, max_v, ts_range_end));
            }
        }
    }
}
Example 3
Source File: StreamingServer.java From kylin with Apache License 2.0
private StreamingConsumerChannel createNewConsumer(String cubeName, List<Partition> partitions,
        ConsumerStartProtocol startProtocol) throws IOException {
    KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
    CubeInstance cube = CubeManager.getInstance(kylinConfig).getCube(cubeName);
    StreamingSegmentManager segmentManager = getStreamingSegmentManager(cubeName);

    IStreamingSource streamingSource = StreamingSourceFactory.getStreamingSource(cube);
    IStreamingConnector streamingConnector = streamingSource.createStreamingConnector(cubeName, partitions,
            startProtocol, segmentManager);
    StreamingConsumerChannel consumer = new StreamingConsumerChannel(cubeName, streamingConnector, segmentManager,
            IStopConsumptionCondition.NEVER_STOP);
    long minAcceptEventTime = cube.getDescriptor().getPartitionDateStart();
    CubeSegment latestRemoteSegment = cube.getLatestReadySegment();
    if (latestRemoteSegment != null) {
        minAcceptEventTime = latestRemoteSegment.getTSRange().end.v;
    }
    if (minAcceptEventTime > 0 && minAcceptEventTime < System.currentTimeMillis()) {
        consumer.setMinAcceptEventTime(minAcceptEventTime);
    }
    StreamingCubeConsumeState consumeState = streamMetadataStore.getStreamingCubeConsumeState(cubeName);
    if (consumeState != null && consumeState == StreamingCubeConsumeState.PAUSED) {
        consumer.pause(false);
    }
    cubeConsumerMap.put(cubeName, consumer);
    return consumer;
}
Example 4
Source File: StreamingServer.java From kylin-on-parquet-v2 with Apache License 2.0
private StreamingConsumerChannel createNewConsumer(String cubeName, List<Partition> partitions,
        ConsumerStartProtocol startProtocol) throws IOException {
    KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
    CubeInstance cube = CubeManager.getInstance(kylinConfig).getCube(cubeName);
    StreamingSegmentManager segmentManager = getStreamingSegmentManager(cubeName);

    IStreamingSource streamingSource = StreamingSourceFactory.getStreamingSource(cube);
    IStreamingConnector streamingConnector = streamingSource.createStreamingConnector(cubeName, partitions,
            startProtocol, segmentManager);
    StreamingConsumerChannel consumer = new StreamingConsumerChannel(cubeName, streamingConnector, segmentManager,
            IStopConsumptionCondition.NEVER_STOP);
    long minAcceptEventTime = cube.getDescriptor().getPartitionDateStart();
    CubeSegment latestRemoteSegment = cube.getLatestReadySegment();
    if (latestRemoteSegment != null) {
        minAcceptEventTime = latestRemoteSegment.getTSRange().end.v;
    }
    if (minAcceptEventTime > 0) {
        consumer.setMinAcceptEventTime(minAcceptEventTime);
    }
    StreamingCubeConsumeState consumeState = streamMetadataStore.getStreamingCubeConsumeState(cubeName);
    if (consumeState != null && consumeState == StreamingCubeConsumeState.PAUSED) {
        consumer.pause(false);
    }
    cubeConsumerMap.put(cubeName, consumer);
    return consumer;
}
Example 5
Source File: KeyValueBuilder.java From kylin with Apache License 2.0
/**
 * Use the segment start time as the map key; the time unit depends on the partition columns.
 * If the partition_time_column is null, the unit is day;
 * otherwise, the unit is second.
 */
private String getSegmentStartTime(CubeSegment segment) {
    long startTime = segment.getTSRange().start.v;
    DataModelDesc model = segment.getModel();
    PartitionDesc partitionDesc = model.getPartitionDesc();
    if (partitionDesc == null || !partitionDesc.isPartitioned()) {
        return "0";
    } else if (partitionDesc.partitionColumnIsTimeMillis()) {
        return "" + startTime;
    } else if (partitionDesc.getPartitionTimeColumnRef() != null) {
        return "" + startTime / 1000L;
    } else if (partitionDesc.getPartitionDateColumnRef() != null) {
        return "" + startTime / 86400000L;
    }
    return "0";
}
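The divisors in this example convert the millisecond start time into the unit implied by the partition column. A worked sketch, assuming a start time of 2012-01-01 00:00:00 UTC:

long startTime = 1325376000000L;           // segment.getTSRange().start.v, epoch millis (assumed)
String millisKey = "" + startTime;         // "1325376000000" for a time-millis partition column
String secondKey = "" + startTime / 1000L; // "1325376000" when a partition time column exists
String dayKey = "" + startTime / 86400000L; // "15340", days since epoch, for a date-only partition column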
Example 6
Source File: SegmentPruner.java From kylin-on-parquet-v2 with Apache License 2.0
public static DimensionRangeInfo tryDeduceRangeFromPartitionCol(CubeSegment seg, TblColRef col) {
    DataModelDesc model = seg.getModel();
    PartitionDesc part = model.getPartitionDesc();

    if (!part.isPartitioned())
        return null;
    if (!col.equals(part.getPartitionDateColumnRef()))
        return null;

    // deduce the dim range from TSRange
    TSRange tsRange = seg.getTSRange();
    if (tsRange.start.isMin || tsRange.end.isMax)
        return null; // DimensionRangeInfo cannot express infinite

    String min = tsRangeToStr(tsRange.start.v, part);
    String max = tsRangeToStr(tsRange.end.v - 1, part); // note the -1, end side is exclusive
    return new DimensionRangeInfo(min, max);
}
Example 7
Source File: SegmentPrunerTest.java From kylin-on-parquet-v2 with Apache License 2.0
@Test
public void testPruneSegWithFilterIN() {
    // legacy cube segments do not have DimensionRangeInfo, but TSRange can still do some pruning
    CubeInstance cube = CubeManager.getInstance(getTestConfig())
            .getCube("test_kylin_cube_without_slr_left_join_ready_2_segments");
    TblColRef col = cube.getModel().findColumn("TEST_KYLIN_FACT.CAL_DT");
    CubeSegment seg = cube.getSegments(SegmentStatusEnum.READY).get(0);
    TSRange tsRange = seg.getTSRange();
    String start = DateFormat.formatToTimeStr(tsRange.start.v, "yyyy-MM-dd");
    CubeSegment seg2 = cube.getSegments(SegmentStatusEnum.READY).get(1);
    TSRange tsRange2 = seg2.getTSRange();
    try (SetAndUnsetSystemProp sns = new SetAndUnsetSystemProp("kylin.query.skip-empty-segments", "false")) {
        {
            TupleFilter inFilter = new ConstantTupleFilter(Sets.newHashSet(start,
                    DateFormat.formatToTimeStr(tsRange2.end.v + 1000 * 60 * 60 * 24L, "yyyy-MM-dd")));
            TupleFilter filter = compare(col, FilterOperatorEnum.IN, inFilter);
            SegmentPruner segmentPruner = new SegmentPruner(filter);
            Assert.assertTrue(segmentPruner.check(seg));
            Assert.assertFalse(segmentPruner.check(seg2));
        }
    }
}
Example 8
Source File: StreamingSegmentManager.java From kylin-on-parquet-v2 with Apache License 2.0
private void restoreSegmentsFromCP(List<File> segmentFolders, Map<Long, String> checkpointStoreStats,
        Map<Long, String> segmentSourceStartPositions, CubeSegment latestRemoteSegment) {
    if (segmentSourceStartPositions != null) {
        this.segmentSourceStartPositions.putAll(Maps.transformValues(segmentSourceStartPositions,
                new Function<String, ISourcePosition>() {
                    @Nullable
                    @Override
                    public ISourcePosition apply(@Nullable String input) {
                        return sourcePositionHandler.parsePosition(input);
                    }
                }));
    }
    for (File segmentFolder : segmentFolders) {
        try {
            IStreamingSegmentStore segmentStore = getSegmentStore(segmentFolder.getName());
            StreamingCubeSegment segment = StreamingCubeSegment.parseSegment(cubeInstance, segmentFolder,
                    segmentStore);
            if (latestRemoteSegment != null
                    && segment.getDateRangeEnd() <= latestRemoteSegment.getTSRange().end.v) {
                logger.info("remove segment:{} because it is late than remote segment", segment);
                removeSegmentFolder(segmentFolder);
                continue;
            }

            if (segment.isImmutable()) {
                immutableSegments.put(segment.getDateRangeStart(), segment);
            } else {
                // restore the active segment
                String segmentCheckpoint = checkpointStoreStats.get(segment.getDateRangeStart());
                if (segmentCheckpoint == null) {
                    removeSegmentFolder(segmentFolder);
                } else {
                    segmentStore.restoreFromCheckpoint(segmentCheckpoint);
                }
                activeSegments.put(segment.getDateRangeStart(), segment);
            }
        } catch (Exception e) {
            logger.error("fail to restore segment from file:" + segmentFolder.getName(), e);
        }
    }
}
Example 9
Source File: CubeService.java From kylin-on-parquet-v2 with Apache License 2.0
private void keepCubeRetention(String cubeName) {
    logger.info("checking keepCubeRetention");
    CubeInstance cube = getCubeManager().getCube(cubeName);
    CubeDesc desc = cube.getDescriptor();
    if (desc.getRetentionRange() <= 0)
        return;

    synchronized (CubeService.class) {
        cube = getCubeManager().getCube(cubeName);
        List<CubeSegment> readySegs = cube.getSegments(SegmentStatusEnum.READY);
        if (readySegs.isEmpty())
            return;

        List<CubeSegment> toRemoveSegs = Lists.newArrayList();
        long tail = readySegs.get(readySegs.size() - 1).getTSRange().end.v;
        long head = tail - desc.getRetentionRange();
        for (CubeSegment seg : readySegs) {
            if (seg.getTSRange().end.v > 0) { // for streaming cube its initial value is 0
                if (seg.getTSRange().end.v <= head) {
                    toRemoveSegs.add(seg);
                }
            }
        }

        if (toRemoveSegs.size() > 0) {
            try {
                getCubeManager().updateCubeDropSegments(cube, toRemoveSegs);
            } catch (IOException e) {
                logger.error("Failed to remove old segment from cube " + cubeName, e);
            }
        }
    }
}
Example 10
Source File: CubeService.java From kylin with Apache License 2.0
private void keepCubeRetention(String cubeName) {
    logger.info("checking keepCubeRetention");
    CubeInstance cube = getCubeManager().getCube(cubeName);
    CubeDesc desc = cube.getDescriptor();
    if (desc.getRetentionRange() <= 0)
        return;

    synchronized (CubeService.class) {
        cube = getCubeManager().getCube(cubeName);
        List<CubeSegment> readySegs = cube.getSegments(SegmentStatusEnum.READY);
        if (readySegs.isEmpty())
            return;

        List<CubeSegment> toRemoveSegs = Lists.newArrayList();
        long tail = readySegs.get(readySegs.size() - 1).getTSRange().end.v;
        long head = tail - desc.getRetentionRange();
        for (CubeSegment seg : readySegs) {
            if (seg.getTSRange().end.v > 0) { // for streaming cube its initial value is 0
                if (seg.getTSRange().end.v <= head) {
                    toRemoveSegs.add(seg);
                }
            }
        }

        if (toRemoveSegs.size() > 0) {
            try {
                getCubeManager().updateCubeDropSegments(cube, toRemoveSegs);
            } catch (IOException e) {
                logger.error("Failed to remove old segment from cube " + cubeName, e);
            }
        }
    }
}
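Examples 9 and 10 anchor the retention window at the end of the newest READY segment and drop any segment whose non-zero end falls at or before the window's head. A sketch of that arithmetic with hypothetical values:

long retentionRange = 30L * 24 * 60 * 60 * 1000; // e.g. keep 30 days (desc.getRetentionRange())
long tail = 1328054400000L;                      // end.v of the newest READY segment (hypothetical)
long head = tail - retentionRange;               // lower bound of the retention window
long segEnd = 1325376000000L;                    // seg.getTSRange().end.v of an older segment (hypothetical)
boolean shouldDrop = segEnd > 0 && segEnd <= head; // end.v == 0 marks a fresh streaming segment; never drop those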
Example 11
Source File: StreamingSegmentManager.java From kylin with Apache License 2.0
private void restoreSegmentsFromCP(List<File> segmentFolders, Map<Long, String> checkpointStoreStats,
        Map<Long, String> segmentSourceStartPositions, CubeSegment latestRemoteSegment) {
    if (segmentSourceStartPositions != null) {
        this.segmentSourceStartPositions.putAll(Maps.transformValues(segmentSourceStartPositions,
                new Function<String, ISourcePosition>() {
                    @Nullable
                    @Override
                    public ISourcePosition apply(@Nullable String input) {
                        return sourcePositionHandler.parsePosition(input);
                    }
                }));
    }
    for (File segmentFolder : segmentFolders) {
        try {
            IStreamingSegmentStore segmentStore = getSegmentStore(segmentFolder.getName());
            StreamingCubeSegment segment = StreamingCubeSegment.parseSegment(cubeInstance, segmentFolder,
                    segmentStore);
            if (latestRemoteSegment != null
                    && segment.getDateRangeEnd() <= latestRemoteSegment.getTSRange().end.v) {
                logger.info("remove segment:{} because it is late than remote segment", segment);
                removeSegmentFolder(segmentFolder);
                continue;
            }

            if (segment.isImmutable()) {
                immutableSegments.put(segment.getDateRangeStart(), segment);
            } else {
                // restore the active segment
                String segmentCheckpoint = checkpointStoreStats.get(segment.getDateRangeStart());
                if (segmentCheckpoint == null) {
                    removeSegmentFolder(segmentFolder);
                } else {
                    segmentStore.restoreFromCheckpoint(segmentCheckpoint);
                }
                activeSegments.put(segment.getDateRangeStart(), segment);
            }
        } catch (Exception e) {
            logger.error("fail to restore segment from file:" + segmentFolder.getName(), e);
        }
    }
}
Example 12
Source File: StreamStorageQuery.java From kylin-on-parquet-v2 with Apache License 2.0
@Override
public ITupleIterator search(StorageContext context, SQLDigest sqlDigest, TupleInfo returnTupleInfo) {
    GTCubeStorageQueryRequest request = getStorageQueryRequest(context, sqlDigest, returnTupleInfo);

    List<CubeSegmentScanner> scanners = Lists.newArrayList();
    long maxHistorySegmentTime = -1;
    StreamingDataQueryPlanner segmentsPlanner = new StreamingDataQueryPlanner(cubeInstance.getDescriptor(),
            request.getFilter());
    for (CubeSegment cubeSeg : cubeInstance.getSegments(SegmentStatusEnum.READY)) {
        TSRange segmentRange = cubeSeg.getTSRange();
        if (segmentRange.end.v > maxHistorySegmentTime) {
            maxHistorySegmentTime = cubeSeg.getTSRange().end.v;
        }
        CubeSegmentScanner scanner;
        if (cubeDesc.getConfig().isSkippingEmptySegments() && cubeSeg.getInputRecords() == 0) {
            logger.info("Skip cube segment {} because its input record is 0", cubeSeg);
            continue;
        }
        if (segmentsPlanner.canSkip(segmentRange.start.v, segmentRange.end.v)) {
            logger.info("Skip cube segment {} because of not satisfy filter:{}", cubeSeg, request.getFilter());
            continue;
        }
        scanner = new CubeSegmentScanner(cubeSeg, request.getCuboid(), request.getDimensions(),
                request.getGroups(), request.getDynGroups(), request.getDynGroupExprs(), request.getMetrics(),
                request.getDynFuncs(), request.getFilter(), request.getHavingFilter(), request.getContext());
        if (!scanner.isSegmentSkipped())
            scanners.add(scanner);
    }

    ITupleIterator historyResult;
    if (scanners.isEmpty()) {
        historyResult = ITupleIterator.EMPTY_TUPLE_ITERATOR;
    } else {
        historyResult = new SequentialCubeTupleIterator(scanners, request.getCuboid(), request.getDimensions(),
                request.getDynGroups(), request.getGroups(), request.getMetrics(), returnTupleInfo, context,
                sqlDigest);
    }
    Set<TblColRef> dimensionsD = request.getDimensions();
    if (dimensionsD.isEmpty()) {
        dimensionsD = Sets.newHashSet(request.getCuboid().getColumns()); // temporary fix for query like: select count(1) from TABLE
    }
    ITupleIterator realTimeResult;
    if (segmentsPlanner.canSkip(maxHistorySegmentTime, Long.MAX_VALUE)) {
        logger.info("Skip scan realTime data, {}", maxHistorySegmentTime);
        realTimeResult = ITupleIterator.EMPTY_TUPLE_ITERATOR;
    } else {
        boolean isSelectAllQuery = isSelectAllQuery(request.getCuboid(), request.getGroups(), request.getFilter());
        int limitPushDown = isSelectAllQuery ? context.getFinalPushDownLimit() : Integer.MAX_VALUE;
        realTimeResult = realTimeSearchClient.search(maxHistorySegmentTime, cubeInstance, returnTupleInfo,
                request.getFilter(), dimensionsD, request.getGroups(), request.getMetrics(), limitPushDown,
                !isSelectAllQuery);
    }
    return new CompoundTupleIterator(Arrays.asList(historyResult, realTimeResult));
}
Example 13
Source File: Coordinator.java From kylin-on-parquet-v2 with Apache License 2.0
private List<String> findSegmentsCanBuild(String cubeName) {
    List<String> result = Lists.newArrayList();
    CubeInstance cubeInstance = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()).getCube(cubeName);
    // in optimization
    if (isInOptimize(cubeInstance)) {
        return result;
    }
    int allowMaxBuildingSegments = cubeInstance.getConfig().getMaxBuildingSegments();
    CubeSegment latestHistoryReadySegment = cubeInstance.getLatestReadySegment();
    long minSegmentStart = -1;
    if (latestHistoryReadySegment != null) {
        minSegmentStart = latestHistoryReadySegment.getTSRange().end.v;
    } else {
        // there is no ready segment; to make cube planner work, only 1 segment can build
        logger.info("there is no ready segments for cube:{}, so only allow 1 segment build concurrently",
                cubeName);
        allowMaxBuildingSegments = 1;
    }

    CubeAssignment assignments = streamMetadataStore.getAssignmentsByCube(cubeName);
    Set<Integer> cubeAssignedReplicaSets = assignments.getReplicaSetIDs();

    List<SegmentBuildState> segmentStates = streamMetadataStore.getSegmentBuildStates(cubeName);
    Collections.sort(segmentStates);
    // TODO need to check whether it is in optimization
    int inBuildingSegments = cubeInstance.getBuildingSegments().size();
    int leftQuota = allowMaxBuildingSegments - inBuildingSegments;

    for (int i = 0; i < segmentStates.size(); i++) {
        SegmentBuildState segmentState = segmentStates.get(i);
        Pair<Long, Long> segmentRange = CubeSegment.parseSegmentName(segmentState.getSegmentName());
        if (segmentRange.getFirst() < minSegmentStart) {
            logger.warn("the cube segment state is not clear correctly, cube:{} segment:{}, clear it", cubeName,
                    segmentState.getSegmentName());
            streamMetadataStore.removeSegmentBuildState(cubeName, segmentState.getSegmentName());
            continue;
        }

        if (segmentState.isInBuilding()) {
            inBuildingSegments++;
            String jobId = segmentState.getState().getJobId();
            logger.info("there is segment in building, cube:{} segment:{} jobId:{}", cubeName,
                    segmentState.getSegmentName(), jobId);
            long buildStartTime = segmentState.getState().getBuildStartTime();
            if (buildStartTime != 0 && jobId != null) {
                long buildDuration = System.currentTimeMillis() - buildStartTime;
                if (buildDuration < 40 * 60 * 1000) { // only if the build runs longer than 40 minutes, check the job status
                    continue;
                }
                CubingJob cubingJob = (CubingJob) getExecutableManager().getJob(jobId);
                ExecutableState jobState = cubingJob.getStatus();
                if (ExecutableState.SUCCEED.equals(jobState)) {
                    // the job has already succeeded, remove the build state
                    CubeSegment cubeSegment = cubeInstance.getSegment(segmentState.getSegmentName(), null);
                    if (cubeSegment != null && SegmentStatusEnum.READY == cubeSegment.getStatus()) {
                        logger.info(
                                "job:{} is already succeed, and segment:{} is ready, remove segment build state",
                                jobId, segmentState.getSegmentName());
                        streamMetadataStore.removeSegmentBuildState(cubeName, segmentState.getSegmentName());
                    }
                    continue;
                } else if (ExecutableState.ERROR.equals(jobState)) {
                    logger.info("job:{} is error, resume the job", jobId);
                    getExecutableManager().resumeJob(jobId);
                    continue;
                } else if (ExecutableState.DISCARDED.equals(jobState)) {
                    // if the job has been discarded manually, treat the segment as not in building
                    logger.info("job:{} is discard, reset the job state in metaStore", jobId);
                    SegmentBuildState.BuildState state = new SegmentBuildState.BuildState();
                    state.setBuildStartTime(0);
                    state.setState(SegmentBuildState.BuildState.State.WAIT);
                    state.setJobId(cubingJob.getId());
                    streamMetadataStore.updateSegmentBuildState(cubeName, segmentState.getSegmentName(), state);
                    segmentState.setState(state);
                    logger.info("segment:{} is discard", segmentState.getSegmentName());
                    continue;
                } else {
                    logger.info("job:{} is in running, job state: {}", jobId, jobState);
                    continue;
                }
            }
        }
        if (leftQuota <= 0) {
            logger.info("No left quota to build segments for cube:{}", cubeName);
            return result;
        }
        if (!checkSegmentIsReadyToBuild(segmentStates, i, cubeAssignedReplicaSets)) {
            break;
        }
        result.add(segmentState.getSegmentName());
        leftQuota--;
    }
    return result;
}
Example 14
Source File: ExtendCubeToHybridCLI.java From kylin-on-parquet-v2 with Apache License 2.0
public void createFromCube(String projectName, String cubeName, String partitionDateStr) throws Exception {
    logger.info("Create hybrid for cube[" + cubeName + "], project[" + projectName + "], partition_date["
            + partitionDateStr + "].");

    CubeInstance cubeInstance = cubeManager.getCube(cubeName);
    if (!validateCubeInstance(cubeInstance)) {
        return;
    }

    CubeDesc cubeDesc = cubeDescManager.getCubeDesc(cubeInstance.getDescName());
    DataModelDesc dataModelDesc = metadataManager.getDataModelDesc(cubeDesc.getModelName());
    if (StringUtils.isEmpty(dataModelDesc.getPartitionDesc().getPartitionDateColumn())) {
        logger.error("No incremental cube, no need to extend.");
        return;
    }

    String owner = cubeInstance.getOwner();
    long partitionDate = partitionDateStr != null ? DateFormat.stringToMillis(partitionDateStr) : 0;

    // get new name for old cube and cube_desc
    String newCubeDescName = renameCube(cubeDesc.getName());
    String newCubeInstanceName = renameCube(cubeInstance.getName());
    while (cubeDescManager.getCubeDesc(newCubeDescName) != null)
        newCubeDescName = renameCube(newCubeDescName);
    while (cubeManager.getCube(newCubeInstanceName) != null)
        newCubeInstanceName = renameCube(newCubeInstanceName);

    // create new cube_instance for old segments
    CubeInstance newCubeInstance = CubeInstance.getCopyOf(cubeInstance);
    newCubeInstance.setName(newCubeInstanceName);
    newCubeInstance.setDescName(newCubeDescName);
    newCubeInstance.updateRandomUuid();
    Iterator<CubeSegment> segmentIterator = newCubeInstance.getSegments().iterator();
    CubeSegment currentSeg = null;
    while (segmentIterator.hasNext()) {
        currentSeg = segmentIterator.next();
        if (partitionDateStr != null && (currentSeg.getTSRange().start.v >= partitionDate
                || currentSeg.getTSRange().end.v > partitionDate)) {
            segmentIterator.remove();
            logger.info("CubeSegment[" + currentSeg + "] was removed.");
        }
    }
    // null check added: currentSeg stays null when the cube has no segments (see the kylin variant in Example 19)
    if (currentSeg != null && partitionDateStr != null && partitionDate != currentSeg.getTSRange().end.v) {
        logger.error("PartitionDate must be end date of one segment.");
        return;
    }
    if (currentSeg != null && partitionDateStr == null)
        partitionDate = currentSeg.getTSRange().end.v;

    cubeManager.createCube(newCubeInstance, projectName, owner);
    logger.info("CubeInstance was saved at: " + newCubeInstance.getResourcePath());

    // create new cube for old segments
    CubeDesc newCubeDesc = CubeDesc.getCopyOf(cubeDesc);
    newCubeDesc.setName(newCubeDescName);
    newCubeDesc.updateRandomUuid();
    newCubeDesc.init(kylinConfig);
    newCubeDesc.setPartitionDateEnd(partitionDate);
    newCubeDesc.calculateSignature();
    cubeDescManager.createCubeDesc(newCubeDesc);
    logger.info("CubeDesc was saved at: " + newCubeDesc.getResourcePath());

    // update old cube_desc to new-version metadata
    cubeDesc.setPartitionDateStart(partitionDate);
    cubeDesc.setEngineType(IEngineAware.ID_MR_V2);
    cubeDesc.setStorageType(IStorageAware.ID_SHARDED_HBASE);
    cubeDesc.calculateSignature();
    cubeDescManager.updateCubeDesc(cubeDesc);
    logger.info("CubeDesc was saved at: " + cubeDesc.getResourcePath());

    // clear segments for old cube
    cubeInstance.setSegments(new Segments<CubeSegment>());
    cubeInstance.setStatus(RealizationStatusEnum.DISABLED);
    store.checkAndPutResource(cubeInstance.getResourcePath(), cubeInstance, CubeManager.CUBE_SERIALIZER);
    logger.info("CubeInstance was saved at: " + cubeInstance.getResourcePath());

    // create hybrid model for these two cubes
    List<RealizationEntry> realizationEntries = Lists.newArrayListWithCapacity(2);
    realizationEntries.add(RealizationEntry.create(RealizationType.CUBE, cubeInstance.getName()));
    realizationEntries.add(RealizationEntry.create(RealizationType.CUBE, newCubeInstance.getName()));
    HybridInstance hybridInstance = HybridInstance.create(kylinConfig, renameHybrid(cubeInstance.getName()),
            realizationEntries);
    store.checkAndPutResource(hybridInstance.getResourcePath(), hybridInstance, HybridManager.HYBRID_SERIALIZER);
    ProjectManager.getInstance(kylinConfig).moveRealizationToProject(RealizationType.HYBRID,
            hybridInstance.getName(), projectName, owner);
    logger.info("HybridInstance was saved at: " + hybridInstance.getResourcePath());

    // copy Acl from old cube to new cube
    copyAcl(cubeInstance.getId(), newCubeInstance.getId(), projectName);
    logger.info("Acl copied from [" + cubeName + "] to [" + newCubeInstanceName + "].");
}
Example 15
Source File: StreamStorageQuery.java From kylin with Apache License 2.0
@Override
public ITupleIterator search(StorageContext context, SQLDigest sqlDigest, TupleInfo returnTupleInfo) {
    GTCubeStorageQueryRequest request = getStorageQueryRequest(context, sqlDigest, returnTupleInfo);

    List<CubeSegmentScanner> scanners = Lists.newArrayList();
    long maxHistorySegmentTime = -1;
    StreamingDataQueryPlanner segmentsPlanner = new StreamingDataQueryPlanner(cubeInstance.getDescriptor(),
            request.getFilter());
    long current = System.currentTimeMillis();
    for (CubeSegment cubeSeg : cubeInstance.getSegments(SegmentStatusEnum.READY)) {
        TSRange segmentRange = cubeSeg.getTSRange();
        if (segmentRange.end.v > maxHistorySegmentTime) {
            if (cubeSeg.getTSRange().end.v < current) {
                // In normal case, the segment for future time range is not reasonable in streaming case
                maxHistorySegmentTime = cubeSeg.getTSRange().end.v;
            }
        }
        CubeSegmentScanner scanner;
        if (cubeDesc.getConfig().isSkippingEmptySegments() && cubeSeg.getInputRecords() == 0) {
            logger.info("Skip cube segment {} because its input record is 0", cubeSeg);
            continue;
        }
        if (segmentsPlanner.canSkip(segmentRange.start.v, segmentRange.end.v)) {
            logger.info("Skip cube segment {} because of not satisfy filter:{}", cubeSeg, request.getFilter());
            continue;
        }
        scanner = new CubeSegmentScanner(cubeSeg, request.getCuboid(), request.getDimensions(),
                request.getGroups(), request.getDynGroups(), request.getDynGroupExprs(), request.getMetrics(),
                request.getDynFuncs(), request.getFilter(), request.getHavingFilter(), request.getContext());
        if (!scanner.isSegmentSkipped())
            scanners.add(scanner);
    }

    ITupleIterator historyResult;
    if (scanners.isEmpty()) {
        historyResult = ITupleIterator.EMPTY_TUPLE_ITERATOR;
    } else {
        historyResult = new SequentialCubeTupleIterator(scanners, request.getCuboid(), request.getDimensions(),
                request.getDynGroups(), request.getGroups(), request.getMetrics(), returnTupleInfo, context,
                sqlDigest);
    }
    Set<TblColRef> dimensionsD = request.getDimensions();
    if (dimensionsD.isEmpty()) {
        dimensionsD = Sets.newHashSet(request.getCuboid().getColumns()); // temporary fix for query like: select count(1) from TABLE
    }
    ITupleIterator realTimeResult;
    if (segmentsPlanner.canSkip(maxHistorySegmentTime, Long.MAX_VALUE)) {
        logger.info("Skip scan realTime data, {}", maxHistorySegmentTime);
        realTimeResult = ITupleIterator.EMPTY_TUPLE_ITERATOR;
    } else {
        boolean isSelectAllQuery = isSelectAllQuery(request.getCuboid(), request.getGroups(), request.getFilter());
        int limitPushDown = isSelectAllQuery ? context.getFinalPushDownLimit() : Integer.MAX_VALUE;
        realTimeResult = realTimeSearchClient.search(maxHistorySegmentTime, cubeInstance, returnTupleInfo,
                request.getFilter(), dimensionsD, request.getGroups(), request.getMetrics(), limitPushDown,
                !isSelectAllQuery);
    }
    return new CompoundTupleIterator(Arrays.asList(historyResult, realTimeResult));
}
Example 16
Source File: Coordinator.java From kylin with Apache License 2.0
private List<String> findSegmentsCanBuild(String cubeName) {
    List<String> result = Lists.newArrayList();
    CubeInstance cubeInstance = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()).getCube(cubeName);
    // in optimization
    if (isInOptimize(cubeInstance)) {
        return result;
    }
    int allowMaxBuildingSegments = cubeInstance.getConfig().getMaxBuildingSegments();
    CubeSegment latestHistoryReadySegment = cubeInstance.getLatestReadySegment();
    long minSegmentStart = -1;
    if (latestHistoryReadySegment != null) {
        minSegmentStart = latestHistoryReadySegment.getTSRange().end.v;
    } else {
        // there is no ready segment; to make cube planner work, only 1 segment can build
        logger.info("there is no ready segments for cube:{}, so only allow 1 segment build concurrently",
                cubeName);
        allowMaxBuildingSegments = 1;
    }

    CubeAssignment assignments = streamMetadataStore.getAssignmentsByCube(cubeName);
    Set<Integer> cubeAssignedReplicaSets = assignments.getReplicaSetIDs();

    List<SegmentBuildState> segmentStates = streamMetadataStore.getSegmentBuildStates(cubeName);
    Collections.sort(segmentStates);
    // TODO need to check whether it is in optimization
    int inBuildingSegments = cubeInstance.getBuildingSegments().size();
    int leftQuota = allowMaxBuildingSegments - inBuildingSegments;

    for (int i = 0; i < segmentStates.size(); i++) {
        SegmentBuildState segmentState = segmentStates.get(i);
        Pair<Long, Long> segmentRange = CubeSegment.parseSegmentName(segmentState.getSegmentName());
        if (segmentRange.getFirst() < minSegmentStart) {
            logger.warn("the cube segment state is not clear correctly, cube:{} segment:{}, clear it", cubeName,
                    segmentState.getSegmentName());
            streamMetadataStore.removeSegmentBuildState(cubeName, segmentState.getSegmentName());
            continue;
        }

        if (segmentState.isInBuilding()) {
            inBuildingSegments++;
            String jobId = segmentState.getState().getJobId();
            logger.info("there is segment in building, cube:{} segment:{} jobId:{}", cubeName,
                    segmentState.getSegmentName(), jobId);
            long buildStartTime = segmentState.getState().getBuildStartTime();
            if (buildStartTime != 0 && jobId != null) {
                long buildDuration = System.currentTimeMillis() - buildStartTime;
                if (buildDuration < 40 * 60 * 1000) { // only if the build runs longer than 40 minutes, check the job status
                    continue;
                }
                CubingJob cubingJob = (CubingJob) getExecutableManager().getJob(jobId);
                ExecutableState jobState = cubingJob.getStatus();
                if (ExecutableState.SUCCEED.equals(jobState)) {
                    // the job has already succeeded, remove the build state
                    CubeSegment cubeSegment = cubeInstance.getSegment(segmentState.getSegmentName(), null);
                    if (cubeSegment != null && SegmentStatusEnum.READY == cubeSegment.getStatus()) {
                        logger.info(
                                "job:{} is already succeed, and segment:{} is ready, remove segment build state",
                                jobId, segmentState.getSegmentName());
                        streamMetadataStore.removeSegmentBuildState(cubeName, segmentState.getSegmentName());
                    }
                    continue;
                } else if (ExecutableState.ERROR.equals(jobState)) {
                    logger.info("job:{} is error, resume the job", jobId);
                    getExecutableManager().resumeJob(jobId);
                    continue;
                } else if (ExecutableState.DISCARDED.equals(jobState)) {
                    // if the job has been discarded manually, treat the segment as not in building
                    logger.info("job:{} is discard, reset the job state in metaStore", jobId);
                    SegmentBuildState.BuildState state = new SegmentBuildState.BuildState();
                    state.setBuildStartTime(0);
                    state.setState(SegmentBuildState.BuildState.State.WAIT);
                    state.setJobId(cubingJob.getId());
                    streamMetadataStore.updateSegmentBuildState(cubeName, segmentState.getSegmentName(), state);
                    segmentState.setState(state);
                    logger.info("segment:{} is discard", segmentState.getSegmentName());
                    continue;
                } else {
                    logger.info("job:{} is in running, job state: {}", jobId, jobState);
                    continue;
                }
            }
        }
        if (leftQuota <= 0) {
            logger.info("No left quota to build segments for cube:{}", cubeName);
            return result;
        }
        if (!checkSegmentIsReadyToBuild(segmentStates, i, cubeAssignedReplicaSets)) {
            break;
        }
        result.add(segmentState.getSegmentName());
        leftQuota--;
    }
    return result;
}
Example 17
Source File: BuildJobSubmitter.java From kylin with Apache License 2.0
/**
 * @return list of segments for which a segment build job could be submitted
 */
@NonSideEffect
List<String> checkSegmentBuildJobFromMetadata(String cubeName) {
    List<String> result = Lists.newArrayList();
    CubeInstance cubeInstance = coordinator.getCubeManager().getCube(cubeName);
    // in optimization
    if (isInOptimize(cubeInstance)) {
        return result;
    }
    int allowMaxBuildingSegments = cubeInstance.getConfig().getMaxBuildingSegments();
    CubeSegment latestHistoryReadySegment = cubeInstance.getLatestReadySegment();
    long minSegmentStart = -1;
    if (latestHistoryReadySegment != null) {
        minSegmentStart = latestHistoryReadySegment.getTSRange().end.v;
    } else {
        // there is no ready segment; to make cube planner work, only 1 segment can build
        logger.info("there is no ready segments for cube:{}, so only allow 1 segment build concurrently",
                cubeName);
        allowMaxBuildingSegments = 1;
    }

    CubeAssignment assignments = coordinator.getStreamMetadataStore().getAssignmentsByCube(cubeName);
    Set<Integer> cubeAssignedReplicaSets = assignments.getReplicaSetIDs();

    List<SegmentBuildState> segmentStates = coordinator.getStreamMetadataStore().getSegmentBuildStates(cubeName);
    int inBuildingSegments = cubeInstance.getBuildingSegments().size();
    int leftQuota = allowMaxBuildingSegments - inBuildingSegments;
    boolean stillQuotaForNewSegment = true;

    // Sort it so we can iterate segments from earlier ones to newer ones
    Collections.sort(segmentStates);

    for (int i = 0; i < segmentStates.size(); i++) {
        boolean needRebuild = false;
        if (leftQuota <= 0) {
            logger.info("No left quota to build segments for cube:{} at {}", cubeName, leftQuota);
            stillQuotaForNewSegment = false;
        }

        SegmentBuildState segmentState = segmentStates.get(i);
        Pair<Long, Long> segmentRange = CubeSegment.parseSegmentName(segmentState.getSegmentName());

        // If an historical segment already exists, we should not let a new realtime segment overwrite it
        // (that is dangerous), so just delete the entry to ignore the segment which should not exist
        if (segmentRange.getFirst() < minSegmentStart) {
            logger.warn(
                    "The cube segment state is not correct because it belongs to the historical part, cube:{} segment:{}, clear it.",
                    cubeName, segmentState.getSegmentName());
            coordinator.getStreamMetadataStore().removeSegmentBuildState(cubeName, segmentState.getSegmentName());
            continue;
        }

        // We already have a building job for the current segment
        if (segmentState.isInBuilding()) {
            needRebuild = checkSegmentBuildingJob(segmentState, cubeName, cubeInstance);
            if (!needRebuild)
                continue;
        } else if (segmentState.isInWaiting()) {
            // The data may have been uploaded to remote completely, or the job was discarded;
            // both cases should submit a building job, so just let it go through
        }

        boolean readyToBuild = checkSegmentIsReadyToBuild(segmentStates, i, cubeAssignedReplicaSets);
        if (!readyToBuild) {
            logger.debug("Segment {} {} is not ready to submit a building job.", cubeName, segmentState);
        } else if (stillQuotaForNewSegment || needRebuild) {
            result.add(segmentState.getSegmentName());
            leftQuota--;
        }
    }
    if (logger.isDebugEnabled() && !result.isEmpty()) {
        logger.debug("{} Candidate segment list to be built : {}.", cubeName, String.join(", ", result));
    }
    return result;
}
Example 18
Source File: BuildJobSubmitter.java From kylin with Apache License 2.0
/**
 * Submit a build job for a streaming segment.
 *
 * @return true if the submission succeeded; false otherwise
 */
@NotAtomicIdempotent
boolean submitSegmentBuildJob(String cubeName, String segmentName) {
    logger.info("Try submit streaming segment build job, cube:{} segment:{}", cubeName, segmentName);
    CubeInstance cubeInstance = coordinator.getCubeManager().getCube(cubeName);
    try {
        // Step 1. create a new segment if it does not exist
        CubeSegment newSeg = null;
        Pair<Long, Long> segmentRange = CubeSegment.parseSegmentName(segmentName);
        boolean segmentExists = false;
        for (CubeSegment segment : cubeInstance.getSegments()) {
            SegmentRange.TSRange tsRange = segment.getTSRange();
            if (tsRange.start.v.equals(segmentRange.getFirst())
                    && segmentRange.getSecond().equals(tsRange.end.v)) {
                segmentExists = true;
                newSeg = segment;
            }
        }

        if (segmentExists) {
            logger.warn("Segment {} exists, it will be forced deleted.", segmentName);
            coordinator.getCubeManager().updateCubeDropSegments(cubeInstance, newSeg);
        }

        logger.debug("Create segment for {} {} .", cubeName, segmentName);
        newSeg = coordinator.getCubeManager().appendSegment(cubeInstance,
                new SegmentRange.TSRange(segmentRange.getFirst(), segmentRange.getSecond()));

        // Step 2. create and submit a new build job
        DefaultChainedExecutable executable = getStreamingCubingJob(newSeg);
        coordinator.getExecutableManager().addJob(executable);
        String jobId = executable.getId();
        newSeg.setLastBuildJobID(jobId);

        // Step 3. add it to the job trigger list
        SegmentJobBuildInfo segmentJobBuildInfo = new SegmentJobBuildInfo(cubeName, segmentName, jobId);
        addToJobTrackList(segmentJobBuildInfo);

        // Step 4. record the job in stream metadata in case the current node dies
        SegmentBuildState.BuildState state = new SegmentBuildState.BuildState();
        state.setBuildStartTime(System.currentTimeMillis());
        state.setState(SegmentBuildState.BuildState.State.BUILDING);
        state.setJobId(jobId);
        logger.debug("Commit building job {} for {} {} .", jobId, cubeName, segmentName);
        coordinator.getStreamMetadataStore().updateSegmentBuildState(cubeName, segmentName, state);
        return true;
    } catch (Exception e) {
        logger.error("Streaming job submit fail, cubeName:" + cubeName + " segment:" + segmentName, e);
        return false;
    }
}
Example 19
Source File: ExtendCubeToHybridCLI.java From kylin with Apache License 2.0
public void createFromCube(String projectName, String cubeName, String partitionDateStr) throws Exception {
    logger.info("Create hybrid for cube[" + cubeName + "], project[" + projectName + "], partition_date["
            + partitionDateStr + "].");

    CubeInstance cubeInstance = cubeManager.getCube(cubeName);
    if (!validateCubeInstance(cubeInstance)) {
        return;
    }

    CubeDesc cubeDesc = cubeDescManager.getCubeDesc(cubeInstance.getDescName());
    DataModelDesc dataModelDesc = metadataManager.getDataModelDesc(cubeDesc.getModelName());
    if (StringUtils.isEmpty(dataModelDesc.getPartitionDesc().getPartitionDateColumn())) {
        logger.error("No incremental cube, no need to extend.");
        return;
    }

    String owner = cubeInstance.getOwner();
    long partitionDate = partitionDateStr != null ? DateFormat.stringToMillis(partitionDateStr) : 0;

    // get new name for old cube and cube_desc
    String newCubeDescName = renameCube(cubeDesc.getName());
    String newCubeInstanceName = renameCube(cubeInstance.getName());
    while (cubeDescManager.getCubeDesc(newCubeDescName) != null)
        newCubeDescName = renameCube(newCubeDescName);
    while (cubeManager.getCube(newCubeInstanceName) != null)
        newCubeInstanceName = renameCube(newCubeInstanceName);

    // create new cube_instance for old segments
    CubeInstance newCubeInstance = CubeInstance.getCopyOf(cubeInstance);
    newCubeInstance.setName(newCubeInstanceName);
    newCubeInstance.setDescName(newCubeDescName);
    newCubeInstance.updateRandomUuid();
    Iterator<CubeSegment> segmentIterator = newCubeInstance.getSegments().iterator();
    CubeSegment currentSeg = null;
    while (segmentIterator.hasNext()) {
        currentSeg = segmentIterator.next();
        if (partitionDateStr != null && (currentSeg.getTSRange().start.v >= partitionDate
                || currentSeg.getTSRange().end.v > partitionDate)) {
            segmentIterator.remove();
            logger.info("CubeSegment[" + currentSeg + "] was removed.");
        }
    }
    if (currentSeg != null && partitionDateStr != null && partitionDate != currentSeg.getTSRange().end.v) {
        logger.error("PartitionDate must be end date of one segment.");
        return;
    }
    if (currentSeg != null && partitionDateStr == null)
        partitionDate = currentSeg.getTSRange().end.v;

    cubeManager.createCube(newCubeInstance, projectName, owner);
    logger.info("CubeInstance was saved at: " + newCubeInstance.getResourcePath());

    // create new cube for old segments
    CubeDesc newCubeDesc = CubeDesc.getCopyOf(cubeDesc);
    newCubeDesc.setName(newCubeDescName);
    newCubeDesc.updateRandomUuid();
    newCubeDesc.init(kylinConfig);
    newCubeDesc.setPartitionDateEnd(partitionDate);
    newCubeDesc.calculateSignature();
    cubeDescManager.createCubeDesc(newCubeDesc);
    logger.info("CubeDesc was saved at: " + newCubeDesc.getResourcePath());

    // update old cube_desc to new-version metadata
    cubeDesc.setPartitionDateStart(partitionDate);
    cubeDesc.setEngineType(IEngineAware.ID_MR_V2);
    cubeDesc.setStorageType(IStorageAware.ID_SHARDED_HBASE);
    cubeDesc.calculateSignature();
    cubeDescManager.updateCubeDesc(cubeDesc);
    logger.info("CubeDesc was saved at: " + cubeDesc.getResourcePath());

    // clear segments for old cube
    cubeInstance.setSegments(new Segments());
    cubeInstance.setStatus(RealizationStatusEnum.DISABLED);
    store.checkAndPutResource(cubeInstance.getResourcePath(), cubeInstance, CubeManager.CUBE_SERIALIZER);
    logger.info("CubeInstance was saved at: " + cubeInstance.getResourcePath());

    // create hybrid model for these two cubes
    List<RealizationEntry> realizationEntries = Lists.newArrayListWithCapacity(2);
    realizationEntries.add(RealizationEntry.create(RealizationType.CUBE, cubeInstance.getName()));
    realizationEntries.add(RealizationEntry.create(RealizationType.CUBE, newCubeInstance.getName()));
    HybridInstance hybridInstance = HybridInstance.create(kylinConfig, renameHybrid(cubeInstance.getName()),
            realizationEntries);
    store.checkAndPutResource(hybridInstance.getResourcePath(), hybridInstance, HybridManager.HYBRID_SERIALIZER);
    ProjectManager.getInstance(kylinConfig).moveRealizationToProject(RealizationType.HYBRID,
            hybridInstance.getName(), projectName, owner);
    logger.info("HybridInstance was saved at: " + hybridInstance.getResourcePath());

    // copy Acl from old cube to new cube
    copyAcl(cubeInstance.getId(), newCubeInstance.getId(), projectName);
    logger.info("Acl copied from [" + cubeName + "] to [" + newCubeInstanceName + "].");
}
Example 20
Source File: BuildJobSubmitter.java From kylin-on-parquet-v2 with Apache License 2.0
/**
 * Submit a build job for a streaming segment.
 *
 * @return true if the submission succeeded; false otherwise
 */
@NotAtomicIdempotent
boolean submitSegmentBuildJob(String cubeName, String segmentName) {
    logger.info("Try submit streaming segment build job, cube:{} segment:{}", cubeName, segmentName);
    CubeInstance cubeInstance = coordinator.getCubeManager().getCube(cubeName);
    try {
        // Step 1. create a new segment if it does not exist
        CubeSegment newSeg = null;
        Pair<Long, Long> segmentRange = CubeSegment.parseSegmentName(segmentName);
        boolean segmentExists = false;
        for (CubeSegment segment : cubeInstance.getSegments()) {
            SegmentRange.TSRange tsRange = segment.getTSRange();
            if (tsRange.start.v.equals(segmentRange.getFirst())
                    && segmentRange.getSecond().equals(tsRange.end.v)) {
                segmentExists = true;
                newSeg = segment;
            }
        }

        if (segmentExists) {
            logger.warn("Segment {} exists, it will be forced deleted.", segmentName);
            coordinator.getCubeManager().updateCubeDropSegments(cubeInstance, newSeg);
        }

        logger.debug("Create segment for {} {} .", cubeName, segmentName);
        newSeg = coordinator.getCubeManager().appendSegment(cubeInstance,
                new SegmentRange.TSRange(segmentRange.getFirst(), segmentRange.getSecond()));

        // Step 2. create and submit a new build job
        DefaultChainedExecutable executable = getStreamingCubingJob(newSeg);
        coordinator.getExecutableManager().addJob(executable);
        String jobId = executable.getId();
        newSeg.setLastBuildJobID(jobId);

        // Step 3. add it to the job trigger list
        SegmentJobBuildInfo segmentJobBuildInfo = new SegmentJobBuildInfo(cubeName, segmentName, jobId);
        addToJobTrackList(segmentJobBuildInfo);

        // Step 4. record the job in stream metadata in case the current node dies
        SegmentBuildState.BuildState state = new SegmentBuildState.BuildState();
        state.setBuildStartTime(System.currentTimeMillis());
        state.setState(SegmentBuildState.BuildState.State.BUILDING);
        state.setJobId(jobId);
        logger.debug("Commit building job {} for {} {} .", jobId, cubeName, segmentName);
        coordinator.getStreamMetadataStore().updateSegmentBuildState(cubeName, segmentName, state);
        return true;
    } catch (Exception e) {
        logger.error("Streaming job submit fail, cubeName:" + cubeName + " segment:" + segmentName, e);
        return false;
    }
}