org.apache.kylin.cube.CubeSegment Java Examples
The following examples show how to use
org.apache.kylin.cube.CubeSegment.
They are taken from open source projects; you can go to the original project or source file by following the link above each example.
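Before diving into the examples, here is a minimal sketch of how a CubeSegment is typically obtained in the first place. This is an illustration only: the cube name "sample_cube" is a placeholder, and a configured Kylin environment is assumed.

import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.metadata.model.SegmentStatusEnum;

public class CubeSegmentTour {
    public static void main(String[] args) {
        // Assumes a configured Kylin environment (kylin.properties reachable via KYLIN_CONF).
        KylinConfig config = KylinConfig.getInstanceFromEnv();
        CubeManager cubeManager = CubeManager.getInstance(config);
        // "sample_cube" is a placeholder, not a cube from the examples below.
        CubeInstance cube = cubeManager.getCube("sample_cube");
        // List the READY segments and the HBase table backing each one.
        for (CubeSegment segment : cube.getSegments(SegmentStatusEnum.READY)) {
            System.out.println(segment.getName() + " -> " + segment.getStorageLocationIdentifier());
        }
    }
}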
Example #1
Source File: CubeService.java From kylin-on-parquet-v2 with Apache License 2.0
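Cleans up the storage behind removed segments: collects each segment's HTable name and HDFS job working directory, then drops the tables and deletes the paths, provided storage clean-up after delete operations is enabled.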
private void cleanSegmentStorage(List<CubeSegment> toRemoveSegs) throws IOException {
    if (!KylinConfig.getInstanceFromEnv().cleanStorageAfterDelOperation()) {
        return;
    }

    if (toRemoveSegs != null && !toRemoveSegs.isEmpty()) {
        List<String> toDropHTables = Lists.newArrayListWithCapacity(toRemoveSegs.size());
        List<String> toDelHDFSPaths = Lists.newArrayListWithCapacity(toRemoveSegs.size());
        for (CubeSegment seg : toRemoveSegs) {
            toDropHTables.add(seg.getStorageLocationIdentifier());
            toDelHDFSPaths.add(JobBuilderSupport.getJobWorkingDir(seg.getConfig().getHdfsWorkingDirectory(),
                    seg.getLastBuildJobID()));
        }

        StorageCleanUtil.dropHTables(new HBaseAdmin(HBaseConnection.getCurrentHBaseConfiguration()), toDropHTables);
        StorageCleanUtil.deleteHDFSPath(HadoopUtil.getWorkingFileSystem(), toDelHDFSPaths);
    }
}
Example #2
Source File: Coordinator.java From kylin-on-parquet-v2 with Apache License 2.0
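Checks whether a cube is currently being optimized: either it has READY_PENDING segments, or one of its NEW segments was last built by an OPTIMIZE job.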
private boolean isInOptimize(CubeInstance cube) {
    Segments<CubeSegment> readyPendingSegments = cube.getSegments(SegmentStatusEnum.READY_PENDING);
    if (readyPendingSegments.size() > 0) {
        logger.info("The cube {} has READY_PENDING segments {}. It's not allowed for building",
                cube.getName(), readyPendingSegments);
        return true;
    }
    Segments<CubeSegment> newSegments = cube.getSegments(SegmentStatusEnum.NEW);
    for (CubeSegment newSegment : newSegments) {
        String jobId = newSegment.getLastBuildJobID();
        if (jobId == null) {
            continue;
        }
        AbstractExecutable job = getExecutableManager().getJob(jobId);
        if (job != null && job instanceof CubingJob) {
            CubingJob cubingJob = (CubingJob) job;
            if (CubingJob.CubingJobTypeEnum.OPTIMIZE.toString().equals(cubingJob.getJobType())) {
                logger.info(
                        "The cube {} is in optimization. It's not allowed to build new segments during optimization.",
                        cube.getName());
                return true;
            }
        }
    }
    return false;
}
Example #3
Source File: StatisticsDecisionUtil.java From kylin-on-parquet-v2 with Apache License 2.0
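Triggers the cuboid planner for a segment: if optimization is applicable, it fetches the recommended cuboid list and persists it through a CubeUpdate on a writable copy of the cube.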
public static void optimizeCubingPlan(CubeSegment segment) throws IOException {
    if (isAbleToOptimizeCubingPlan(segment)) {
        logger.info("It's able to trigger cuboid planner algorithm.");
    } else {
        return;
    }

    Map<Long, Long> recommendCuboidsWithStats = CuboidRecommenderUtil.getRecommendCuboidList(segment);
    if (recommendCuboidsWithStats == null || recommendCuboidsWithStats.isEmpty()) {
        return;
    }

    CubeInstance cube = segment.getCubeInstance();
    CubeUpdate update = new CubeUpdate(cube.latestCopyForWrite());
    update.setCuboids(recommendCuboidsWithStats);
    CubeManager.getInstance(cube.getConfig()).updateCube(update);
}
Example #4
Source File: CuboidStatsReaderUtil.java From kylin with Apache License 2.0
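Reads the HLL-based row estimates for a given set of cuboids from a segment's statistics, returning the per-cuboid counts paired with the source row count.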
public static Pair<Map<Long, Long>, Long> readCuboidStatsWithSourceFromSegment(Set<Long> cuboidIds,
        CubeSegment cubeSegment) throws IOException {
    if (cubeSegment == null) {
        logger.warn("The cube segment cannot be null");
        return null;
    }

    CubeStatsReader cubeStatsReader = new CubeStatsReader(cubeSegment, null, cubeSegment.getConfig());
    if (cubeStatsReader.getCuboidRowEstimatesHLL() == null
            || cubeStatsReader.getCuboidRowEstimatesHLL().isEmpty()) {
        logger.info("Cuboid Statistics is not enabled.");
        return null;
    }

    Map<Long, Long> cuboidsWithStatsAll = cubeStatsReader.getCuboidRowEstimatesHLL();
    Map<Long, Long> cuboidsWithStats = Maps.newHashMapWithExpectedSize(cuboidIds.size());
    for (Long cuboid : cuboidIds) {
        Long rowEstimate = cuboidsWithStatsAll.get(cuboid);
        if (rowEstimate == null) {
            logger.warn("Cannot get the row count stats for cuboid " + cuboid);
        } else {
            cuboidsWithStats.put(cuboid, rowEstimate);
        }
    }
    return new Pair<>(cuboidsWithStats, cubeStatsReader.sourceRowCount);
}
Example #5
Source File: StatisticsDecisionUtil.java From kylin with Apache License 2.0
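Decides whether the cube planner may run for a segment: the planner must be enabled, there must be no READY_PENDING segments, and the segment layout must match one of the supported cases (a fresh cube, or a single READY segment covering the same range when planning for existing cubes is enabled).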
public static boolean isAbleToOptimizeCubingPlan(CubeSegment segment) {
    CubeInstance cube = segment.getCubeInstance();
    if (!cube.getConfig().isCubePlannerEnabled())
        return false;

    if (cube.getSegments(SegmentStatusEnum.READY_PENDING).size() > 0) {
        logger.info("Has ready pending segments and will not enable cube planner.");
        return false;
    }
    List<CubeSegment> readySegments = cube.getSegments(SegmentStatusEnum.READY);
    List<CubeSegment> newSegments = cube.getSegments(SegmentStatusEnum.NEW);
    if (newSegments.size() <= 1 && //
            (readySegments.size() == 0 || //
                    (cube.getConfig().isCubePlannerEnabledForExistingCube() && readySegments.size() == 1
                            && readySegments.get(0).getSegRange().equals(segment.getSegRange())))) {
        return true;
    } else {
        return false;
    }
}
Example #6
Source File: FlinkBatchCubingJobBuilder2.java From kylin-on-parquet-v2 with Apache License 2.0
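Populates a FlinkExecutable for layered cubing with the cube name, segment ID, flat-table input, metadata URL, output path, and any additional Flink jars.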
public void configureFlinkJob(final CubeSegment seg, final FlinkExecutable flinkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_CUBE_NAME.getOpt(),
            seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_PATH.getOpt(), tablePath);
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    flinkExecutable.setJobId(jobId);

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());

    flinkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_FLINK_CUBE);
}
Example #7
Source File: JobService.java From Kylin with Apache License 2.0
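Cancels a job; if the related segment is still NEW, the segment is removed from the cube before the job is discarded.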
@PreAuthorize(Constant.ACCESS_HAS_ROLE_ADMIN
        + " or hasPermission(#job, 'ADMINISTRATION') or hasPermission(#job, 'OPERATION') or hasPermission(#job, 'MANAGEMENT')")
public JobInstance cancelJob(String jobId) throws IOException, JobException {
    // CubeInstance cube = this.getCubeManager().getCube(job.getRelatedCube());
    // for (BuildCubeJob cubeJob : listAllCubingJobs(cube.getName(), null,
    //         EnumSet.of(ExecutableState.READY, ExecutableState.RUNNING))) {
    //     getExecutableManager().stopJob(cubeJob.getId());
    // }
    final JobInstance jobInstance = getJobInstance(jobId);
    final String segmentId = jobInstance.getRelatedSegment();
    CubeInstance cubeInstance = getCubeManager().getCube(jobInstance.getRelatedCube());
    final CubeSegment segment = cubeInstance.getSegmentById(segmentId);
    if (segment.getStatus() == SegmentStatusEnum.NEW) {
        cubeInstance.getSegments().remove(segment);
        getCubeManager().updateCube(cubeInstance);
    }
    getExecutableManager().discardJob(jobId);
    return jobInstance;
}
Example #8
Source File: HybridCubeCLI.java From kylin with Apache License 2.0
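Validates that the cube segments joining a hybrid do not overlap, by sorting all segment ranges and comparing each neighbor pair.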
private void checkSegmentOffset(List<RealizationEntry> realizationEntries) {
    List<SegmentRange> segmentRanges = Lists.newArrayList();

    for (RealizationEntry entry : realizationEntries) {
        if (entry.getType() != RealizationType.CUBE) {
            throw new IllegalArgumentException("Wrong realization type: " + entry.getType()
                    + ", only cube supported.");
        }
        CubeInstance cubeInstance = cubeManager.getCube(entry.getRealization());
        Segments<CubeSegment> segments = cubeInstance.getSegments();
        for (CubeSegment segment : segments) {
            segmentRanges.add(segment.getSegRange());
        }
    }

    if (segmentRanges.size() >= 2) {
        Collections.sort(segmentRanges);

        for (int i = 0; i < segmentRanges.size() - 1; i++) {
            if (segmentRanges.get(i).overlaps(segmentRanges.get(i + 1))) {
                throw new IllegalArgumentException("Segments have overlap, could not hybrid. First Segment Range: ["
                        + segmentRanges.get(i).start.v + "," + segmentRanges.get(i).end.v
                        + "], Second Segment Range: [" + segmentRanges.get(i + 1).start.v + ","
                        + segmentRanges.get(i + 1).end.v + "]");
            }
        }
    }
}
Example #9
Source File: SparkBatchMergeJobBuilder2.java From kylin-on-parquet-v2 with Apache License 2.0
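Builds the SparkExecutable that merges dictionaries across segments, wiring up the cube name, segment IDs, metadata URL, and output paths for dictionaries and statistics.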
public SparkExecutable createMergeDictionaryStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    sparkExecutable.setClassName(SparkMergingDictionary.class.getName());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_MERGE_SEGMENT_IDS.getOpt(),
            StringUtil.join(mergingSegmentIds, ","));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));

    sparkExecutable.setJobId(jobID);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY + ":" + seg.toString());
    sparkExecutable.setSparkConfigName(ExecutableConstants.SPARK_SPECIFIC_CONFIG_NAME_MERGE_DICTIONARY);

    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());

    return sparkExecutable;
}
Example #10
Source File: SparkExecutable.java From kylin-on-parquet-v2 with Apache License 2.0
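Collects cube metadata, per-segment dictionary paths, and (when present) segment statistics into a dump list, then uploads it to the job's distributed metadata URL.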
private void attachSegmentsMetadataWithDict(List<CubeSegment> segments) throws IOException {
    Set<String> dumpList = new LinkedHashSet<>(
            JobRelatedMetaUtil.collectCubeMetadata(segments.get(0).getCubeInstance()));
    ResourceStore rs = ResourceStore.getStore(segments.get(0).getConfig());
    for (CubeSegment segment : segments) {
        dumpList.addAll(segment.getDictionaryPaths());
        if (rs.exists(segment.getStatisticsResourcePath())) {
            // cube statistics is not available for new segment
            dumpList.add(segment.getStatisticsResourcePath());
        }
        // tiretree global domain dic
        CubeDescTiretreeGlobalDomainDictUtil.cuboidJob(segment.getCubeDesc(), dumpList);
    }

    JobRelatedMetaUtil.dumpAndUploadKylinPropsAndMetadata(dumpList, (KylinConfigExt) segments.get(0).getConfig(),
            this.getParam(SparkCubingByLayer.OPTION_META_URL.getOpt()));
}
Example #11
Source File: KafkaFlatTableJob.java From kylin with Apache License 2.0
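Writes each Kafka partition's start and end offsets from the segment into the job configuration, then wires up the mapper, input format, and sequence-file output.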
private void setupMapper(CubeSegment cubeSeg) throws IOException {
    // set the segment's offset info to job conf
    Map<Integer, Long> offsetStart = cubeSeg.getSourcePartitionOffsetStart();
    Map<Integer, Long> offsetEnd = cubeSeg.getSourcePartitionOffsetEnd();

    Integer minPartition = Collections.min(offsetStart.keySet());
    Integer maxPartition = Collections.max(offsetStart.keySet());
    job.getConfiguration().set(CONFIG_KAFKA_PARITION_MIN, minPartition.toString());
    job.getConfiguration().set(CONFIG_KAFKA_PARITION_MAX, maxPartition.toString());

    for (Integer partition : offsetStart.keySet()) {
        job.getConfiguration().set(CONFIG_KAFKA_PARITION_START + partition, offsetStart.get(partition).toString());
        job.getConfiguration().set(CONFIG_KAFKA_PARITION_END + partition, offsetEnd.get(partition).toString());
    }

    job.setMapperClass(KafkaFlatTableMapper.class);
    job.setInputFormatClass(KafkaInputFormat.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
}
Example #12
Source File: SerializedHBaseTupleIterator.java From Kylin with Apache License 2.0
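Constructs one CubeSegmentTupleIterator per segment from the HBase key ranges and chains them behind a single tuple-iterator interface.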
public SerializedHBaseTupleIterator(HConnection conn, List<HBaseKeyRange> segmentKeyRanges, CubeInstance cube,
        Collection<TblColRef> dimensions, TupleFilter filter, Collection<TblColRef> groupBy,
        Collection<RowValueDecoder> rowValueDecoders, StorageContext context) {
    this.context = context;
    int limit = context.getLimit();
    this.partialResultLimit = Math.max(limit, PARTIAL_DEFAULT_LIMIT);

    this.segmentIteratorList = new ArrayList<CubeSegmentTupleIterator>(segmentKeyRanges.size());
    Map<CubeSegment, List<HBaseKeyRange>> rangesMap = makeRangesMap(segmentKeyRanges);
    for (Map.Entry<CubeSegment, List<HBaseKeyRange>> entry : rangesMap.entrySet()) {
        CubeSegmentTupleIterator segIter = new CubeSegmentTupleIterator(entry.getKey(), entry.getValue(), conn,
                dimensions, filter, groupBy, rowValueDecoders, context);
        this.segmentIteratorList.add(segIter);
    }

    this.segmentIteratorIterator = this.segmentIteratorList.iterator();
    if (this.segmentIteratorIterator.hasNext()) {
        this.segmentIterator = this.segmentIteratorIterator.next();
    } else {
        this.segmentIterator = ITupleIterator.EMPTY_TUPLE_ITERATOR;
    }
}
Example #13
Source File: LocalWithSparkSessionTest.java From kylin-on-parquet-v2 with Apache License 2.0
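Test helper that appends a new segment for a time range and submits an NSparkCubingJob for it, asserting that the distributed metadata URL points into the HDFS working directory.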
public ExecutableState buildCuboid(String cubeName, SegmentRange.TSRange tsRange) throws Exception {
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    CubeManager cubeMgr = CubeManager.getInstance(config);
    CubeInstance cube = cubeMgr.getCube(cubeName);
    ExecutableManager execMgr = ExecutableManager.getInstance(config);
    DataModelManager.getInstance(config).getModels();

    // ready cube, segment, cuboid layout
    CubeSegment oneSeg = cubeMgr.appendSegment(cube, tsRange);
    NSparkCubingJob job = NSparkCubingJob.create(Sets.newHashSet(oneSeg), "ADMIN");
    NSparkCubingStep sparkStep = job.getSparkCubingStep();
    StorageURL distMetaUrl = StorageURL.valueOf(sparkStep.getDistMetaUrl());
    Assert.assertEquals("hdfs", distMetaUrl.getScheme());
    Assert.assertTrue(distMetaUrl.getParameter("path").startsWith(config.getHdfsWorkingDirectory()));

    // launch the job
    execMgr.addJob(job);

    return wait(job);
}
Example #14
Source File: CubeController.java From kylin with Apache License 2.0
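REST endpoint that regenerates the flat-table SELECT statement for a named segment of a cube.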
/**
 * Get SQL of a Cube segment
 *
 * @param cubeName    Cube Name
 * @param segmentName Segment Name
 * @return the flat table SQL wrapped in a GeneralResponse
 * @throws IOException
 */
@RequestMapping(value = "/{cubeName}/segs/{segmentName}/sql", method = { RequestMethod.GET }, produces = {
        "application/json" })
@ResponseBody
public GeneralResponse getSql(@PathVariable String cubeName, @PathVariable String segmentName) {
    checkCubeExists(cubeName);
    CubeInstance cube = cubeService.getCubeManager().getCube(cubeName);

    CubeSegment segment = cube.getSegment(segmentName, null);
    if (segment == null) {
        throw new NotFoundException("Cannot find segment " + segmentName);
    }

    IJoinedFlatTableDesc flatTableDesc = new CubeJoinedFlatTableDesc(segment, true);
    String sql = JoinedFlatTable.generateSelectDataStatement(flatTableDesc);

    GeneralResponse response = new GeneralResponse();
    response.setProperty("sql", sql);

    return response;
}
Example #15
Source File: JoinedFlatTable.java From kylin-on-parquet-v2 with Apache License 2.0
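Generates the INSERT OVERWRITE statement for the flat table, delegating via reflection to a user-configured advanced flat-table class when one is enabled.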
public static String generateInsertDataStatement(IJoinedFlatTableDesc flatDesc) {
    CubeSegment segment = ((CubeSegment) flatDesc.getSegment());
    KylinConfig kylinConfig;
    if (null == segment) {
        kylinConfig = KylinConfig.getInstanceFromEnv();
    } else {
        kylinConfig = (flatDesc.getSegment()).getConfig();
    }

    if (kylinConfig.isAdvancedFlatTableUsed()) {
        try {
            Class advancedFlatTable = Class.forName(kylinConfig.getAdvancedFlatTableClass());
            Method method = advancedFlatTable.getMethod("generateInsertDataStatement", IJoinedFlatTableDesc.class,
                    JobEngineConfig.class);
            // NOTE: getMethod declares two parameters but invoke passes only one;
            // this reflective path would fail with IllegalArgumentException if exercised.
            return (String) method.invoke(null, flatDesc);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    return "INSERT OVERWRITE TABLE " + quoteIdentifier(flatDesc.getTableName(), null) + " "
            + generateSelectDataStatement(flatDesc) + ";\n";
}
Example #16
Source File: CubingJobBuilder.java From Kylin with Apache License 2.0
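Creates the job step that updates cube metadata after a merge, recording the merged segment IDs and the HFile-conversion step it depends on.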
private UpdateCubeInfoAfterMergeStep createUpdateCubeInfoAfterMergeStep(CubeSegment seg,
        List<String> mergingSegmentIds, String convertToHFileStepId, String jobId) {
    UpdateCubeInfoAfterMergeStep result = new UpdateCubeInfoAfterMergeStep();
    result.setName(ExecutableConstants.STEP_NAME_UPDATE_CUBE_INFO);
    result.setCubeName(seg.getCubeInstance().getName());
    result.setSegmentId(seg.getUuid());
    result.setMergingSegmentIds(mergingSegmentIds);
    result.setConvertToHFileStepId(convertToHFileStepId);
    result.setCubingJobId(jobId);
    return result;
}
Example #17
Source File: BuildCubeWithEngine.java From kylin-on-parquet-v2 with Apache License 2.0
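Test helper that merges the segments covering a date range into one and reports whether the batch merge job succeeded.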
private Boolean mergeSegment(String cubeName, long startDate, long endDate) throws Exception {
    CubeSegment segment = cubeManager.mergeSegments(cubeManager.getCube(cubeName),
            new TSRange(startDate, endDate), null, true);
    DefaultChainedExecutable job = EngineFactory.createBatchMergeJob(segment, "TEST");
    jobService.addJob(job);
    ExecutableState state = waitForJob(job.getId());
    return Boolean.valueOf(ExecutableState.SUCCEED == state);
}
Example #18
Source File: FlinkExecutable.java From kylin-on-parquet-v2 with Apache License 2.0
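Flink counterpart of Example #10: dumps cube metadata, dictionary paths, and available segment statistics, then uploads them to the job's metadata URL.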
private void attachSegmentsMetadataWithDict(List<CubeSegment> segments) throws IOException {
    Set<String> dumpList = new LinkedHashSet<>();
    dumpList.addAll(JobRelatedMetaUtil.collectCubeMetadata(segments.get(0).getCubeInstance()));
    ResourceStore rs = ResourceStore.getStore(segments.get(0).getConfig());
    for (CubeSegment segment : segments) {
        dumpList.addAll(segment.getDictionaryPaths());
        if (rs.exists(segment.getStatisticsResourcePath())) {
            // cube statistics is not available for new segment
            dumpList.add(segment.getStatisticsResourcePath());
        }
    }

    JobRelatedMetaUtil.dumpAndUploadKylinPropsAndMetadata(dumpList, (KylinConfigExt) segments.get(0).getConfig(),
            this.getParam(FlinkCubingByLayer.OPTION_META_URL.getOpt()));
}
Example #19
Source File: Coordinator.java From kylin-on-parquet-v2 with Apache License 2.0
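Parses a streaming segment name into a time range, appends the segment, submits a streaming cubing job for it, and records the build state in the stream metadata store.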
private boolean triggerSegmentBuild(String cubeName, String segmentName) {
    CubeManager cubeManager = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
    CubeInstance cubeInstance = cubeManager.getCube(cubeName);
    try {
        Pair<Long, Long> segmentRange = CubeSegment.parseSegmentName(segmentName);
        logger.info("submit streaming segment build, cube:{} segment:{}", cubeName, segmentName);
        CubeSegment newSeg = getCubeManager().appendSegment(cubeInstance,
                new TSRange(segmentRange.getFirst(), segmentRange.getSecond()));
        DefaultChainedExecutable executable = new StreamingCubingEngine().createStreamingCubingJob(newSeg,
                "SYSTEM");
        getExecutableManager().addJob(executable);
        CubingJob cubingJob = (CubingJob) executable;
        newSeg.setLastBuildJobID(cubingJob.getId());

        SegmentJobBuildInfo segmentJobBuildInfo = new SegmentJobBuildInfo(cubeName, segmentName, cubingJob.getId());
        jobStatusChecker.addSegmentBuildJob(segmentJobBuildInfo);
        SegmentBuildState.BuildState state = new SegmentBuildState.BuildState();
        state.setBuildStartTime(System.currentTimeMillis());
        state.setState(SegmentBuildState.BuildState.State.BUILDING);
        state.setJobId(cubingJob.getId());
        streamMetadataStore.updateSegmentBuildState(cubeName, segmentName, state);
        return true;
    } catch (Exception e) {
        logger.error("streaming job submit fail, cubeName:" + cubeName + " segment:" + segmentName, e);
        return false;
    }
}
Example #20
Source File: CubingJobBuilder.java From Kylin with Apache License 2.0
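Creates the MapReduce step that samples the cuboid key distribution used to calculate HBase region splits.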
private MapReduceExecutable createRangeRowkeyDistributionStep(CubeSegment seg, String inputPath) {
    MapReduceExecutable rowkeyDistributionStep = new MapReduceExecutable();
    rowkeyDistributionStep.setName(ExecutableConstants.STEP_NAME_GET_CUBOID_KEY_DISTRIBUTION);
    StringBuilder cmd = new StringBuilder();

    appendMapReduceParameters(cmd, seg);
    appendExecCmdParameters(cmd, "input", inputPath);
    appendExecCmdParameters(cmd, "output", getRowkeyDistributionOutputPath(seg));
    appendExecCmdParameters(cmd, "cubename", seg.getCubeInstance().getName());
    appendExecCmdParameters(cmd, "jobname",
            "Kylin_Region_Splits_Calculator_" + seg.getCubeInstance().getName() + "_Step");

    rowkeyDistributionStep.setMapReduceParams(cmd.toString());
    rowkeyDistributionStep.setMapReduceJobClass(RangeKeyDistributionJob.class);
    return rowkeyDistributionStep;
}
Example #21
Source File: UpdateCubeInfoAfterBuildStep.java From kylin with Apache License 2.0
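After a build, persists any new external lookup snapshot paths: global snapshots are updated on the cube, non-global ones on the segment.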
private void saveExtSnapshotIfNeeded(CubeManager cubeManager, CubeInstance cube, CubeSegment segment)
        throws IOException {
    String extLookupSnapshotStr = this.getParam(BatchConstants.ARG_EXT_LOOKUP_SNAPSHOTS_INFO);
    if (extLookupSnapshotStr == null || extLookupSnapshotStr.isEmpty()) {
        return;
    }
    Map<String, String> extLookupSnapshotMap = LookupMaterializeContext.parseLookupSnapshots(extLookupSnapshotStr);
    logger.info("update ext lookup snapshots:{}", extLookupSnapshotMap);
    List<SnapshotTableDesc> snapshotTableDescList = cube.getDescriptor().getSnapshotTableDescList();
    for (SnapshotTableDesc snapshotTableDesc : snapshotTableDescList) {
        String tableName = snapshotTableDesc.getTableName();
        if (snapshotTableDesc.isExtSnapshotTable()) {
            String newSnapshotResPath = extLookupSnapshotMap.get(tableName);
            if (newSnapshotResPath == null || newSnapshotResPath.isEmpty()) {
                continue;
            }

            if (snapshotTableDesc.isGlobal()) {
                if (!newSnapshotResPath.equals(cube.getSnapshotResPath(tableName))) {
                    cubeManager.updateCubeLookupSnapshot(cube, tableName, newSnapshotResPath);
                }
            } else {
                segment.putSnapshotResPath(tableName, newSnapshotResPath);
            }
        }
    }
}
Example #22
Source File: SegmentPruner.java From kylin-on-parquet-v2 with Apache License 2.0
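Filters a cube's READY segments down to those that pass the pruner's check for the current query.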
public List<CubeSegment> listSegmentsForQuery(CubeInstance cube) {
    List<CubeSegment> r = new ArrayList<>();
    for (CubeSegment seg : cube.getSegments(SegmentStatusEnum.READY)) {
        if (check(seg))
            r.add(seg);
    }
    return r;
}
Example #23
Source File: FlinkMergingDictionary.java From kylin with Apache License 2.0
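Resolves an array of segment IDs to their CubeSegment instances.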
private List<CubeSegment> getMergingSegments(CubeInstance cube, String[] segmentIds) {
    List<CubeSegment> result = Lists.newArrayListWithCapacity(segmentIds.length);
    for (String id : segmentIds) {
        result.add(cube.getSegmentById(id));
    }
    return result;
}
Example #24
Source File: CubeHBaseRPC.java From kylin with Apache License 2.0
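Storage-layer constructor that verifies the generic ISegment is a CubeSegment and prepares the fuzzy key and mask encoders used for HBase scans.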
public CubeHBaseRPC(ISegment segment, Cuboid cuboid, GTInfo fullGTInfo, StorageContext context) {
    Preconditions.checkArgument(segment instanceof CubeSegment, "segment must be CubeSegment");

    this.cubeSeg = (CubeSegment) segment;
    this.cuboid = cuboid;
    this.fullGTInfo = fullGTInfo;
    this.queryContext = QueryContextFacade.current();
    this.storageContext = context;

    this.fuzzyKeyEncoder = new FuzzyKeyEncoder(cubeSeg, cuboid);
    this.fuzzyMaskEncoder = new FuzzyMaskEncoder(cubeSeg, cuboid);
}
Example #25
Source File: AbstractHadoopJob.java From kylin with Apache License 2.0
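Dumps a segment's cube metadata into the Hadoop job configuration, optionally including dictionary paths and the statistics resource.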
protected void attachSegmentMetadata(CubeSegment segment, Configuration conf, boolean ifDictIncluded,
        boolean ifStatsIncluded) throws IOException {
    Set<String> dumpList = new LinkedHashSet<>(collectCubeMetadata(segment.getCubeInstance()));
    if (ifDictIncluded) {
        dumpList.addAll(segment.getDictionaryPaths());
    }
    if (ifStatsIncluded) {
        dumpList.add(segment.getStatisticsResourcePath());
    }
    // tiretree global domain dic
    CubeDescTiretreeGlobalDomainDictUtil.cuboidJob(segment.getCubeDesc(), dumpList);

    dumpKylinPropsAndMetadata(segment.getProject(), dumpList, segment.getConfig(), conf);
}
Example #26
Source File: FactDistinctColumnsJob.java From kylin with Apache License 2.0
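Configures the reducer side of the fact-distinct-columns job: reducer count (capped at 250), partitioner, and one named output per result type (columns, dictionaries, statistics, partition info).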
private void setupReducer(Path output, CubeSegment cubeSeg) throws IOException {
    FactDistinctColumnsReducerMapping reducerMapping = new FactDistinctColumnsReducerMapping(
            cubeSeg.getCubeInstance());

    int numberOfReducers = reducerMapping.getTotalReducerNum();
    logger.info("{} has reducers {}.", this.getClass().getName(), numberOfReducers);
    if (numberOfReducers > 250) {
        throw new IllegalArgumentException(
                "The max reducer number for FactDistinctColumnsJob is 250, but now it is " + numberOfReducers
                        + ", decrease 'kylin.engine.mr.uhc-reducer-count'");
    }

    job.setReducerClass(FactDistinctColumnsReducer.class);
    job.setPartitionerClass(FactDistinctColumnPartitioner.class);
    job.setNumReduceTasks(numberOfReducers);

    // make each reducer output to respective dir
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_COLUMN, SequenceFileOutputFormat.class,
            NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class,
            NullWritable.class, ArrayPrimitiveWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_STATISTICS, SequenceFileOutputFormat.class,
            LongWritable.class, BytesWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_PARTITION, TextOutputFormat.class,
            NullWritable.class, LongWritable.class);

    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

    // prevent to create zero-sized default output
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    deletePath(job.getConfiguration(), output);
}
Example #27
Source File: CubingJobBuilder.java From Kylin with Apache License 2.0
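Assembles a complete merge job: merge the cuboids of the merging segments, convert the result to HFiles, and update the cube metadata.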
public CubingJob mergeJob(CubeSegment seg) {
    checkPreconditions(seg);

    CubingJob result = initialJob(seg, "MERGE");
    final String jobId = result.getId();
    final String mergedCuboidPath = getJobWorkingDir(jobId) + "/" + seg.getCubeInstance().getName() + "/cuboid/";

    List<CubeSegment> mergingSegments = seg.getCubeInstance().getMergingSegments(seg);
    Preconditions.checkState(mergingSegments.size() > 1, "there should be at least 2 segments to merge");
    List<String> mergingSegmentIds = Lists.newArrayList();
    List<String> mergingCuboidPaths = Lists.newArrayList();
    for (CubeSegment merging : mergingSegments) {
        mergingSegmentIds.add(merging.getUuid());
        mergingCuboidPaths.add(getPathToMerge(merging));
    }

    // merge cuboid
    addMergeSteps(seg, mergingSegmentIds, mergingCuboidPaths, mergedCuboidPath, result);

    // convert htable
    AbstractExecutable convertCuboidToHfileStep = addHTableSteps(seg, mergedCuboidPath, result);

    // update cube info
    result.addTask(createUpdateCubeInfoAfterMergeStep(seg, mergingSegmentIds, convertCuboidToHfileStep.getId(), jobId));

    return result;
}
Example #28
Source File: UpdateCubeInfoAfterOptimizeStep.java From kylin-on-parquet-v2 with Apache License 2.0
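After an optimize build, copies the input-record counts from the original segment, records the new size and build job on the optimized segment, and promotes it.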
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeManager = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = cubeManager.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment segment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

    CubeSegment originalSegment = cube.getOriginalSegmentToOptimize(segment);
    long sourceCount = originalSegment.getInputRecords();
    long sourceSizeBytes = originalSegment.getInputRecordsSize();

    CubingJob cubingJob = (CubingJob) getManager().getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
    long cubeSizeBytes = cubingJob.findCubeSizeBytes();

    segment.setLastBuildJobID(CubingExecutableUtil.getCubingJobId(this.getParams()));
    segment.setLastBuildTime(System.currentTimeMillis());
    segment.setSizeKB(cubeSizeBytes / 1024);
    segment.setInputRecords(sourceCount);
    segment.setInputRecordsSize(sourceSizeBytes);
    segment.setDimensionRangeInfoMap(originalSegment.getDimensionRangeInfoMap());

    try {
        cubeManager.promoteNewlyOptimizeSegments(cube, segment);
        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to update cube after build", e);
        return ExecuteResult.createError(e);
    }
}
Example #29
Source File: CuboidRecommenderUtil.java From kylin-on-parquet-v2 with Apache License 2.0
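Cube planner phase one: builds CuboidStats from the segment's statistics and asks the CuboidRecommender for a recommended cuboid list, returning null when statistics are missing or the base cuboid count is zero.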
/** Trigger cube planner phase one */
public static Map<Long, Long> getRecommendCuboidList(CubeSegment segment) throws IOException {
    if (segment == null) {
        return null;
    }

    CubeStatsReader cubeStatsReader = new CubeStatsReader(segment, null, segment.getConfig());
    if (cubeStatsReader.getCuboidRowEstimatesHLL() == null
            || cubeStatsReader.getCuboidRowEstimatesHLL().isEmpty()) {
        logger.info("Cuboid Statistics is not enabled.");
        return null;
    }
    CubeInstance cube = segment.getCubeInstance();
    long baseCuboid = cube.getCuboidScheduler().getBaseCuboidId();
    if (cubeStatsReader.getCuboidRowEstimatesHLL().get(baseCuboid) == null
            || cubeStatsReader.getCuboidRowEstimatesHLL().get(baseCuboid) == 0L) {
        logger.info(BASE_CUBOID_COUNT_IN_CUBOID_STATISTICS_IS_ZERO);
        return null;
    }

    Set<Long> mandatoryCuboids = segment.getCubeDesc().getMandatoryCuboids();

    String key = cube.getName();
    CuboidStats cuboidStats = new CuboidStats.Builder(key, baseCuboid,
            cubeStatsReader.getCuboidRowEstimatesHLL(), cubeStatsReader.getCuboidSizeMap())
                    .setMandatoryCuboids(mandatoryCuboids)
                    .setBPUSMinBenefitRatio(segment.getConfig().getCubePlannerBPUSMinBenefitRatio()).build();
    return CuboidRecommender.getInstance().getRecommendCuboidList(cuboidStats, segment.getConfig(),
            !mandatoryCuboids.isEmpty());
}
Example #30
Source File: CubeInstanceCreator.java From kylin-on-parquet-v2 with Apache License 2.0
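Creates a disabled, segment-less CubeInstance whose name and descriptor name are derived from the table name.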
public static CubeInstance generateKylinCubeInstance(String owner, String tableName) {
    CubeInstance cubeInstance = new CubeInstance();
    cubeInstance.setName(tableName.replace('.', '_'));
    cubeInstance.setSegments(new Segments<CubeSegment>());
    cubeInstance.setDescName(tableName.replace('.', '_'));
    cubeInstance.setStatus(RealizationStatusEnum.DISABLED);
    cubeInstance.setOwner(owner);
    cubeInstance.setCreateTimeUTC(0L);
    cubeInstance.updateRandomUuid();
    return cubeInstance;
}