Java Code Examples for org.apache.kylin.common.persistence.ResourceStore#putResource()
The following examples show how to use org.apache.kylin.common.persistence.ResourceStore#putResource().
The examples are drawn from open-source projects; the source file, project, and license are noted above each example.
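Before the project examples, here is a minimal, self-contained sketch of the stream-based putResource(path, content, timestamp) overload that most of the examples below rely on. The class name, resource path, and payload are hypothetical, and the sketch assumes a running Kylin environment in which KylinConfig.getInstanceFromEnv() can resolve a metadata store; treat it as an illustration rather than a drop-in snippet.

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;

import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.persistence.ResourceStore;

public class PutResourceSketch { // hypothetical illustration class

    public static void main(String[] args) throws IOException {
        // Resolve the ResourceStore backing the current Kylin metadata URL
        // (assumes the Kylin environment/config is available).
        KylinConfig config = KylinConfig.getInstanceFromEnv();
        ResourceStore store = ResourceStore.getStore(config);

        // Stream-based overload: write raw bytes under a resource path together
        // with a last-modified timestamp, as the dump and statistics examples below do.
        String path = "/data/example_fact.csv"; // hypothetical resource path
        InputStream in = new ByteArrayInputStream("a,b,c".getBytes(StandardCharsets.UTF_8));
        try {
            store.putResource(path, in, System.currentTimeMillis());
        } finally {
            in.close();
        }
        // The serializer-based overload, putResource(path, entity, serializer), appears
        // in the ProjectManager, CubeManager, DictionaryManager, and SnapshotManager examples.
    }
}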
Example 1
Source File: JobRelatedMetaUtil.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
public static void dumpResources(KylinConfig kylinConfig, File metaDir, Set<String> dumpList) throws IOException {
    long startTime = System.currentTimeMillis();

    ResourceStore from = ResourceStore.getStore(kylinConfig);
    KylinConfig localConfig = KylinConfig.createInstanceFromUri(metaDir.getAbsolutePath());
    ResourceStore to = ResourceStore.getStore(localConfig);
    for (String path : dumpList) {
        RawResource res = from.getResource(path);
        if (res == null)
            throw new IllegalStateException("No resource found at -- " + path);
        to.putResource(path, res.content(), res.lastModified());
        res.content().close();
    }

    logger.debug("Dump resources to {} took {} ms", metaDir, System.currentTimeMillis() - startTime);
}
Example 2
Source File: MetaDumpUtil.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
public static void dumpResources(KylinConfig kylinConfig, String metaOutDir, Set<String> dumpList)
        throws IOException {
    long startTime = System.currentTimeMillis();

    ResourceStore from = ResourceStore.getStore(kylinConfig);
    KylinConfig localConfig = KylinConfig.createInstanceFromUri(metaOutDir);
    ResourceStore to = ResourceStore.getStore(localConfig);
    final String[] tolerantResources = { "/table_exd" };
    for (String path : dumpList) {
        RawResource res = from.getResource(path);
        if (res == null) {
            if (StringUtils.startsWithAny(path, tolerantResources)) {
                continue;
            } else {
                throw new IllegalStateException("No resource found at -- " + path);
            }
        }
        to.putResource(path, res.content(), res.lastModified());
        res.content().close();
    }

    logger.debug("Dump resources to {} took {} ms", metaOutDir, System.currentTimeMillis() - startTime);
}
Example 3
Source File: JobRelatedMetaUtil.java From kylin with Apache License 2.0 | 6 votes |
public static void dumpResources(KylinConfig kylinConfig, File metaDir, Set<String> dumpList) throws IOException {
    long startTime = System.currentTimeMillis();

    ResourceStore from = ResourceStore.getStore(kylinConfig);
    KylinConfig localConfig = KylinConfig.createInstanceFromUri(metaDir.getAbsolutePath());
    ResourceStore to = ResourceStore.getStore(localConfig);
    for (String path : dumpList) {
        RawResource res = from.getResource(path);
        if (res == null)
            throw new IllegalStateException("No resource found at -- " + path);
        to.putResource(path, res.content(), res.lastModified());
        res.content().close();
    }

    logger.debug("Dump resources to {} took {} ms", metaDir, System.currentTimeMillis() - startTime);
}
Example 4
Source File: ProjectManager.java From Kylin with Apache License 2.0 | 5 votes |
private void saveResource(ProjectInstance prj) throws IOException {
    ResourceStore store = getStore();
    store.putResource(prj.getResourcePath(), prj, PROJECT_SERIALIZER);
    prj = reloadProjectAt(prj.getResourcePath());
    projectMap.put(norm(prj.getName()), prj);
    // triggers update broadcast
    clearL2Cache();
}
Example 5
Source File: DeployUtil.java From Kylin with Apache License 2.0 | 5 votes |
public static void duplicateFactTableData(String factTableName, String joinType) throws IOException {
    // duplicate a copy of this fact table, with a naming convention with fact.csv.inner or fact.csv.left
    // so that later test cases can select different data files
    ResourceStore store = ResourceStore.getStore(config());
    InputStream in = store.getResource("/data/" + factTableName + ".csv");
    String factTablePathWithJoinType = "/data/" + factTableName + ".csv." + joinType.toLowerCase();
    store.deleteResource(factTablePathWithJoinType);
    store.putResource(factTablePathWithJoinType, in, System.currentTimeMillis());
    in.close();
}
Example 6
Source File: DeployUtil.java From Kylin with Apache License 2.0 | 5 votes |
public static void overrideFactTableData(String factTableContent, String factTableName) throws IOException {
    // Write to resource store
    ResourceStore store = ResourceStore.getStore(config());
    InputStream in = new StringInputStream(factTableContent);
    String factTablePath = "/data/" + factTableName + ".csv";
    store.deleteResource(factTablePath);
    store.putResource(factTablePath, in, System.currentTimeMillis());
    in.close();
}
Example 7
Source File: AbstractHadoopJob.java From Kylin with Apache License 2.0 | 5 votes |
private void dumpResources(KylinConfig kylinConfig, File metaDir, ArrayList<String> dumpList) throws IOException {
    ResourceStore from = ResourceStore.getStore(kylinConfig);
    KylinConfig localConfig = KylinConfig.createInstanceFromUri(metaDir.getAbsolutePath());
    ResourceStore to = ResourceStore.getStore(localConfig);
    for (String path : dumpList) {
        InputStream in = from.getResource(path);
        if (in == null)
            throw new IllegalStateException("No resource found at -- " + path);
        long ts = from.getResourceTimestamp(path);
        to.putResource(path, in, ts);
        //The following log is duplicate with in ResourceStore
        //log.info("Dumped resource " + path + " to " + metaDir.getAbsolutePath());
    }
}
Example 8
Source File: IIManager.java From Kylin with Apache License 2.0 | 4 votes |
private void saveResource(IIInstance ii) throws IOException {
    ResourceStore store = getStore();
    store.putResource(ii.getResourcePath(), ii, II_SERIALIZER);
    this.afterIIUpdated(ii);
}
Example 9
Source File: UpdateDictionaryStep.java From kylin-on-parquet-v2 with Apache License 2.0 | 4 votes |
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final DictionaryManager dictMgrHdfs;
    final DictionaryManager dictMgrHbase;
    final CubeInstance cube = cubeMgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    final String dictInfoPath = this.getParams().get(BatchConstants.ARG_DICT_PATH);
    final String metadataUrl = this.getParams().get(BatchConstants.ARG_META_URL);
    final KylinConfig kylinConfHbase = cube.getConfig();
    final KylinConfig kylinConfHdfs = AbstractHadoopJob.loadKylinConfigFromHdfs(metadataUrl);

    Collections.sort(mergingSegments);

    try {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        ResourceStore hbaseRS = ResourceStore.getStore(kylinConfHbase);
        ResourceStore hdfsRS = ResourceStore.getStore(kylinConfHdfs);
        dictMgrHdfs = DictionaryManager.getInstance(kylinConfHdfs);
        dictMgrHbase = DictionaryManager.getInstance(kylinConfHbase);

        // work on copy instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());

        // update cube segment dictionary
        FileStatus[] fileStatuss = fs.listStatus(new Path(dictInfoPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part") || path.getName().startsWith("tmp");
            }
        });

        for (FileStatus fileStatus : fileStatuss) {
            Path filePath = fileStatus.getPath();

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
            Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);

            while (reader.next(key, value)) {
                String tblCol = key.toString();
                String dictInfoResource = value.toString();

                if (StringUtils.isNotEmpty(dictInfoResource)) {
                    logger.info(dictInfoResource);
                    // put dictionary file to metadata store
                    DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource);
                    DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);

                    if (dicInfoHbase != null) {
                        TblColRef tblColRef = cube.getDescriptor().findColumnRef(tblCol.split(":")[0], tblCol.split(":")[1]);
                        newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
                    }
                }
            }

            IOUtils.closeStream(reader);
        }

        CubeSegment lastSeg = mergingSegments.get(mergingSegments.size() - 1);
        for (Map.Entry<String, String> entry : lastSeg.getSnapshots().entrySet()) {
            newSegCopy.putSnapshotResPath(entry.getKey(), entry.getValue());
        }

        // update statistics
        // put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        hbaseRS.putResource(statisticsFileName, hdfsRS.getResource(newSegment.getStatisticsResourcePath()).content(), System.currentTimeMillis());

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        cubeMgr.updateCube(update);

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary", e);
        return ExecuteResult.createError(e);
    }
}
Example 10
Source File: CubeManager.java From Kylin with Apache License 2.0 | 4 votes |
private void saveResource(CubeInstance cube) throws IOException {
    ResourceStore store = getStore();
    store.putResource(cube.getResourcePath(), cube, CUBE_SERIALIZER);
    this.afterCubeUpdated(cube);
}
Example 11
Source File: DictionaryManager.java From Kylin with Apache License 2.0 | 4 votes |
void save(DictionaryInfo dict) throws IOException {
    ResourceStore store = MetadataManager.getInstance(config).getStore();
    String path = dict.getResourcePath();
    logger.info("Saving dictionary at " + path);
    store.putResource(path, dict, DictionaryInfoSerializer.FULL_SERIALIZER);
}
Example 12
Source File: SnapshotManager.java From Kylin with Apache License 2.0 | 4 votes |
private void save(SnapshotTable snapshot) throws IOException {
    ResourceStore store = MetadataManager.getInstance(this.config).getStore();
    String path = snapshot.getResourcePath();
    store.putResource(path, snapshot, SnapshotTableSerializer.FULL_SERIALIZER);
}
Example 13
Source File: SaveStatisticsStep.java From kylin with Apache License 2.0 | 4 votes |
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    CubeSegment newSegment = CubingExecutableUtil.findSegment(context,
            CubingExecutableUtil.getCubeName(this.getParams()), CubingExecutableUtil.getSegmentId(this.getParams()));
    KylinConfig kylinConf = newSegment.getConfig();

    ResourceStore rs = ResourceStore.getStore(kylinConf);
    try {
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        Configuration hadoopConf = HadoopUtil.getCurrentConfiguration();
        Path statisticsDir = new Path(CubingExecutableUtil.getStatisticsPath(this.getParams()));
        Path[] statisticsFiles = HadoopUtil.getFilteredPath(fs, statisticsDir, BatchConstants.CFG_OUTPUT_STATISTICS);
        if (statisticsFiles == null) {
            throw new IOException("fail to find the statistics file in base dir: " + statisticsDir);
        }

        Map<Long, HLLCounter> cuboidHLLMap = Maps.newHashMap();
        long totalRowsBeforeMerge = 0;
        long grantTotal = 0;
        int samplingPercentage = -1;
        int mapperNumber = -1;
        for (Path item : statisticsFiles) {
            CubeStatsReader.CubeStatsResult cubeStatsResult = new CubeStatsReader.CubeStatsResult(item,
                    kylinConf.getCubeStatsHLLPrecision());
            cuboidHLLMap.putAll(cubeStatsResult.getCounterMap());
            long pGrantTotal = 0L;
            for (HLLCounter hll : cubeStatsResult.getCounterMap().values()) {
                pGrantTotal += hll.getCountEstimate();
            }
            totalRowsBeforeMerge += pGrantTotal * cubeStatsResult.getMapperOverlapRatio();
            grantTotal += pGrantTotal;
            int pMapperNumber = cubeStatsResult.getMapperNumber();
            if (pMapperNumber > 0) {
                if (mapperNumber < 0) {
                    mapperNumber = pMapperNumber;
                } else {
                    throw new RuntimeException(
                            "Base cuboid has been distributed to multiple reducers at step FactDistinctColumnsReducer!!!");
                }
            }
            int pSamplingPercentage = cubeStatsResult.getPercentage();
            if (samplingPercentage < 0) {
                samplingPercentage = pSamplingPercentage;
            } else if (samplingPercentage != pSamplingPercentage) {
                throw new RuntimeException(
                        "The sampling percentage should be same among all of the reducer of FactDistinctColumnsReducer!!!");
            }
        }
        if (samplingPercentage < 0) {
            logger.warn("The sampling percentage should be set!!!");
        }
        if (mapperNumber < 0) {
            logger.warn("The mapper number should be set!!!");
        }
        if (logger.isDebugEnabled()) {
            logMapperAndCuboidStatistics(cuboidHLLMap, samplingPercentage, mapperNumber, grantTotal,
                    totalRowsBeforeMerge);
        }
        double mapperOverlapRatio = grantTotal == 0 ? 0 : (double) totalRowsBeforeMerge / grantTotal;
        CubingJob cubingJob = (CubingJob) getManager()
                .getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
        long sourceRecordCount = cubingJob.findSourceRecordCount();
        CubeStatsWriter.writeCuboidStatistics(hadoopConf, statisticsDir, cuboidHLLMap, samplingPercentage,
                mapperNumber, mapperOverlapRatio, sourceRecordCount);

        Path statisticsFile = new Path(statisticsDir, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME);
        logger.info("{} stats saved to hdfs {}", newSegment, statisticsFile);

        FSDataInputStream is = fs.open(statisticsFile);
        try {
            // put the statistics to metadata store
            String resPath = newSegment.getStatisticsResourcePath();
            rs.putResource(resPath, is, System.currentTimeMillis());
            logger.info("{} stats saved to resource {}", newSegment, resPath);

            StatisticsDecisionUtil.decideCubingAlgorithm(cubingJob, newSegment);
            StatisticsDecisionUtil.optimizeCubingPlan(newSegment);
        } finally {
            IOUtils.closeStream(is);
        }

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to save cuboid statistics", e);
        return ExecuteResult.createError(e);
    }
}
Example 14
Source File: MergeStatisticsWithOldStep.java From kylin with Apache License 2.0 | 4 votes |
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment optimizeSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

    CubeSegment oldSegment = optimizeSegment.getCubeInstance().getOriginalSegmentToOptimize(optimizeSegment);
    Preconditions.checkNotNull(oldSegment, "cannot find the original segment to be optimized by " + optimizeSegment);

    KylinConfig kylinConf = cube.getConfig();
    Configuration conf = HadoopUtil.getCurrentConfiguration();
    ResourceStore rs = ResourceStore.getStore(kylinConf);
    int averageSamplingPercentage = 0;

    try {
        //1. Add statistics from optimized segment
        Path statisticsDirPath = new Path(CubingExecutableUtil.getStatisticsPath(this.getParams()));
        FileSystem hdfs = FileSystem.get(conf);
        if (!hdfs.exists(statisticsDirPath)) {
            throw new IOException("StatisticsFilePath " + statisticsDirPath + " does not exists");
        }

        if (!hdfs.isDirectory(statisticsDirPath)) {
            throw new IOException("StatisticsFilePath " + statisticsDirPath + " is not a directory");
        }

        Path[] statisticsFiles = HadoopUtil.getFilteredPath(hdfs, statisticsDirPath,
                BatchConstants.CFG_OUTPUT_STATISTICS);
        if (statisticsFiles == null) {
            throw new IOException("fail to find the statistics file in base dir: " + statisticsDirPath);
        }

        for (Path item : statisticsFiles) {
            CubeStatsReader optimizeSegmentStatsReader = new CubeStatsReader(optimizeSegment, null,
                    optimizeSegment.getConfig(), item);
            averageSamplingPercentage += optimizeSegmentStatsReader.getSamplingPercentage();
            addFromCubeStatsReader(optimizeSegmentStatsReader);
        }

        //2. Add statistics from old segment
        CubeStatsReader oldSegmentStatsReader = new CubeStatsReader(oldSegment, null, oldSegment.getConfig());
        averageSamplingPercentage += oldSegmentStatsReader.getSamplingPercentage();
        addFromCubeStatsReader(oldSegmentStatsReader);

        logger.info("Cuboid set with stats info: " + cuboidHLLMap.keySet().toString());
        //3. Store merged statistics for recommend cuboids
        averageSamplingPercentage = averageSamplingPercentage / 2;

        Set<Long> cuboidsRecommend = cube.getCuboidsRecommend();
        Map<Long, HLLCounter> resultCuboidHLLMap = Maps.newHashMapWithExpectedSize(cuboidsRecommend.size());
        for (Long cuboid : cuboidsRecommend) {
            HLLCounter hll = cuboidHLLMap.get(cuboid);
            if (hll == null) {
                logger.warn("Cannot get the row count stats for cuboid " + cuboid);
            } else {
                resultCuboidHLLMap.put(cuboid, hll);
            }
        }

        String resultDir = CubingExecutableUtil.getMergedStatisticsPath(this.getParams());
        CubeStatsWriter.writeCuboidStatistics(conf, new Path(resultDir), resultCuboidHLLMap,
                averageSamplingPercentage, oldSegmentStatsReader.getSourceRowCount());

        try (FSDataInputStream mergedStats = hdfs
                .open(new Path(resultDir, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME))) {
            // put the statistics to metadata store
            String statisticsFileName = optimizeSegment.getStatisticsResourcePath();
            rs.putResource(statisticsFileName, mergedStats, System.currentTimeMillis());
        }

        //By default, the cube optimization will use in-memory cubing
        CubingJob cubingJob = (CubingJob) getManager()
                .getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
        StatisticsDecisionUtil.decideCubingAlgorithm(cubingJob, optimizeSegment);

        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to merge cuboid statistics", e);
        return ExecuteResult.createError(e);
    }
}
Example 15
Source File: UpdateDictionaryStep.java From kylin with Apache License 2.0 | 4 votes |
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final DictionaryManager dictMgrHdfs;
    final DictionaryManager dictMgrHbase;
    final CubeInstance cube = cubeMgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    final String dictInfoPath = this.getParams().get(BatchConstants.ARG_DICT_PATH);
    final String metadataUrl = this.getParams().get(BatchConstants.ARG_META_URL);
    final KylinConfig kylinConfHbase = cube.getConfig();
    final KylinConfig kylinConfHdfs = AbstractHadoopJob.loadKylinConfigFromHdfs(metadataUrl);

    Collections.sort(mergingSegments);

    try {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        ResourceStore hbaseRS = ResourceStore.getStore(kylinConfHbase);
        ResourceStore hdfsRS = ResourceStore.getStore(kylinConfHdfs);
        dictMgrHdfs = DictionaryManager.getInstance(kylinConfHdfs);
        dictMgrHbase = DictionaryManager.getInstance(kylinConfHbase);

        // work on copy instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());

        // update cube segment dictionary
        FileStatus[] fileStatuss = fs.listStatus(new Path(dictInfoPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part") || path.getName().startsWith("tmp");
            }
        });

        for (FileStatus fileStatus : fileStatuss) {
            Path filePath = fileStatus.getPath();

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
            Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);

            while (reader.next(key, value)) {
                String tblCol = key.toString();
                String dictInfoResource = value.toString();

                if (StringUtils.isNotEmpty(dictInfoResource)) {
                    logger.info(dictInfoResource);
                    // put dictionary file to metadata store
                    DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource);
                    DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);

                    if (dicInfoHbase != null) {
                        TblColRef tblColRef = cube.getDescriptor().findColumnRef(tblCol.split(":")[0], tblCol.split(":")[1]);
                        newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
                    }
                }
            }

            IOUtils.closeStream(reader);
        }

        CubeSegment lastSeg = mergingSegments.get(mergingSegments.size() - 1);
        for (Map.Entry<String, String> entry : lastSeg.getSnapshots().entrySet()) {
            newSegCopy.putSnapshotResPath(entry.getKey(), entry.getValue());
        }

        // update statistics
        // put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        hbaseRS.putResource(statisticsFileName, hdfsRS.getResource(newSegment.getStatisticsResourcePath()).content(), System.currentTimeMillis());

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        cubeMgr.updateCube(update);

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary", e);
        return ExecuteResult.createError(e);
    }
}
Example 16
Source File: SaveStatisticsStep.java From kylin-on-parquet-v2 with Apache License 2.0 | 4 votes |
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    CubeSegment newSegment = CubingExecutableUtil.findSegment(context,
            CubingExecutableUtil.getCubeName(this.getParams()), CubingExecutableUtil.getSegmentId(this.getParams()));
    KylinConfig kylinConf = newSegment.getConfig();

    ResourceStore rs = ResourceStore.getStore(kylinConf);
    try {
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        Configuration hadoopConf = HadoopUtil.getCurrentConfiguration();
        Path statisticsDir = new Path(CubingExecutableUtil.getStatisticsPath(this.getParams()));
        Path[] statisticsFiles = HadoopUtil.getFilteredPath(fs, statisticsDir, BatchConstants.CFG_OUTPUT_STATISTICS);
        if (statisticsFiles == null) {
            throw new IOException("fail to find the statistics file in base dir: " + statisticsDir);
        }

        Map<Long, HLLCounter> cuboidHLLMap = Maps.newHashMap();
        long totalRowsBeforeMerge = 0;
        long grantTotal = 0;
        int samplingPercentage = -1;
        int mapperNumber = -1;
        for (Path item : statisticsFiles) {
            CubeStatsReader.CubeStatsResult cubeStatsResult = new CubeStatsReader.CubeStatsResult(item,
                    kylinConf.getCubeStatsHLLPrecision());
            cuboidHLLMap.putAll(cubeStatsResult.getCounterMap());
            long pGrantTotal = 0L;
            for (HLLCounter hll : cubeStatsResult.getCounterMap().values()) {
                pGrantTotal += hll.getCountEstimate();
            }
            totalRowsBeforeMerge += pGrantTotal * cubeStatsResult.getMapperOverlapRatio();
            grantTotal += pGrantTotal;
            int pMapperNumber = cubeStatsResult.getMapperNumber();
            if (pMapperNumber > 0) {
                if (mapperNumber < 0) {
                    mapperNumber = pMapperNumber;
                } else {
                    throw new RuntimeException(
                            "Base cuboid has been distributed to multiple reducers at step FactDistinctColumnsReducer!!!");
                }
            }
            int pSamplingPercentage = cubeStatsResult.getPercentage();
            if (samplingPercentage < 0) {
                samplingPercentage = pSamplingPercentage;
            } else if (samplingPercentage != pSamplingPercentage) {
                throw new RuntimeException(
                        "The sampling percentage should be same among all of the reducer of FactDistinctColumnsReducer!!!");
            }
        }
        if (samplingPercentage < 0) {
            logger.warn("The sampling percentage should be set!!!");
        }
        if (mapperNumber < 0) {
            logger.warn("The mapper number should be set!!!");
        }
        if (logger.isDebugEnabled()) {
            logMapperAndCuboidStatistics(cuboidHLLMap, samplingPercentage, mapperNumber, grantTotal,
                    totalRowsBeforeMerge);
        }
        double mapperOverlapRatio = grantTotal == 0 ? 0 : (double) totalRowsBeforeMerge / grantTotal;
        CubingJob cubingJob = (CubingJob) getManager()
                .getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
        long sourceRecordCount = cubingJob.findSourceRecordCount();
        CubeStatsWriter.writeCuboidStatistics(hadoopConf, statisticsDir, cuboidHLLMap, samplingPercentage,
                mapperNumber, mapperOverlapRatio, sourceRecordCount);

        Path statisticsFile = new Path(statisticsDir, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME);
        logger.info("{} stats saved to hdfs {}", newSegment, statisticsFile);

        FSDataInputStream is = fs.open(statisticsFile);
        try {
            // put the statistics to metadata store
            String resPath = newSegment.getStatisticsResourcePath();
            rs.putResource(resPath, is, System.currentTimeMillis());
            logger.info("{} stats saved to resource {}", newSegment, resPath);

            StatisticsDecisionUtil.decideCubingAlgorithm(cubingJob, newSegment);
            StatisticsDecisionUtil.optimizeCubingPlan(newSegment);
        } finally {
            IOUtils.closeStream(is);
        }

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to save cuboid statistics", e);
        return ExecuteResult.createError(e);
    }
}
Example 17
Source File: MergeStatisticsWithOldStep.java From kylin-on-parquet-v2 with Apache License 2.0 | 4 votes |
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment optimizeSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

    CubeSegment oldSegment = optimizeSegment.getCubeInstance().getOriginalSegmentToOptimize(optimizeSegment);
    Preconditions.checkNotNull(oldSegment, "cannot find the original segment to be optimized by " + optimizeSegment);

    KylinConfig kylinConf = cube.getConfig();
    Configuration conf = HadoopUtil.getCurrentConfiguration();
    ResourceStore rs = ResourceStore.getStore(kylinConf);
    int averageSamplingPercentage = 0;

    try {
        //1. Add statistics from optimized segment
        Path statisticsDirPath = new Path(CubingExecutableUtil.getStatisticsPath(this.getParams()));
        FileSystem hdfs = FileSystem.get(conf);
        if (!hdfs.exists(statisticsDirPath)) {
            throw new IOException("StatisticsFilePath " + statisticsDirPath + " does not exists");
        }

        if (!hdfs.isDirectory(statisticsDirPath)) {
            throw new IOException("StatisticsFilePath " + statisticsDirPath + " is not a directory");
        }

        Path[] statisticsFiles = HadoopUtil.getFilteredPath(hdfs, statisticsDirPath,
                BatchConstants.CFG_OUTPUT_STATISTICS);
        if (statisticsFiles == null) {
            throw new IOException("fail to find the statistics file in base dir: " + statisticsDirPath);
        }

        for (Path item : statisticsFiles) {
            CubeStatsReader optimizeSegmentStatsReader = new CubeStatsReader(optimizeSegment, null,
                    optimizeSegment.getConfig(), item);
            averageSamplingPercentage += optimizeSegmentStatsReader.getSamplingPercentage();
            addFromCubeStatsReader(optimizeSegmentStatsReader);
        }

        //2. Add statistics from old segment
        CubeStatsReader oldSegmentStatsReader = new CubeStatsReader(oldSegment, null, oldSegment.getConfig());
        averageSamplingPercentage += oldSegmentStatsReader.getSamplingPercentage();
        addFromCubeStatsReader(oldSegmentStatsReader);

        logger.info("Cuboid set with stats info: " + cuboidHLLMap.keySet().toString());
        //3. Store merged statistics for recommend cuboids
        averageSamplingPercentage = averageSamplingPercentage / 2;

        Set<Long> cuboidsRecommend = cube.getCuboidsRecommend();
        Map<Long, HLLCounter> resultCuboidHLLMap = Maps.newHashMapWithExpectedSize(cuboidsRecommend.size());
        for (Long cuboid : cuboidsRecommend) {
            HLLCounter hll = cuboidHLLMap.get(cuboid);
            if (hll == null) {
                logger.warn("Cannot get the row count stats for cuboid " + cuboid);
            } else {
                resultCuboidHLLMap.put(cuboid, hll);
            }
        }

        String resultDir = CubingExecutableUtil.getMergedStatisticsPath(this.getParams());
        CubeStatsWriter.writeCuboidStatistics(conf, new Path(resultDir), resultCuboidHLLMap,
                averageSamplingPercentage, oldSegmentStatsReader.getSourceRowCount());

        try (FSDataInputStream mergedStats = hdfs
                .open(new Path(resultDir, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME))) {
            // put the statistics to metadata store
            String statisticsFileName = optimizeSegment.getStatisticsResourcePath();
            rs.putResource(statisticsFileName, mergedStats, System.currentTimeMillis());
        }

        //By default, the cube optimization will use in-memory cubing
        CubingJob cubingJob = (CubingJob) getManager()
                .getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
        StatisticsDecisionUtil.decideCubingAlgorithm(cubingJob, optimizeSegment);

        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to merge cuboid statistics", e);
        return ExecuteResult.createError(e);
    }
}