Java Code Examples for org.apache.kylin.cube.CubeManager#getCube()
The following examples show how to use org.apache.kylin.cube.CubeManager#getCube(). You can vote up the examples you like or vote down the ones you don't, and you can go to the original project or source file by following the links above each example.
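Before the project examples, here is a minimal sketch of the basic call pattern, assuming a Kylin environment is already configured; the wrapper class and the cube name "sample_cube" are hypothetical and only illustrate the lookup:

import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;

public class GetCubeSketch {
    public static void main(String[] args) {
        // Load the Kylin configuration from the environment (kylin.properties).
        KylinConfig config = KylinConfig.getInstanceFromEnv();

        // CubeManager is obtained per KylinConfig; getCube() looks a cube up by name.
        CubeManager cubeManager = CubeManager.getInstance(config);
        CubeInstance cube = cubeManager.getCube("sample_cube"); // hypothetical cube name

        if (cube == null) {
            System.out.println("Cube not found");
        } else {
            // Typical follow-up calls seen in the examples below.
            System.out.println("Descriptor: " + cube.getDescriptor().getName());
            System.out.println("Segments:   " + cube.getSegments().size());
        }
    }
}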
Example 1
Source File: HybridCubeCLITest.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
@Test
public void testSegmentOverlap() throws IOException {
    thrown.expect(RuntimeException.class);
    thrown.expectMessage("Segments has overlap");

    HybridManager hybridManager = HybridManager.getInstance(KylinConfig.getInstanceFromEnv());
    Assert.assertNull(hybridManager.getHybridInstance("ssb_hybrid"));
    HybridCubeCLI.main(new String[] { "-name", "ssb_hybrid", "-project", "default", "-model", "ssb", "-cubes",
            "ssb_cube1,ssb_cube2", "-action", "create" });

    CubeManager cubeManager = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
    CubeInstance cube1 = cubeManager.getCube("ssb_cube1");
    CubeInstance cube2 = cubeManager.getCube("ssb_cube2");

    // 2012-01-01,2012-01-03
    cubeManager.appendSegment(cube1, new SegmentRange.TSRange(1325376000000L, 1325548800000L));
    // 2012-01-02,2012-01-04
    cubeManager.appendSegment(cube2, new SegmentRange.TSRange(1325462400000L, 1325635200000L));

    HybridCubeCLI.main(new String[] { "-name", "ssb_hybrid", "-project", "default", "-model", "ssb", "-cubes",
            "ssb_cube1,ssb_cube2", "-action", "update" });
}
Example 2
Source File: ColumnarSplitReader.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    if (!(split instanceof FileSplit)) {
        throw new IllegalArgumentException("Only compatible with FileSplits.");
    } else {
        logger.debug("CFG_Cube_Name: " + BatchConstants.CFG_CUBE_NAME);
        cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase(Locale.ROOT);
        segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME).toUpperCase(Locale.ROOT);

        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
        CubeManager cubeManager = CubeManager.getInstance(config);
        cube = cubeManager.getCube(cubeName);
        cubeDesc = cube.getDescriptor();
        cubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
    }
}
Example 3
Source File: DeployCoprocessorCLI.java From kylin with Apache License 2.0 | 6 votes |
private static List<String> filterByProjects(List<String> allTableNames, List<String> projectNames) {
    ProjectManager projectManager = ProjectManager.getInstance(KylinConfig.getInstanceFromEnv());
    CubeManager cubeManager = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());

    List<String> result = Lists.newArrayList();
    for (String p : projectNames) {
        p = p.trim();
        if (p.endsWith(",")) {
            p = p.substring(0, p.length() - 1);
        }

        ProjectInstance projectInstance = projectManager.getProject(p);
        List<RealizationEntry> cubeList = projectInstance.getRealizationEntries(RealizationType.CUBE);
        for (RealizationEntry cube : cubeList) {
            CubeInstance cubeInstance = cubeManager.getCube(cube.getRealization());
            for (CubeSegment segment : cubeInstance.getSegments()) {
                String tableName = segment.getStorageLocationIdentifier();
                if (allTableNames.contains(tableName)) {
                    result.add(tableName);
                }
            }
        }
    }
    return result;
}
Example 4
Source File: MergeCuboidMapper.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
@Override
protected void doSetup(Context context) throws IOException, InterruptedException {
    super.bindCurrentConfiguration(context.getConfiguration());

    String cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
    String segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

    CubeManager cubeManager = CubeManager.getInstance(config);
    CubeInstance cube = cubeManager.getCube(cubeName);
    CubeDesc cubeDesc = cube.getDescriptor();
    CubeSegment mergedCubeSegment = cube.getSegmentById(segmentID);

    // decide which source segment
    FileSplit fileSplit = (FileSplit) context.getInputSplit();
    IMROutput2.IMRMergeOutputFormat outputFormat = MRUtil.getBatchMergeOutputSide2(mergedCubeSegment)
            .getOutputFormat();
    CubeSegment sourceCubeSegment = outputFormat.findSourceSegment(fileSplit, cube);
    reEncoder = new SegmentReEncoder(cubeDesc, sourceCubeSegment, mergedCubeSegment, config);
}
Example 5
Source File: UpdateCubeInfoAfterCheckpointStep.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeManager = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = cubeManager.getCube(CubingExecutableUtil.getCubeName(this.getParams()));

    Set<Long> recommendCuboids = cube.getCuboidsRecommend();
    try {
        List<CubeSegment> newSegments = cube.getSegments(SegmentStatusEnum.READY_PENDING);
        Map<Long, Long> recommendCuboidsWithStats = CuboidStatsReaderUtil
                .readCuboidStatsFromSegments(recommendCuboids, newSegments);
        if (recommendCuboidsWithStats == null) {
            throw new RuntimeException("Fail to get statistics info for recommended cuboids after optimization!!!");
        }
        cubeManager.promoteCheckpointOptimizeSegments(cube, recommendCuboidsWithStats,
                newSegments.toArray(new CubeSegment[newSegments.size()]));
        return new ExecuteResult();
    } catch (Exception e) {
        logger.error("fail to update cube after build", e);
        return ExecuteResult.createError(e);
    }
}
Example 6
Source File: BaseCuboidMapperTest.java From Kylin with Apache License 2.0 | 5 votes |
@Test
public void testMapperWithNull() throws Exception {
    String cubeName = "test_kylin_cube_with_slr_1_new_segment";
    String segmentName = "20130331080000_20131212080000";
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
    // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL,
    // metadata);

    mapDriver.withInput(new Text("key"),
            new Text("2012-12-15118480Health & BeautyFragrances\\NAuction15123456789\\N"));
    List<Pair<Text, Text>> result = mapDriver.run();

    CubeManager cubeMgr = CubeManager.getInstance(getTestConfig());
    CubeInstance cube = cubeMgr.getCube(cubeName);

    assertEquals(1, result.size());
    Text rowkey = result.get(0).getFirst();
    byte[] key = rowkey.getBytes();
    byte[] header = Bytes.head(key, 26);
    byte[] sellerId = Bytes.tail(header, 18);
    byte[] cuboidId = Bytes.head(header, 8);
    byte[] restKey = Bytes.tail(key, rowkey.getLength() - 26);

    RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment());
    decoder.decode(key);
    assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, null, Auction, 0, 15]",
            decoder.getValues().toString());

    assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
    assertEquals(511, Bytes.toLong(cuboidId));
    assertEquals(22, restKey.length);

    verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "0", "0", "0");
}
Example 7
Source File: ProjectManagerTest.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
@Test
public void testProjectsDrop() throws IOException {
    ProjectManager prjMgr = ProjectManager.getInstance(getTestConfig());
    CubeManager cubeMgr = CubeManager.getInstance(getTestConfig());

    CubeInstance cube = cubeMgr.getCube("ci_left_join_cube");
    assertTrue(prjMgr.getRealizationsByTable("default", "default.test_kylin_fact").contains(cube));
    assertTrue(prjMgr.listAllRealizations("default").contains(cube));

    cubeMgr.dropCube(cube.getName(), false);

    assertTrue(!prjMgr.getRealizationsByTable("default", "default.test_kylin_fact").contains(cube));
    assertTrue(!prjMgr.listAllRealizations("default").contains(cube));
}
Example 8
Source File: ColumnToRowJob.java From kylin with Apache License 2.0 | 4 votes |
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        parseOptions(options, args);

        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
        String segmentName = getOptionValue(OPTION_SEGMENT_NAME);

        KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
        CubeManager cubeMgr = CubeManager.getInstance(kylinConfig);
        CubeInstance cube = cubeMgr.getCube(cubeName);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        setJobClasspath(job, cube.getConfig());

        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);

        job.setMapperClass(ColumnToRowMapper.class);
        job.setInputFormatClass(ColumnarSplitDataInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(ColumnToRowReducer.class);
        job.setNumReduceTasks(calReducerNum(input));
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.getConfiguration().set("dfs.block.size", cube.getConfig().getStreamingBasicCuboidJobDFSBlockSize());
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);

        CubeSegment segment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
        attachSegmentMetadataWithDict(segment, job.getConfiguration());

        this.deletePath(job.getConfiguration(), output);
        return waitForCompletion(job);
    } catch (Exception e) {
        logger.error("error in CuboidJob", e);
        printUsage(options);
        throw e;
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}
Example 9
Source File: LocalWithSparkSessionTest.java From kylin-on-parquet-v2 with Apache License 2.0 | 4 votes |
protected void cleanupSegments(String cubeName) throws IOException {
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    CubeManager cubeMgr = CubeManager.getInstance(config);
    CubeInstance cube = cubeMgr.getCube(cubeName);
    cubeMgr.updateCubeDropSegments(cube, cube.getSegments());
}
Example 10
Source File: SignatureCalculatorTest.java From kylin-on-parquet-v2 with Apache License 2.0 | 4 votes |
@Test
public void testRealizationSetCalculator() throws IOException {
    KylinConfig config = KylinConfig.createKylinConfig(getTestConfig());
    Map<String, String> overrides = Maps.newHashMap();
    overrides.put("kylin.query.signature-class", "org.apache.kylin.rest.signature.RealizationSetCalculator");

    ProjectInstance projectInstance = ProjectManager.getInstance(config).getProject(projectName);
    projectInstance.setConfig(KylinConfigExt.createInstance(config, overrides));

    HybridManager hybridManager = HybridManager.getInstance(config);
    HybridInstance hybrid1 = hybridManager.getHybridInstance("test_kylin_hybrid_ready");

    CubeManager cubeManager = CubeManager.getInstance(config);
    CubeInstance cube1 = cubeManager.getCube("test_kylin_cube_with_slr_ready_2_segments");
    CubeInstance cube2 = cubeManager.getCube("test_kylin_cube_without_slr_ready");
    CubeInstance cube2Clone = cloneCubeInstance(cubeManager, cube2, cube2.getName() + "_clone");

    //Related cubes:
    // - test_kylin_cube_with_slr_ready
    // - test_kylin_cube_with_slr_ready_2_segments
    // - test_kylin_cube_without_slr_ready
    String cubes = hybrid1.getCanonicalName() + "," + cube2Clone.getCanonicalName();

    SQLResponse sqlResponse = new SQLResponse();
    sqlResponse.setCube(cubes);

    String signature = SQLResponseSignatureUtil.createSignature(config, sqlResponse, projectName);
    sqlResponse.setSignature(signature);

    Assert.assertTrue(SQLResponseSignatureUtil.checkSignature(config, sqlResponse, projectName));

    { //Test the influence of related cubes status change
        cube1 = cubeManager.updateCubeStatus(cube1, RealizationStatusEnum.DISABLED);
        Assert.assertFalse(SQLResponseSignatureUtil.checkSignature(config, sqlResponse, projectName));

        cube1 = cubeManager.updateCubeStatus(cube1, RealizationStatusEnum.READY);
        Assert.assertTrue(SQLResponseSignatureUtil.checkSignature(config, sqlResponse, projectName));
    }

    { //Test the influence of segment changes
        cube2Clone = cubeManager.updateCubeDropSegments(cube2Clone, cube2Clone.getSegments().get(0));
        Assert.assertFalse(SQLResponseSignatureUtil.checkSignature(config, sqlResponse, projectName));
    }
}
Example 11
Source File: BulkLoadJob.java From Kylin with Apache License 2.0 | 4 votes |
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_HTABLE_NAME);
        options.addOption(OPTION_CUBE_NAME);
        parseOptions(options, args);

        String tableName = getOptionValue(OPTION_HTABLE_NAME).toUpperCase();
        // e.g
        // /tmp/kylin-3f150b00-3332-41ca-9d3d-652f67f044d7/test_kylin_cube_with_slr_ready_2_segments/hfile/
        // end with "/"
        String input = getOptionValue(OPTION_INPUT_PATH);

        Configuration conf = HBaseConfiguration.create(getConf());
        FileSystem fs = FileSystem.get(conf);

        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase();
        KylinConfig config = KylinConfig.getInstanceFromEnv();
        CubeManager cubeMgr = CubeManager.getInstance(config);
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeDesc cubeDesc = cube.getDescriptor();
        FsPermission permission = new FsPermission((short) 0777);
        for (HBaseColumnFamilyDesc cf : cubeDesc.getHBaseMapping().getColumnFamily()) {
            String cfName = cf.getName();
            fs.setPermission(new Path(input + cfName), permission);
        }

        String[] newArgs = new String[2];
        newArgs[0] = input;
        newArgs[1] = tableName;

        log.debug("Start to run LoadIncrementalHFiles");
        int ret = ToolRunner.run(new LoadIncrementalHFiles(conf), newArgs);
        log.debug("End to run LoadIncrementalHFiles");
        return ret;
    } catch (Exception e) {
        printUsage(options);
        throw e;
    }
}
Example 12
Source File: MergeStatisticsWithOldStep.java From kylin with Apache License 2.0 | 4 votes |
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment optimizeSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

    CubeSegment oldSegment = optimizeSegment.getCubeInstance().getOriginalSegmentToOptimize(optimizeSegment);
    Preconditions.checkNotNull(oldSegment, "cannot find the original segment to be optimized by " + optimizeSegment);

    KylinConfig kylinConf = cube.getConfig();
    Configuration conf = HadoopUtil.getCurrentConfiguration();
    ResourceStore rs = ResourceStore.getStore(kylinConf);
    int averageSamplingPercentage = 0;

    try {
        //1. Add statistics from optimized segment
        Path statisticsDirPath = new Path(CubingExecutableUtil.getStatisticsPath(this.getParams()));
        FileSystem hdfs = FileSystem.get(conf);
        if (!hdfs.exists(statisticsDirPath)) {
            throw new IOException("StatisticsFilePath " + statisticsDirPath + " does not exists");
        }

        if (!hdfs.isDirectory(statisticsDirPath)) {
            throw new IOException("StatisticsFilePath " + statisticsDirPath + " is not a directory");
        }

        Path[] statisticsFiles = HadoopUtil.getFilteredPath(hdfs, statisticsDirPath,
                BatchConstants.CFG_OUTPUT_STATISTICS);
        if (statisticsFiles == null) {
            throw new IOException("fail to find the statistics file in base dir: " + statisticsDirPath);
        }

        for (Path item : statisticsFiles) {
            CubeStatsReader optimizeSegmentStatsReader = new CubeStatsReader(optimizeSegment, null,
                    optimizeSegment.getConfig(), item);
            averageSamplingPercentage += optimizeSegmentStatsReader.getSamplingPercentage();
            addFromCubeStatsReader(optimizeSegmentStatsReader);
        }

        //2. Add statistics from old segment
        CubeStatsReader oldSegmentStatsReader = new CubeStatsReader(oldSegment, null, oldSegment.getConfig());
        averageSamplingPercentage += oldSegmentStatsReader.getSamplingPercentage();
        addFromCubeStatsReader(oldSegmentStatsReader);

        logger.info("Cuboid set with stats info: " + cuboidHLLMap.keySet().toString());

        //3. Store merged statistics for recommend cuboids
        averageSamplingPercentage = averageSamplingPercentage / 2;

        Set<Long> cuboidsRecommend = cube.getCuboidsRecommend();
        Map<Long, HLLCounter> resultCuboidHLLMap = Maps.newHashMapWithExpectedSize(cuboidsRecommend.size());
        for (Long cuboid : cuboidsRecommend) {
            HLLCounter hll = cuboidHLLMap.get(cuboid);
            if (hll == null) {
                logger.warn("Cannot get the row count stats for cuboid " + cuboid);
            } else {
                resultCuboidHLLMap.put(cuboid, hll);
            }
        }

        String resultDir = CubingExecutableUtil.getMergedStatisticsPath(this.getParams());
        CubeStatsWriter.writeCuboidStatistics(conf, new Path(resultDir), resultCuboidHLLMap,
                averageSamplingPercentage, oldSegmentStatsReader.getSourceRowCount());

        try (FSDataInputStream mergedStats = hdfs
                .open(new Path(resultDir, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME))) {
            // put the statistics to metadata store
            String statisticsFileName = optimizeSegment.getStatisticsResourcePath();
            rs.putResource(statisticsFileName, mergedStats, System.currentTimeMillis());
        }

        //By default, the cube optimization will use in-memory cubing
        CubingJob cubingJob = (CubingJob) getManager()
                .getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
        StatisticsDecisionUtil.decideCubingAlgorithm(cubingJob, optimizeSegment);

        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to merge cuboid statistics", e);
        return ExecuteResult.createError(e);
    }
}
Example 13
Source File: FilterRecommendCuboidDataJob.java From kylin with Apache License 2.0 | 4 votes |
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_ID);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        parseOptions(options, args);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));

        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
        String segmentID = getOptionValue(OPTION_SEGMENT_ID);
        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));

        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeSegment optSegment = cube.getSegmentById(segmentID);
        CubeSegment originalSegment = cube.getOriginalSegmentToOptimize(optSegment);

        logger.info("Starting: " + job.getJobName());

        setJobClasspath(job, cube.getConfig());

        // Mapper
        job.setMapperClass(FilterRecommendCuboidDataMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Input
        job.setInputFormatClass(SequenceFileInputFormat.class);
        FileInputFormat.setInputPaths(job, input);

        // Reducer
        ConvergeCuboidDataUtil.setupReducer(job, originalSegment, output);

        // set job configuration
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentID);
        // add metadata to distributed cache
        attachSegmentMetadata(originalSegment, job.getConfiguration(), false, false);

        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        logger.error("error in CuboidJob", e);
        printUsage(options);
        throw e;
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}
Example 14
Source File: UpdateDictionaryStep.java From kylin-on-parquet-v2 with Apache License 2.0 | 4 votes |
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final DictionaryManager dictMgrHdfs;
    final DictionaryManager dictMgrHbase;
    final CubeInstance cube = cubeMgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    final String dictInfoPath = this.getParams().get(BatchConstants.ARG_DICT_PATH);
    final String metadataUrl = this.getParams().get(BatchConstants.ARG_META_URL);

    final KylinConfig kylinConfHbase = cube.getConfig();
    final KylinConfig kylinConfHdfs = AbstractHadoopJob.loadKylinConfigFromHdfs(metadataUrl);

    Collections.sort(mergingSegments);

    try {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        ResourceStore hbaseRS = ResourceStore.getStore(kylinConfHbase);
        ResourceStore hdfsRS = ResourceStore.getStore(kylinConfHdfs);
        dictMgrHdfs = DictionaryManager.getInstance(kylinConfHdfs);
        dictMgrHbase = DictionaryManager.getInstance(kylinConfHbase);

        // work on copy instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());

        // update cube segment dictionary
        FileStatus[] fileStatuss = fs.listStatus(new Path(dictInfoPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part") || path.getName().startsWith("tmp");
            }
        });

        for (FileStatus fileStatus : fileStatuss) {
            Path filePath = fileStatus.getPath();

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
            Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);

            while (reader.next(key, value)) {
                String tblCol = key.toString();
                String dictInfoResource = value.toString();

                if (StringUtils.isNotEmpty(dictInfoResource)) {
                    logger.info(dictInfoResource);
                    // put dictionary file to metadata store
                    DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource);
                    DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(),
                            dictInfoHdfs);

                    if (dicInfoHbase != null) {
                        TblColRef tblColRef = cube.getDescriptor().findColumnRef(tblCol.split(":")[0],
                                tblCol.split(":")[1]);
                        newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
                    }
                }
            }

            IOUtils.closeStream(reader);
        }

        CubeSegment lastSeg = mergingSegments.get(mergingSegments.size() - 1);
        for (Map.Entry<String, String> entry : lastSeg.getSnapshots().entrySet()) {
            newSegCopy.putSnapshotResPath(entry.getKey(), entry.getValue());
        }

        // update statistics
        // put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        hbaseRS.putResource(statisticsFileName, hdfsRS.getResource(newSegment.getStatisticsResourcePath()).content(),
                System.currentTimeMillis());

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        cubeMgr.updateCube(update);

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary", e);
        return ExecuteResult.createError(e);
    }
}
Example 15
Source File: CubeHFileJob.java From kylin-on-parquet-v2 with Apache License 2.0 | 4 votes |
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_PARTITION_FILE_PATH);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_HTABLE_NAME);
        parseOptions(options, args);

        Path partitionFilePath = new Path(getOptionValue(OPTION_PARTITION_FILE_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String cubeName = getOptionValue(OPTION_CUBE_NAME);

        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);

        // use current hbase configuration
        Configuration configuration = new Configuration(HBaseConnection.getCurrentHBaseConfiguration());
        String[] allServices = getAllServices(configuration);
        merge(configuration, getConf());
        configuration.setStrings(DFSConfigKeys.DFS_NAMESERVICES, allServices);

        job = Job.getInstance(configuration, getOptionValue(OPTION_JOB_NAME));

        setJobClasspath(job, cube.getConfig());

        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);
        FileOutputFormat.setOutputPath(job, output);

        // set job configuration
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        // add metadata to distributed cache
        attachCubeMetadata(cube, job.getConfiguration());

        HTable htable = new HTable(configuration, getOptionValue(OPTION_HTABLE_NAME));

        // Automatic config !
        HFileOutputFormat3.configureIncrementalLoad(job, htable);
        reconfigurePartitions(configuration, partitionFilePath);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(CubeHFileMapper.class);
        job.setReducerClass(KeyValueReducer.class);
        job.setMapOutputKeyClass(RowKeyWritable.class);
        job.setMapOutputValueClass(KeyValue.class);
        job.setSortComparatorClass(RowKeyWritable.RowKeyComparator.class);

        // set block replication to 3 for hfiles
        configuration.set(DFSConfigKeys.DFS_REPLICATION_KEY, "3");

        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}
Example 16
Source File: MergeDictJob.java From kylin-on-parquet-v2 with Apache License 2.0 | 4 votes |
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        parseOptions(options, args);

        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String jobName = getOptionValue(OPTION_JOB_NAME);
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
        String segmentName = getOptionValue(OPTION_SEGMENT_NAME);

        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeSegment segment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        setJobClasspath(job, cube.getConfig());
        job.setJobName(jobName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);

        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);
        logger.info("MergeDictReducer output path: {}", output);

        // Mapper
        job.setMapperClass(MergeDictMapper.class);
        job.setInputFormatClass(ColumnarSplitDictInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Reducer
        job.setReducerClass(MergeDictReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        attachCubeMetadata(cube, job.getConfiguration());

        deletePath(job.getConfiguration(), output);
        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        logger.error("job {} failed. ", job.getJobName(), e);
        throw e;
    }
}
Example 17
Source File: UHCDictionaryJob.java From kylin with Apache License 2.0 | 4 votes |
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_CUBING_JOB_ID);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_INPUT_PATH);
        parseOptions(options, args);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        String job_id = getOptionValue(OPTION_CUBING_JOB_ID);
        String cubeName = getOptionValue(OPTION_CUBE_NAME);
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));

        //add metadata to distributed cache
        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        attachCubeMetadata(cube, job.getConfiguration());

        List<TblColRef> uhcColumns = cube.getDescriptor().getAllUHCColumns();
        int reducerCount = uhcColumns.size();

        //Note! handle uhc columns is null.
        boolean hasUHCValue = false;
        for (TblColRef tblColRef : uhcColumns) {
            Path path = new Path(input.toString() + "/" + tblColRef.getIdentity());
            if (HadoopUtil.getFileSystem(path).exists(path)) {
                FileInputFormat.addInputPath(job, path);
                FileInputFormat.setInputPathFilter(job, UHCDictPathFilter.class);
                hasUHCValue = true;
            }
        }

        if (!hasUHCValue) {
            isSkipped = true;
            return 0;
        }

        setJobClasspath(job, cube.getConfig());
        setupMapper();
        setupReducer(output, reducerCount);

        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.ARG_CUBING_JOB_ID, job_id);
        job.getConfiguration().set(BatchConstants.CFG_GLOBAL_DICT_BASE_DIR,
                KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory());
        job.getConfiguration().set(BatchConstants.CFG_MAPRED_OUTPUT_COMPRESS, "false");

        //8G memory is enough for all global dict, because the input is sequential and we handle global dict slice by slice
        job.getConfiguration().set("mapreduce.reduce.memory.mb", "8500");
        job.getConfiguration().set("mapred.reduce.child.java.opts", "-Xmx8g");
        //Copying global dict to working dir in GlobalDictHDFSStore maybe elapsed a long time (Maybe we could improve it)
        //Waiting the global dict lock maybe also take a long time.
        //So we set 8 hours here
        job.getConfiguration().set("mapreduce.task.timeout", "28800000");

        //allow user specially set config for uhc step
        for (Map.Entry<String, String> entry : cube.getConfig().getUHCMRConfigOverride().entrySet()) {
            job.getConfiguration().set(entry.getKey(), entry.getValue());
        }

        return waitForCompletion(job);
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}
Example 18
Source File: RangeKeyDistributionJob.java From Kylin with Apache License 2.0 | 4 votes |
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        parseOptions(options, args);

        // start job
        String jobName = getOptionValue(OPTION_JOB_NAME);
        job = Job.getInstance(getConf(), jobName);

        setJobClasspath(job);

        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);

        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        FileOutputFormat.setOutputPath(job, output);
        // job.getConfiguration().set("dfs.block.size", "67108864");

        // Mapper
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(RangeKeyDistributionMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Reducer - only one
        job.setReducerClass(RangeKeyDistributionReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        job.setNumReduceTasks(1);

        this.deletePath(job.getConfiguration(), output);

        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase();
        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        RealizationCapacity realizationCapacity = cube.getDescriptor().getModel().getCapacity();
        job.getConfiguration().set(BatchConstants.CUBE_CAPACITY, realizationCapacity.toString());

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        throw e;
    }
}
Example 19
Source File: AbstractExecutable.java From kylin with Apache License 2.0 | 4 votes |
public KylinConfig getCubeSpecificConfig() {
    String cubeName = getCubeName();
    CubeManager manager = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
    CubeInstance cube = manager.getCube(cubeName);
    return cube.getConfig();
}
Example 20
Source File: KafkaFlatTableJob.java From kylin-on-parquet-v2 with Apache License 2.0 | 4 votes |
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_SEGMENT_ID);
        parseOptions(options, args);

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        String cubeName = getOptionValue(OPTION_CUBE_NAME);
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String segmentId = getOptionValue(OPTION_SEGMENT_ID);

        // ----------------------------------------------------------------------------
        // add metadata to distributed cache
        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);

        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentId);
        logger.info("Starting: " + job.getJobName());

        setJobClasspath(job, cube.getConfig());

        KafkaConfigManager kafkaConfigManager = KafkaConfigManager.getInstance(KylinConfig.getInstanceFromEnv());
        KafkaConfig kafkaConfig = kafkaConfigManager.getKafkaConfig(cube.getRootFactTable());
        String brokers = KafkaClient.getKafkaBrokers(kafkaConfig);
        String topic = kafkaConfig.getTopic();

        if (brokers == null || brokers.length() == 0 || topic == null) {
            throw new IllegalArgumentException("Invalid Kafka information, brokers " + brokers + ", topic " + topic);
        }

        JobEngineConfig jobEngineConfig = new JobEngineConfig(KylinConfig.getInstanceFromEnv());
        job.getConfiguration().addResource(new Path(jobEngineConfig.getHadoopJobConfFilePath(null)));

        KafkaConsumerProperties kafkaConsumerProperties = KafkaConsumerProperties.getInstanceFromEnv();
        job.getConfiguration().addResource(new Path(kafkaConsumerProperties.getKafkaConsumerHadoopJobConf()));

        job.getConfiguration().set(CONFIG_KAFKA_BROKERS, brokers);
        job.getConfiguration().set(CONFIG_KAFKA_TOPIC, topic);
        job.getConfiguration().set(CONFIG_KAFKA_TIMEOUT, String.valueOf(kafkaConfig.getTimeout()));
        job.getConfiguration().set(CONFIG_KAFKA_INPUT_FORMAT, "json");
        job.getConfiguration().set(CONFIG_KAFKA_PARSER_NAME, kafkaConfig.getParserName());
        job.getConfiguration().set(CONFIG_KAFKA_SPLIT_ROWS, String.valueOf(kafkaConfig.getSplitRows()));
        job.getConfiguration().set(CONFIG_KAFKA_CONSUMER_GROUP, cubeName); // use cubeName as consumer group name

        appendKafkaOverrideProperties(cube.getConfig(), job.getConfiguration());
        setupMapper(cube.getSegmentById(segmentId));
        job.setNumReduceTasks(0);
        FileOutputFormat.setOutputPath(job, output);
        FileOutputFormat.setCompressOutput(job, true);
        org.apache.log4j.Logger.getRootLogger().info("Output hdfs location: " + output);
        org.apache.log4j.Logger.getRootLogger().info("Output hdfs compression: " + true);
        job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

        attachCubeMetadata(cube, job.getConfiguration());
        deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        logger.error("error in KafkaFlatTableJob", e);
        printUsage(options);
        throw e;
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}