Java Code Examples for org.apache.kylin.common.util.HadoopUtil#getCurrentConfiguration()
The following examples show how to use org.apache.kylin.common.util.HadoopUtil#getCurrentConfiguration(). Each example notes the source file it was taken from, the project it belongs to, and that project's license.
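Before the project examples, here is a minimal sketch of the common pattern they share: obtain the Hadoop Configuration of the current environment, then use it to access the default file system. The class name and the argument handling are illustrative only and do not come from the Kylin code base.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.kylin.common.util.HadoopUtil;

public class HadoopUtilUsageSketch {

    public static void main(String[] args) throws IOException {
        // Obtain the Hadoop configuration of the current environment
        Configuration conf = HadoopUtil.getCurrentConfiguration();

        // Typical follow-up, seen in several examples below: get a FileSystem
        // from the configuration and open a path on it
        FileSystem fileSystem = FileSystem.get(conf);
        Path path = new Path(args[0]);

        try (FSDataInputStream in = fileSystem.open(path)) {
            System.out.println("First byte: " + in.read());
        }
    }
}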
Example 1
Source File: HiveToBaseCuboidMapperPerformanceTest.java From kylin with Apache License 2.0 | 6 votes |
@Ignore("convenient trial tool for dev") @Test public void test() throws IOException, InterruptedException { Configuration hconf = HadoopUtil.getCurrentConfiguration(); HiveToBaseCuboidMapper mapper = new HiveToBaseCuboidMapper(); Context context = MockupMapContext.create(hconf, metadataUrl, cubeName, null); mapper.doSetup(context); Reader reader = new Reader(hconf, SequenceFile.Reader.file(srcPath)); Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf); Text value = new Text(); while (reader.next(key, value)) { mapper.map(key, value, context); } reader.close(); }
Example 2
Source File: CubeStatsWriterTest.java From kylin with Apache License 2.0 | 6 votes |
@Test
public void testWrite() throws IOException {
    Configuration conf = HadoopUtil.getCurrentConfiguration();
    conf.set("fs.defaultFS", "file:///");
    conf.set("mapreduce.framework.name", "local");
    conf.set("mapreduce.application.framework.path", "");
    conf.set("fs.file.impl.disable.cache", "true");

    final Path outputPath = new Path(getTmpFolderPath(), segmentId);
    System.out.println(outputPath);

    Map<Long, HLLCounter> cuboidHLLMap = Maps.newHashMap();
    Set<Long> allCuboids = cube.getDescriptor().getAllCuboids();
    for (Long cuboid : allCuboids) {
        cuboidHLLMap.put(cuboid, createMockHLLCounter());
    }
    CubeStatsWriter.writeCuboidStatistics(conf, outputPath, cuboidHLLMap, 100);

    assertTrue(new File(outputPath.toString(), BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME).exists());
}
Example 3
Source File: HBaseResourceStore.java From Kylin with Apache License 2.0 | 6 votes |
@Override
protected InputStream getResourceImpl(String resPath) throws IOException {
    Result r = getByScan(resPath, B_FAMILY, B_COLUMN);
    if (r == null)
        return null;

    byte[] value = r.getValue(B_FAMILY, B_COLUMN);
    if (value.length == 0) {
        // an empty cell indicates the content was too large for HBase and was redirected to HDFS
        Path redirectPath = bigCellHDFSPath(resPath);
        Configuration hconf = HadoopUtil.getCurrentConfiguration();
        FileSystem fileSystem = FileSystem.get(hconf);
        return fileSystem.open(redirectPath);
    } else {
        return new ByteArrayInputStream(value);
    }
}
Example 4
Source File: CubeStatsReader.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
public CubeStatsResult(Path path, int precision) throws IOException {
    Configuration hadoopConf = HadoopUtil.getCurrentConfiguration();
    Option seqInput = SequenceFile.Reader.file(path);
    try (Reader reader = new SequenceFile.Reader(hadoopConf, seqInput)) {
        LongWritable key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), hadoopConf);
        BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), hadoopConf);
        while (reader.next(key, value)) {
            if (key.get() == 0L) {
                percentage = Bytes.toInt(value.getBytes());
            } else if (key.get() == -1) {
                mapperOverlapRatio = Bytes.toDouble(value.getBytes());
            } else if (key.get() == -2) {
                mapperNumber = Bytes.toInt(value.getBytes());
            } else if (key.get() == -3) {
                sourceRecordCount = Bytes.toLong(value.getBytes());
            } else if (key.get() > 0) {
                HLLCounter hll = new HLLCounter(precision);
                ByteArray byteArray = new ByteArray(value.getBytes());
                hll.readRegisters(byteArray.asBuffer());
                counterMap.put(key.get(), hll);
            }
        }
    }
}
Example 5
Source File: FactDistinctColumnsReducerTest.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
@Test
public void testWriteCuboidStatistics() throws IOException {
    final Configuration conf = HadoopUtil.getCurrentConfiguration();
    File tmp = File.createTempFile("cuboidstatistics", "");
    final Path outputPath = new Path(tmp.getParent().toString() + File.separator + RandomUtil.randomUUID().toString());
    if (!FileSystem.getLocal(conf).exists(outputPath)) {
        // FileSystem.getLocal(conf).create(outputPath);
    }

    System.out.println(outputPath);
    Map<Long, HLLCounter> cuboidHLLMap = Maps.newHashMap();
    CubeStatsWriter.writeCuboidStatistics(conf, outputPath, cuboidHLLMap, 100);
    FileSystem.getLocal(conf).delete(outputPath, true);
}
Example 6
Source File: IICLI.java From Kylin with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws IOException {
    Configuration hconf = HadoopUtil.getCurrentConfiguration();
    IIManager mgr = IIManager.getInstance(KylinConfig.getInstanceFromEnv());

    String iiName = args[0];
    IIInstance ii = mgr.getII(iiName);

    String path = args[1];
    System.out.println("Reading from " + path + " ...");

    TableRecordInfo info = new TableRecordInfo(ii.getFirstSegment());
    IIKeyValueCodec codec = new IIKeyValueCodec(info.getDigest());
    int count = 0;
    for (Slice slice : codec.decodeKeyValue(readSequenceKVs(hconf, path))) {
        for (RawTableRecord rec : slice) {
            System.out.printf(new TableRecord(rec, info).toString());
            count++;
        }
    }
    System.out.println("Total " + count + " records");
}
Example 7
Source File: HBaseResourceStore.java From Kylin with Apache License 2.0 | 6 votes |
private Path writeLargeCellToHdfs(String resPath, byte[] largeColumn, HTableInterface table) throws IOException {
    Path redirectPath = bigCellHDFSPath(resPath);
    Configuration hconf = HadoopUtil.getCurrentConfiguration();
    FileSystem fileSystem = FileSystem.get(hconf);

    if (fileSystem.exists(redirectPath)) {
        fileSystem.delete(redirectPath, true);
    }

    FSDataOutputStream out = fileSystem.create(redirectPath);

    try {
        out.write(largeColumn);
    } finally {
        IOUtils.closeQuietly(out);
    }

    return redirectPath;
}
Example 8
Source File: AppendTrieDictionaryTest.java From kylin with Apache License 2.0 | 6 votes |
private void convertIndexToOldFormat(String baseDir) throws IOException {
    Path basePath = new Path(baseDir);
    FileSystem fs = HadoopUtil.getFileSystem(basePath);

    GlobalDictHDFSStore store = new GlobalDictHDFSStore(baseDir);
    Long[] versions = store.listAllVersions();
    GlobalDictMetadata metadata = store.getMetadata(versions[versions.length - 1]);

    // convert v2 index to v1 index
    Path versionPath = store.getVersionDir(versions[versions.length - 1]);
    Path v2IndexFile = new Path(versionPath, V2_INDEX_NAME);

    fs.delete(v2IndexFile, true);
    GlobalDictHDFSStore.IndexFormat indexFormatV1 = new GlobalDictHDFSStore.IndexFormatV1(fs, HadoopUtil.getCurrentConfiguration());
    indexFormatV1.writeIndexFile(versionPath, metadata);

    // convert v2 fileName format to v1 fileName format
    for (Map.Entry<AppendDictSliceKey, String> entry : metadata.sliceFileMap.entrySet()) {
        fs.rename(new Path(versionPath, entry.getValue()), new Path(versionPath, "cached_" + entry.getKey()));
    }
}
Example 9
Source File: NDCuboidJobTest.java From kylin with Apache License 2.0 | 5 votes |
@Before
public void setup() throws Exception {
    conf = HadoopUtil.getCurrentConfiguration();
    conf.set("fs.default.name", "file:///");
    conf.set("mapreduce.framework.name", "local");
    conf.set("mapreduce.application.framework.path", "");

    // for local runner out-of-memory issue
    conf.set("mapreduce.task.io.sort.mb", "10");
    createTestMetadata();
}
Example 10
Source File: DeployCoprocessorCLI.java From Kylin with Apache License 2.0 | 5 votes |
private static void initHTableCoprocessor(HTableDescriptor desc) throws IOException {
    KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
    Configuration hconf = HadoopUtil.getCurrentConfiguration();
    FileSystem fileSystem = FileSystem.get(hconf);

    String localCoprocessorJar = kylinConfig.getCoprocessorLocalJar();
    Path hdfsCoprocessorJar = DeployCoprocessorCLI.uploadCoprocessorJar(localCoprocessorJar, fileSystem, null);

    DeployCoprocessorCLI.addCoprocessorOnHTable(desc, hdfsCoprocessorJar);
}
Example 11
Source File: NDCuboidJobTest.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
@Before
public void setup() throws Exception {
    conf = HadoopUtil.getCurrentConfiguration();
    conf.set("fs.default.name", "file:///");
    conf.set("mapreduce.framework.name", "local");
    conf.set("mapreduce.application.framework.path", "");

    // for local runner out-of-memory issue
    conf.set("mapreduce.task.io.sort.mb", "10");
    createTestMetadata();
}
Example 12
Source File: MergeCuboidJobTest.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
@Before
public void setup() throws Exception {
    conf = HadoopUtil.getCurrentConfiguration();
    conf.set("fs.default.name", "file:///");
    conf.set("mapreduce.framework.name", "local");
    conf.set("mapreduce.application.framework.path", "");

    // for local runner out-of-memory issue
    conf.set("mapreduce.task.io.sort.mb", "10");
    createTestMetadata();
}
Example 13
Source File: HBaseResourceStoreTest.java From Kylin with Apache License 2.0 | 5 votes |
@Test
public void testHBaseStoreWithLargeCell() throws Exception {
    String path = "/cube/_test_large_cell.json";
    String largeContent = "THIS_IS_A_LARGE_CELL";
    StringEntity content = new StringEntity(largeContent);
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    int origSize = config.getHBaseKeyValueSize();

    ResourceStore store = ResourceStore.getStore(KylinConfig.getInstanceFromEnv());

    try {
        config.setProperty("kylin.hbase.client.keyvalue.maxsize", String.valueOf(largeContent.length() - 1));

        store.deleteResource(path);

        store.putResource(path, content, StringEntity.serializer);
        assertTrue(store.exists(path));
        StringEntity t = store.getResource(path, StringEntity.class, StringEntity.serializer);
        assertEquals(content, t);

        Path redirectPath = ((HBaseResourceStore) store).bigCellHDFSPath(path);
        Configuration hconf = HadoopUtil.getCurrentConfiguration();
        FileSystem fileSystem = FileSystem.get(hconf);

        assertTrue(fileSystem.exists(redirectPath));

        FSDataInputStream in = fileSystem.open(redirectPath);
        assertEquals(largeContent, in.readUTF());
        in.close();

        store.deleteResource(path);
    } finally {
        config.setProperty("kylin.hbase.client.keyvalue.maxsize", "" + origSize);
        store.deleteResource(path);
    }
}
Example 14
Source File: AbstractHadoopJob.java From kylin with Apache License 2.0 | 4 votes |
public AbstractHadoopJob() {
    super(HadoopUtil.getCurrentConfiguration());
}
Example 15
Source File: MrJobInfoExtractor.java From kylin-on-parquet-v2 with Apache License 2.0 | 4 votes |
private void extractRestCheckUrl() {
    Configuration conf = HadoopUtil.getCurrentConfiguration();
    yarnMasterUrlBase = HadoopConfExtractor.extractYarnMasterUrl(conf);
    jobHistoryUrlBase = HadoopConfExtractor.extractJobHistoryUrl(yarnMasterUrlBase, conf);
    logger.info("job history url base: " + jobHistoryUrlBase);
}
Example 16
Source File: MergeStatisticsWithOldStep.java From kylin with Apache License 2.0 | 4 votes |
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment optimizeSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));

    CubeSegment oldSegment = optimizeSegment.getCubeInstance().getOriginalSegmentToOptimize(optimizeSegment);
    Preconditions.checkNotNull(oldSegment,
            "cannot find the original segment to be optimized by " + optimizeSegment);

    KylinConfig kylinConf = cube.getConfig();
    Configuration conf = HadoopUtil.getCurrentConfiguration();
    ResourceStore rs = ResourceStore.getStore(kylinConf);
    int averageSamplingPercentage = 0;

    try {
        //1. Add statistics from optimized segment
        Path statisticsDirPath = new Path(CubingExecutableUtil.getStatisticsPath(this.getParams()));
        FileSystem hdfs = FileSystem.get(conf);
        if (!hdfs.exists(statisticsDirPath)) {
            throw new IOException("StatisticsFilePath " + statisticsDirPath + " does not exists");
        }

        if (!hdfs.isDirectory(statisticsDirPath)) {
            throw new IOException("StatisticsFilePath " + statisticsDirPath + " is not a directory");
        }

        Path[] statisticsFiles = HadoopUtil.getFilteredPath(hdfs, statisticsDirPath,
                BatchConstants.CFG_OUTPUT_STATISTICS);
        if (statisticsFiles == null) {
            throw new IOException("fail to find the statistics file in base dir: " + statisticsDirPath);
        }

        for (Path item : statisticsFiles) {
            CubeStatsReader optimizeSegmentStatsReader = new CubeStatsReader(optimizeSegment, null,
                    optimizeSegment.getConfig(), item);
            averageSamplingPercentage += optimizeSegmentStatsReader.getSamplingPercentage();
            addFromCubeStatsReader(optimizeSegmentStatsReader);
        }

        //2. Add statistics from old segment
        CubeStatsReader oldSegmentStatsReader = new CubeStatsReader(oldSegment, null, oldSegment.getConfig());
        averageSamplingPercentage += oldSegmentStatsReader.getSamplingPercentage();
        addFromCubeStatsReader(oldSegmentStatsReader);

        logger.info("Cuboid set with stats info: " + cuboidHLLMap.keySet().toString());

        //3. Store merged statistics for recommend cuboids
        averageSamplingPercentage = averageSamplingPercentage / 2;

        Set<Long> cuboidsRecommend = cube.getCuboidsRecommend();

        Map<Long, HLLCounter> resultCuboidHLLMap = Maps.newHashMapWithExpectedSize(cuboidsRecommend.size());
        for (Long cuboid : cuboidsRecommend) {
            HLLCounter hll = cuboidHLLMap.get(cuboid);
            if (hll == null) {
                logger.warn("Cannot get the row count stats for cuboid " + cuboid);
            } else {
                resultCuboidHLLMap.put(cuboid, hll);
            }
        }

        String resultDir = CubingExecutableUtil.getMergedStatisticsPath(this.getParams());
        CubeStatsWriter.writeCuboidStatistics(conf, new Path(resultDir), resultCuboidHLLMap,
                averageSamplingPercentage, oldSegmentStatsReader.getSourceRowCount());

        try (FSDataInputStream mergedStats = hdfs
                .open(new Path(resultDir, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME))) {
            // put the statistics to metadata store
            String statisticsFileName = optimizeSegment.getStatisticsResourcePath();
            rs.putResource(statisticsFileName, mergedStats, System.currentTimeMillis());
        }

        //By default, the cube optimization will use in-memory cubing
        CubingJob cubingJob = (CubingJob) getManager()
                .getJob(CubingExecutableUtil.getCubingJobId(this.getParams()));
        StatisticsDecisionUtil.decideCubingAlgorithm(cubingJob, optimizeSegment);

        return new ExecuteResult();
    } catch (IOException e) {
        logger.error("fail to merge cuboid statistics", e);
        return ExecuteResult.createError(e);
    }
}
Example 17
Source File: UpdateDictionaryStep.java From kylin with Apache License 2.0 | 4 votes |
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final DictionaryManager dictMgrHdfs;
    final DictionaryManager dictMgrHbase;
    final CubeInstance cube = cubeMgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    final String dictInfoPath = this.getParams().get(BatchConstants.ARG_DICT_PATH);
    final String metadataUrl = this.getParams().get(BatchConstants.ARG_META_URL);
    final KylinConfig kylinConfHbase = cube.getConfig();
    final KylinConfig kylinConfHdfs = AbstractHadoopJob.loadKylinConfigFromHdfs(metadataUrl);

    Collections.sort(mergingSegments);

    try {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        ResourceStore hbaseRS = ResourceStore.getStore(kylinConfHbase);
        ResourceStore hdfsRS = ResourceStore.getStore(kylinConfHdfs);
        dictMgrHdfs = DictionaryManager.getInstance(kylinConfHdfs);
        dictMgrHbase = DictionaryManager.getInstance(kylinConfHbase);

        // work on copy instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());

        // update cube segment dictionary
        FileStatus[] fileStatuss = fs.listStatus(new Path(dictInfoPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part") || path.getName().startsWith("tmp");
            }
        });

        for (FileStatus fileStatus : fileStatuss) {
            Path filePath = fileStatus.getPath();

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
            Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);

            while (reader.next(key, value)) {
                String tblCol = key.toString();
                String dictInfoResource = value.toString();

                if (StringUtils.isNotEmpty(dictInfoResource)) {
                    logger.info(dictInfoResource);
                    // put dictionary file to metadata store
                    DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource);
                    DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);

                    if (dicInfoHbase != null) {
                        TblColRef tblColRef = cube.getDescriptor().findColumnRef(tblCol.split(":")[0], tblCol.split(":")[1]);
                        newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
                    }
                }
            }

            IOUtils.closeStream(reader);
        }

        CubeSegment lastSeg = mergingSegments.get(mergingSegments.size() - 1);
        for (Map.Entry<String, String> entry : lastSeg.getSnapshots().entrySet()) {
            newSegCopy.putSnapshotResPath(entry.getKey(), entry.getValue());
        }

        // update statistics
        // put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        hbaseRS.putResource(statisticsFileName, hdfsRS.getResource(newSegment.getStatisticsResourcePath()).content(),
                System.currentTimeMillis());

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        cubeMgr.updateCube(update);

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary", e);
        return ExecuteResult.createError(e);
    }
}
Example 18
Source File: CubeHFileMapper2Test.java From kylin-on-parquet-v2 with Apache License 2.0 | 4 votes |
@Test
public void testBasic() throws Exception {
    Configuration hconf = HadoopUtil.getCurrentConfiguration();
    Context context = MockupMapContext.create(hconf, cubeName, outKV);

    CubeHFileMapper mapper = new CubeHFileMapper();
    mapper.doSetup(context);

    Text key = new Text("not important");
    Text value = new Text(new byte[] { 2, 2, 51, -79, 1 });

    mapper.map(key, value, context);

    KeyValue outValue = (KeyValue) outKV[1];

    assertTrue(Bytes.compareTo(value.getBytes(), 0, value.getLength(), outValue.getValueArray(),
            outValue.getValueOffset(), outValue.getValueLength()) == 0);
}
Example 19
Source File: CubeHFileMapper2Test.java From kylin with Apache License 2.0 | 4 votes |
@Test
public void testBasic() throws Exception {
    Configuration hconf = HadoopUtil.getCurrentConfiguration();
    Context context = MockupMapContext.create(hconf, cubeName, outKV);

    CubeHFileMapper mapper = new CubeHFileMapper();
    mapper.doSetup(context);

    Text key = new Text("not important");
    Text value = new Text(new byte[] { 2, 2, 51, -79, 1 });

    mapper.map(key, value, context);

    KeyValue outValue = (KeyValue) outKV[1];

    assertTrue(Bytes.compareTo(value.getBytes(), 0, value.getLength(), outValue.getValueArray(),
            outValue.getValueOffset(), outValue.getValueLength()) == 0);
}
Example 20
Source File: GlobalDictHDFSStore.java From kylin-on-parquet-v2 with Apache License 2.0 | 4 votes |
public GlobalDictHDFSStore(String baseDir) throws IOException {
    super(baseDir);
    this.basePath = new Path(baseDir);
    this.conf = HadoopUtil.getCurrentConfiguration();
    this.fileSystem = HadoopUtil.getFileSystem(baseDir);
}