org.apache.kylin.dict.DictionaryManager#getInstance

Source File: MergeDictionaryMapper.java From kylin-on-parquet-v2 with Apache License 2.0

6 votes

@Override
protected void doSetup(Context context) throws IOException, InterruptedException {
    super.doSetup(context);

    final SerializableConfiguration sConf = new SerializableConfiguration(context.getConfiguration());
    final String metaUrl = context.getConfiguration().get(BatchConstants.ARG_META_URL);
    final String cubeName = context.getConfiguration().get(BatchConstants.ARG_CUBE_NAME);
    final String segmentIds = context.getConfiguration().get(MergeDictionaryJob.OPTION_MERGE_SEGMENT_IDS.getOpt());

    final KylinConfig kylinConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);
    final CubeInstance cubeInstance = CubeManager.getInstance(kylinConfig).getCube(cubeName);
    final CubeDesc cubeDesc = CubeDescManager.getInstance(kylinConfig).getCubeDesc(cubeInstance.getDescName());

    mergingSegments = getMergingSegments(cubeInstance, StringUtil.splitByComma(segmentIds));
    tblColRefs = cubeDesc.getAllColumnsNeedDictionaryBuilt().toArray(new TblColRef[0]);
    dictMgr = DictionaryManager.getInstance(kylinConfig);
}

Source File: MergeDictionaryStep.java From kylin-on-parquet-v2 with Apache License 2.0

6 votes

/**
 * For the new segment, we need to create new dimension dictionaries by merging underlying
 * dictionaries. (https://issues.apache.org/jira/browse/KYLIN-2457, https://issues.apache.org/jira/browse/KYLIN-2800)
 * @param cube
 * @param newSeg
 * @throws IOException
 */
private void makeDictForNewSegment(KylinConfig conf, CubeInstance cube, CubeSegment newSeg, List<CubeSegment> mergingSegments) throws IOException {
    DictionaryManager dictMgr = DictionaryManager.getInstance(conf);
    CubeDesc cubeDesc = cube.getDescriptor();

    for (TblColRef col : cubeDesc.getAllColumnsNeedDictionaryBuilt()) {
        logger.info("Merging fact table dictionary on : " + col);
        List<DictionaryInfo> dictInfos = new ArrayList<DictionaryInfo>();
        for (CubeSegment segment : mergingSegments) {
            logger.info("Including fact table dictionary of segment : " + segment);
            if (segment.getDictResPath(col) != null) {
                DictionaryInfo dictInfo = dictMgr.getDictionaryInfo(segment.getDictResPath(col));
                if (dictInfo != null && !dictInfos.contains(dictInfo)) {
                    dictInfos.add(dictInfo);
                } else {
                    logger.warn("Failed to load DictionaryInfo from " + segment.getDictResPath(col));
                }
            }
        }
        mergeDictionaries(dictMgr, newSeg, dictInfos, col);
    }
}

Source File: CubingUtils.java From kylin-on-parquet-v2 with Apache License 2.0

6 votes

@SuppressWarnings("unchecked")
public static Map<TblColRef, Dictionary<String>> writeDictionary(CubeSegment cubeSegment,
        Map<TblColRef, Dictionary<String>> dictionaryMap, long startOffset, long endOffset) {
    Map<TblColRef, Dictionary<String>> realDictMap = Maps.newHashMap();

    for (Map.Entry<TblColRef, Dictionary<String>> entry : dictionaryMap.entrySet()) {
        final TblColRef tblColRef = entry.getKey();
        final Dictionary<String> dictionary = entry.getValue();
        IReadableTable.TableSignature signature = new IReadableTable.TableSignature();
        signature.setLastModifiedTime(System.currentTimeMillis());
        signature.setPath(String.format(Locale.ROOT, "streaming_%s_%s", startOffset, endOffset));
        signature.setSize(endOffset - startOffset);
        DictionaryInfo dictInfo = new DictionaryInfo(tblColRef.getColumnDesc(), tblColRef.getDatatype(), signature);
        logger.info("writing dictionary for TblColRef:" + tblColRef.toString());
        DictionaryManager dictionaryManager = DictionaryManager.getInstance(cubeSegment.getCubeDesc().getConfig());
        try {
            DictionaryInfo realDict = dictionaryManager.trySaveNewDict(dictionary, dictInfo);
            cubeSegment.putDictResPath(tblColRef, realDict.getResourcePath());
            realDictMap.put(tblColRef, (Dictionary<String>) realDict.getDictionaryObject());
        } catch (IOException e) {
            throw new RuntimeException("error save dictionary for column:" + tblColRef, e);
        }
    }

    return realDictMap;
}

Source File: CubingUtils.java From kylin with Apache License 2.0

6 votes

@SuppressWarnings("unchecked")
public static Map<TblColRef, Dictionary<String>> writeDictionary(CubeSegment cubeSegment,
        Map<TblColRef, Dictionary<String>> dictionaryMap, long startOffset, long endOffset) {
    Map<TblColRef, Dictionary<String>> realDictMap = Maps.newHashMap();

    for (Map.Entry<TblColRef, Dictionary<String>> entry : dictionaryMap.entrySet()) {
        final TblColRef tblColRef = entry.getKey();
        final Dictionary<String> dictionary = entry.getValue();
        IReadableTable.TableSignature signature = new IReadableTable.TableSignature();
        signature.setLastModifiedTime(System.currentTimeMillis());
        signature.setPath(String.format(Locale.ROOT, "streaming_%s_%s", startOffset, endOffset));
        signature.setSize(endOffset - startOffset);
        DictionaryInfo dictInfo = new DictionaryInfo(tblColRef.getColumnDesc(), tblColRef.getDatatype(), signature);
        logger.info("writing dictionary for TblColRef:" + tblColRef.toString());
        DictionaryManager dictionaryManager = DictionaryManager.getInstance(cubeSegment.getCubeDesc().getConfig());
        try {
            DictionaryInfo realDict = dictionaryManager.trySaveNewDict(dictionary, dictInfo);
            cubeSegment.putDictResPath(tblColRef, realDict.getResourcePath());
            realDictMap.put(tblColRef, (Dictionary<String>) realDict.getDictionaryObject());
        } catch (IOException e) {
            throw new RuntimeException("error save dictionary for column:" + tblColRef, e);
        }
    }

    return realDictMap;
}

Source File: MergeDictionaryMapper.java From kylin with Apache License 2.0

6 votes

@Override
protected void doSetup(Context context) throws IOException, InterruptedException {
    super.doSetup(context);

    final SerializableConfiguration sConf = new SerializableConfiguration(context.getConfiguration());
    final String metaUrl = context.getConfiguration().get(BatchConstants.ARG_META_URL);
    final String cubeName = context.getConfiguration().get(BatchConstants.ARG_CUBE_NAME);
    final String segmentIds = context.getConfiguration().get(MergeDictionaryJob.OPTION_MERGE_SEGMENT_IDS.getOpt());

    final KylinConfig kylinConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);
    final CubeInstance cubeInstance = CubeManager.getInstance(kylinConfig).getCube(cubeName);
    final CubeDesc cubeDesc = CubeDescManager.getInstance(kylinConfig).getCubeDesc(cubeInstance.getDescName());

    mergingSegments = getMergingSegments(cubeInstance, StringUtil.splitByComma(segmentIds));
    tblColRefs = cubeDesc.getAllColumnsNeedDictionaryBuilt().toArray(new TblColRef[0]);
    dictMgr = DictionaryManager.getInstance(kylinConfig);
}

Source File: SrcClusterUtil.java From kylin with Apache License 2.0

5 votes

public SrcClusterUtil(String configURI, boolean ifJobFSHAEnabled, boolean ifHBaseFSHAEnabled) throws IOException {
    super(configURI, ifJobFSHAEnabled, ifHBaseFSHAEnabled);

    this.hbaseDataDir = hbaseConf.get(hbaseRootDirConfKey) + "/data/default/";
    metadataManager = TableMetadataManager.getInstance(kylinConfig);
    modelManager = DataModelManager.getInstance(kylinConfig);
    projectManager = ProjectManager.getInstance(kylinConfig);
    hybridManager = HybridManager.getInstance(kylinConfig);
    cubeManager = CubeManager.getInstance(kylinConfig);
    cubeDescManager = CubeDescManager.getInstance(kylinConfig);
    realizationRegistry = RealizationRegistry.getInstance(kylinConfig);
    dictionaryManager = DictionaryManager.getInstance(kylinConfig);
    snapshotManager = SnapshotManager.getInstance(kylinConfig);
    extSnapshotInfoManager = ExtTableSnapshotInfoManager.getInstance(kylinConfig);
}

Source File: FactDistinctColumnsMapper.java From Kylin with Apache License 2.0

5 votes

@Override
protected void setup(Context context) throws IOException {
    super.publishConfiguration(context.getConfiguration());

    Configuration conf = context.getConfiguration();

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
    cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
    cube = CubeManager.getInstance(config).getCube(cubeName);
    cubeDesc = cube.getDescriptor();
    intermediateTableDesc = new CubeJoinedFlatTableDesc(cubeDesc, null);

    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
    Cuboid baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);
    List<TblColRef> columns = baseCuboid.getColumns();

    ArrayList<Integer> factDictCols = new ArrayList<Integer>();
    RowKeyDesc rowkey = cubeDesc.getRowkey();
    DictionaryManager dictMgr = DictionaryManager.getInstance(config);
    for (int i = 0; i < columns.size(); i++) {
        TblColRef col = columns.get(i);
        if (rowkey.isUseDictionary(col) == false)
            continue;

        String scanTable = (String) dictMgr.decideSourceData(cubeDesc.getModel(), cubeDesc.getRowkey().getDictionary(col), col, null)[0];
        if (cubeDesc.getModel().isFactTable(scanTable)) {
            factDictCols.add(i);
        }
    }
    this.factDictCols = new int[factDictCols.size()];
    for (int i = 0; i < factDictCols.size(); i++)
        this.factDictCols[i] = factDictCols.get(i);

    schema = HCatInputFormat.getTableSchema(context.getConfiguration());
}

Source File: SparkBuildDictionary.java From kylin with Apache License 2.0

5 votes

private void init() {
    try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
            .setAndUnsetThreadLocalConfig(config)) {
        cubeSegment = CubeManager.getInstance(config).getCube(cubeName).getSegmentById(segmentId);
        dictManager = DictionaryManager.getInstance(config);
    }
    initialized = true;
}

Source File: SparkMergingDictionary.java From kylin with Apache License 2.0

5 votes

private void init() {
    kylinConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
    try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
            .setAndUnsetThreadLocalConfig(kylinConfig)) {
        CubeInstance cubeInstance = CubeManager.getInstance(kylinConfig).getCube(cubeName);
        dictMgr = DictionaryManager.getInstance(kylinConfig);
        mergingSegments = getMergingSegments(cubeInstance, segmentIds);
    }
}

Source File: FlinkMergingDictionary.java From kylin with Apache License 2.0

5 votes

@Override
public void open(org.apache.flink.configuration.Configuration parameters) throws Exception {
    kylinConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
    try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
            .setAndUnsetThreadLocalConfig(kylinConfig)) {
        CubeInstance cubeInstance = CubeManager.getInstance(kylinConfig).getCube(cubeName);
        dictMgr = DictionaryManager.getInstance(kylinConfig);
        mergingSegments = getMergingSegments(cubeInstance, segmentIds);
    }
}

Source File: SparkBuildDictionary.java From kylin-on-parquet-v2 with Apache License 2.0

5 votes

private void init() {
    try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
            .setAndUnsetThreadLocalConfig(config)) {
        cubeSegment = CubeManager.getInstance(config).getCube(cubeName).getSegmentById(segmentId);
        dictManager = DictionaryManager.getInstance(config);
    }
    initialized = true;
}

Source File: SparkMergingDictionary.java From kylin-on-parquet-v2 with Apache License 2.0

5 votes

private void init() {
    kylinConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
    try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
            .setAndUnsetThreadLocalConfig(kylinConfig)) {
        CubeInstance cubeInstance = CubeManager.getInstance(kylinConfig).getCube(cubeName);
        dictMgr = DictionaryManager.getInstance(kylinConfig);
        mergingSegments = getMergingSegments(cubeInstance, segmentIds);
    }
}

Source File: TableRecordInfo.java From Kylin with Apache License 2.0

5 votes

public TableRecordInfo(IISegment iiSegment) {

        seg = iiSegment;
        desc = seg.getIIInstance().getDescriptor();
        allColumns = desc.listAllColumns();
        nColumns = allColumns.size();
        dictionaries = new Dictionary<?>[nColumns];
        measureSerializers = new FixedLenMeasureCodec<?>[nColumns];

        DictionaryManager dictMgr = DictionaryManager.getInstance(desc.getConfig());
        int index = 0;
        for (TblColRef tblColRef : desc.listAllColumns()) {
            ColumnDesc col = tblColRef.getColumn();
            if (desc.isMetricsCol(index)) {
                measureSerializers[index] = FixedLenMeasureCodec.get(col.getType());
            } else {
                String dictPath = seg.getDictResPath(tblColRef);
                try {
                    dictionaries[index] = dictMgr.getDictionary(dictPath);
                } catch (IOException e) {
                    throw new RuntimeException("dictionary " + dictPath + " does not exist ", e);
                }
            }
            index++;
        }

        digest = createDigest();
    }

Source File: CubeManager.java From Kylin with Apache License 2.0

4 votes

private DictionaryManager getDictionaryManager() {
    return DictionaryManager.getInstance(config);
}

Source File: ITDictionaryManagerTest.java From kylin with Apache License 2.0

4 votes

@Test
public void basic() throws Exception {
    dictMgr = DictionaryManager.getInstance(getTestConfig());
    CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig())
            .getCubeDesc("test_kylin_cube_without_slr_desc");
    TblColRef col = cubeDesc.findColumnRef("DEFAULT.TEST_KYLIN_FACT", "LSTG_FORMAT_NAME");

    MockDistinctColumnValuesProvider mockupData = new MockDistinctColumnValuesProvider("A", "B", "C");

    DictionaryInfo info1 = dictMgr.buildDictionary(col, mockupData.getDistinctValuesFor(col));
    System.out.println(JsonUtil.writeValueAsIndentString(info1));

    Thread.sleep(1000);

    DictionaryInfo info2 = dictMgr.buildDictionary(col, mockupData.getDistinctValuesFor(col));
    System.out.println(JsonUtil.writeValueAsIndentString(info2));

    // test check duplicate
    assertEquals(info1.getUuid(), info2.getUuid());
    assertEquals(info1.getResourcePath(), info1.getResourcePath());
    assertNotEquals(info1.getLastModified(), info2.getLastModified());
    assertNotEquals(info1, info2);
    assertEquals(info1.getDictionaryObject(), info2.getDictionaryObject());

    // verify dictionary entries
    @SuppressWarnings("unchecked")
    Dictionary<String> dict = (Dictionary<String>) info1.getDictionaryObject();
    int id = 0;
    for (String v : mockupData.set) {
        assertEquals(id, dict.getIdFromValue(v, 0));
        assertEquals(v, dict.getValueFromId(id));
        id++;
    }

    // test empty dictionary
    MockDistinctColumnValuesProvider mockupEmpty = new MockDistinctColumnValuesProvider();
    DictionaryInfo info3 = dictMgr.buildDictionary(col, mockupEmpty.getDistinctValuesFor(col));
    System.out.println(JsonUtil.writeValueAsIndentString(info3));
    assertEquals(0, info3.getCardinality());
    assertEquals(0, info3.getDictionaryObject().getSize());
    System.out.println(info3.getDictionaryObject().getMaxId());
    System.out.println(info3.getDictionaryObject().getMinId());
    System.out.println(info3.getDictionaryObject().getSizeOfId());
}

Source File: UpdateDictionaryStep.java From kylin with Apache License 2.0

4 votes

@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final DictionaryManager dictMgrHdfs;
    final DictionaryManager dictMgrHbase;
    final CubeInstance cube = cubeMgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    final String dictInfoPath = this.getParams().get(BatchConstants.ARG_DICT_PATH);
    final String metadataUrl = this.getParams().get(BatchConstants.ARG_META_URL);

    final KylinConfig kylinConfHbase = cube.getConfig();
    final KylinConfig kylinConfHdfs = AbstractHadoopJob.loadKylinConfigFromHdfs(metadataUrl);

    Collections.sort(mergingSegments);

    try {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        ResourceStore hbaseRS = ResourceStore.getStore(kylinConfHbase);
        ResourceStore hdfsRS = ResourceStore.getStore(kylinConfHdfs);
        dictMgrHdfs = DictionaryManager.getInstance(kylinConfHdfs);
        dictMgrHbase = DictionaryManager.getInstance(kylinConfHbase);

        // work on copy instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());

        // update cube segment dictionary

        FileStatus[] fileStatuss = fs.listStatus(new Path(dictInfoPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part") || path.getName().startsWith("tmp");
            }
        });

        for (FileStatus fileStatus : fileStatuss) {
            Path filePath = fileStatus.getPath();

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
            Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);

            while (reader.next(key, value)) {
                String tblCol = key.toString();
                String dictInfoResource = value.toString();

                if (StringUtils.isNotEmpty(dictInfoResource)) {
                    logger.info(dictInfoResource);
                    // put dictionary file to metadata store
                    DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource);
                    DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);

                    if (dicInfoHbase != null){
                        TblColRef tblColRef = cube.getDescriptor().findColumnRef(tblCol.split(":")[0], tblCol.split(":")[1]);
                        newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
                    }
                }
            }

            IOUtils.closeStream(reader);
        }

        CubeSegment lastSeg = mergingSegments.get(mergingSegments.size() - 1);
        for (Map.Entry<String, String> entry : lastSeg.getSnapshots().entrySet()) {
            newSegCopy.putSnapshotResPath(entry.getKey(), entry.getValue());
        }

        // update statistics
        // put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        hbaseRS.putResource(statisticsFileName, hdfsRS.getResource(newSegment.getStatisticsResourcePath()).content(), System.currentTimeMillis());

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        cubeMgr.updateCube(update);

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary", e);
        return ExecuteResult.createError(e);
    }
}

Source File: MergeCuboidMapperTest.java From Kylin with Apache License 2.0

4 votes

@Before
public void setUp() throws Exception {

    createTestMetadata();

    logger.info("The metadataUrl is : " + getTestConfig());

    MetadataManager.clearCache();
    CubeManager.clearCache();
    ProjectManager.clearCache();
    DictionaryManager.clearCache();

    // hack for distributed cache
    // CubeManager.removeInstance(KylinConfig.createInstanceFromUri("../job/meta"));//to
    // make sure the following mapper could get latest CubeManger
    FileUtils.deleteDirectory(new File("../job/meta"));

    MergeCuboidMapper mapper = new MergeCuboidMapper();
    mapDriver = MapDriver.newMapDriver(mapper);

    cubeManager = CubeManager.getInstance(getTestConfig());
    cube = cubeManager.getCube("test_kylin_cube_without_slr_left_join_ready_2_segments");
    dictionaryManager = DictionaryManager.getInstance(getTestConfig());
    lfn = cube.getDescriptor().findColumnRef("DEFAULT.TEST_KYLIN_FACT", "LSTG_FORMAT_NAME");
    lsi = cube.getDescriptor().findColumnRef("DEFAULT.TEST_KYLIN_FACT", "CAL_DT");
    ssc = cube.getDescriptor().findColumnRef("DEFAULT.TEST_CATEGORY_GROUPINGS", "META_CATEG_NAME");

    DictionaryInfo sharedDict = makeSharedDict();

    boolean isFirstSegment = true;
    for (CubeSegment segment : cube.getSegments()) {

        TableSignature signature = new TableSignature();
        signature.setSize(100);
        signature.setLastModifiedTime(System.currentTimeMillis());
        signature.setPath("fake_dict_for" + lfn.getName() + segment.getName());

        DictionaryInfo newDictInfo = new DictionaryInfo(lfn.getTable(), lfn.getColumn().getName(), lfn.getColumn().getZeroBasedIndex(), "string", signature, "");

        List<byte[]> values = new ArrayList<byte[]>();
        values.add(new byte[] { 97, 97, 97 });
        if (isFirstSegment)
            values.add(new byte[] { 99, 99, 99 });
        else
            values.add(new byte[] { 98, 98, 98 });
        Dictionary<?> dict = DictionaryGenerator.buildDictionaryFromValueList(newDictInfo, values);
        dictionaryManager.trySaveNewDict(dict, newDictInfo);
        ((TrieDictionary) dict).dump(System.out);

        segment.putDictResPath(lfn, newDictInfo.getResourcePath());
        segment.putDictResPath(lsi, sharedDict.getResourcePath());
        segment.putDictResPath(ssc, sharedDict.getResourcePath());

        // cubeManager.saveResource(segment.getCubeInstance());
        // cubeManager.afterCubeUpdated(segment.getCubeInstance());
        cubeManager.updateCube(cube);

        isFirstSegment = false;
    }

}

Source File: CubeManager.java From kylin with Apache License 2.0

4 votes

private DictionaryManager getDictionaryManager() {
    return DictionaryManager.getInstance(config);
}

Source File: CubeManager.java From kylin-on-parquet-v2 with Apache License 2.0

4 votes

private DictionaryManager getDictionaryManager() {
    return DictionaryManager.getInstance(config);
}

Source File: UpdateDictionaryStep.java From kylin-on-parquet-v2 with Apache License 2.0

4 votes

@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final DictionaryManager dictMgrHdfs;
    final DictionaryManager dictMgrHbase;
    final CubeInstance cube = cubeMgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    final String dictInfoPath = this.getParams().get(BatchConstants.ARG_DICT_PATH);
    final String metadataUrl = this.getParams().get(BatchConstants.ARG_META_URL);

    final KylinConfig kylinConfHbase = cube.getConfig();
    final KylinConfig kylinConfHdfs = AbstractHadoopJob.loadKylinConfigFromHdfs(metadataUrl);

    Collections.sort(mergingSegments);

    try {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        ResourceStore hbaseRS = ResourceStore.getStore(kylinConfHbase);
        ResourceStore hdfsRS = ResourceStore.getStore(kylinConfHdfs);
        dictMgrHdfs = DictionaryManager.getInstance(kylinConfHdfs);
        dictMgrHbase = DictionaryManager.getInstance(kylinConfHbase);

        // work on copy instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());

        // update cube segment dictionary

        FileStatus[] fileStatuss = fs.listStatus(new Path(dictInfoPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part") || path.getName().startsWith("tmp");
            }
        });

        for (FileStatus fileStatus : fileStatuss) {
            Path filePath = fileStatus.getPath();

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
            Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);

            while (reader.next(key, value)) {
                String tblCol = key.toString();
                String dictInfoResource = value.toString();

                if (StringUtils.isNotEmpty(dictInfoResource)) {
                    logger.info(dictInfoResource);
                    // put dictionary file to metadata store
                    DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource);
                    DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);

                    if (dicInfoHbase != null){
                        TblColRef tblColRef = cube.getDescriptor().findColumnRef(tblCol.split(":")[0], tblCol.split(":")[1]);
                        newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
                    }
                }
            }

            IOUtils.closeStream(reader);
        }

        CubeSegment lastSeg = mergingSegments.get(mergingSegments.size() - 1);
        for (Map.Entry<String, String> entry : lastSeg.getSnapshots().entrySet()) {
            newSegCopy.putSnapshotResPath(entry.getKey(), entry.getValue());
        }

        // update statistics
        // put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        hbaseRS.putResource(statisticsFileName, hdfsRS.getResource(newSegment.getStatisticsResourcePath()).content(), System.currentTimeMillis());

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        cubeMgr.updateCube(update);

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary", e);
        return ExecuteResult.createError(e);
    }
}

Java Code Examples for org.apache.kylin.dict.DictionaryManager#getInstance()