org.apache.kylin.dict.TrieDictionary Java Examples

Source File: MergeCuboidMapperTest.java From Kylin with Apache License 2.0

6 votes

/**
 * Builds a small two-value dictionary, saves it through {@code dictionaryManager},
 * dumps it to standard output and returns the {@link DictionaryInfo} it was saved under.
 *
 * @return the dictionary info describing the newly saved dictionary
 * @throws IOException declared by the signature; raised by the dictionary build/save calls
 */
private DictionaryInfo makeSharedDict() throws IOException {
    // Fake table signature; only the path distinguishes it from the per-segment ones.
    TableSignature fakeSignature = new TableSignature();
    fakeSignature.setPath("fake_common_dict");
    fakeSignature.setSize(100);
    fakeSignature.setLastModifiedTime(System.currentTimeMillis());

    DictionaryInfo sharedInfo = new DictionaryInfo("", "", 0, "string", fakeSignature, "");

    // Two dictionary entries: the ASCII bytes of "eee" and "fff".
    List<byte[]> entries = new ArrayList<byte[]>();
    entries.add(new byte[] { 'e', 'e', 'e' });
    entries.add(new byte[] { 'f', 'f', 'f' });

    Dictionary<?> built = DictionaryGenerator.buildDictionaryFromValueList(sharedInfo, entries);
    dictionaryManager.trySaveNewDict(built, sharedInfo);
    ((TrieDictionary) built).dump(System.out);

    return sharedInfo;
}

Source File: AppendDictNode.java From kylin-on-parquet-v2 with Apache License 2.0

5 votes

/**
 * Serializes one trie node into {@code trieBytes} starting at {@code offset}.
 * <p>
 * Written in order: the flag bits OR-ed into the first byte of the child-offset slot,
 * a one-byte length of {@code n.part}, the bytes of {@code n.part}, and, when the node
 * ends a value, its id in {@code sizeId} bytes.
 *
 * @param n               node to write
 * @param offset          position in {@code trieBytes} where the node starts
 * @param isLastChild     whether to set {@code TrieDictionary.BIT_IS_LAST_CHILD}
 * @param sizeChildOffset number of bytes reserved for the child offset
 * @param sizeId          number of bytes used to store the node id
 * @param trieBytes       destination buffer
 * @return the position immediately after the bytes written for this node
 * @throws RuntimeException if {@code n.part} is longer than 255 bytes
 */
private int build_writeNode(AppendDictNode n, int offset, boolean isLastChild, int sizeChildOffset, int sizeId,
        byte[] trieBytes) {
    int cursor = offset;

    // childOffset: only the flag bits are set here; the slot itself is skipped over
    int flagBits = 0;
    if (isLastChild) {
        flagBits |= TrieDictionary.BIT_IS_LAST_CHILD;
    }
    if (n.isEndOfValue) {
        flagBits |= TrieDictionary.BIT_IS_END_OF_VALUE;
    }
    if (flagBits != 0) {
        trieBytes[cursor] |= flagBits;
    }
    cursor += sizeChildOffset;

    // nValueBytes: the length has to fit in a single unsigned byte
    final int partLength = n.part.length;
    if (partLength > 255) {
        throw new RuntimeException(
                "Value length is " + partLength + " and larger than 255: " + Bytes.toStringBinary(n.part));
    }
    BytesUtil.writeUnsigned(partLength, trieBytes, cursor, 1);
    cursor += 1;

    // valueBytes
    System.arraycopy(n.part, 0, trieBytes, cursor, partLength);
    cursor += partLength;

    // id, present only on nodes that terminate a value
    if (!n.isEndOfValue) {
        return cursor;
    }
    checkValidId(n.id);
    BytesUtil.writeUnsigned(n.id, trieBytes, cursor, sizeId);
    return cursor + sizeId;
}

Source File: AppendDictNode.java From kylin with Apache License 2.0

5 votes

/**
 * Serializes one trie node into {@code trieBytes} starting at {@code offset}.
 * <p>
 * Written in order: the flag bits OR-ed into the first byte of the child-offset slot,
 * a one-byte length of {@code n.part}, the bytes of {@code n.part}, and, when the node
 * ends a value, its id in {@code sizeId} bytes.
 *
 * @param n               node to write
 * @param offset          position in {@code trieBytes} where the node starts
 * @param isLastChild     whether to set {@code TrieDictionary.BIT_IS_LAST_CHILD}
 * @param sizeChildOffset number of bytes reserved for the child offset
 * @param sizeId          number of bytes used to store the node id
 * @param trieBytes       destination buffer
 * @return the position immediately after the bytes written for this node
 * @throws RuntimeException if {@code n.part} is longer than 255 bytes
 */
private int build_writeNode(AppendDictNode n, int offset, boolean isLastChild, int sizeChildOffset, int sizeId,
        byte[] trieBytes) {
    int cursor = offset;

    // childOffset: only the flag bits are set here; the slot itself is skipped over
    int flagBits = 0;
    if (isLastChild) {
        flagBits |= TrieDictionary.BIT_IS_LAST_CHILD;
    }
    if (n.isEndOfValue) {
        flagBits |= TrieDictionary.BIT_IS_END_OF_VALUE;
    }
    if (flagBits != 0) {
        trieBytes[cursor] |= flagBits;
    }
    cursor += sizeChildOffset;

    // nValueBytes: the length has to fit in a single unsigned byte
    final int partLength = n.part.length;
    if (partLength > 255) {
        throw new RuntimeException(
                "Value length is " + partLength + " and larger than 255: " + Bytes.toStringBinary(n.part));
    }
    BytesUtil.writeUnsigned(partLength, trieBytes, cursor, 1);
    cursor += 1;

    // valueBytes
    System.arraycopy(n.part, 0, trieBytes, cursor, partLength);
    cursor += partLength;

    // id, present only on nodes that terminate a value
    if (!n.isEndOfValue) {
        return cursor;
    }
    checkValidId(n.id);
    BytesUtil.writeUnsigned(n.id, trieBytes, cursor, sizeId);
    return cursor + sizeId;
}

Source File: AppendDictNode.java From kylin-on-parquet-v2 with Apache License 2.0

4 votes

/**
 * Writes {@code childOffset} into the child-offset slot at {@code parentOffset} and then
 * OR-s back the last-child / end-of-value bits that the first byte of the slot held
 * before the write.
 *
 * @param parentOffset    position of the parent node's child-offset slot in {@code trieBytes}
 * @param childOffset     offset value to store
 * @param sizeChildOffset number of bytes the slot occupies
 * @param trieBytes       buffer modified in place
 */
private void build_overwriteChildOffset(int parentOffset, int childOffset, int sizeChildOffset, byte[] trieBytes) {
    final int flagMask = TrieDictionary.BIT_IS_LAST_CHILD | TrieDictionary.BIT_IS_END_OF_VALUE;

    // Capture the flag bits first: the offset write below replaces the whole slot.
    final int savedFlags = trieBytes[parentOffset] & flagMask;

    BytesUtil.writeUnsigned(childOffset, trieBytes, parentOffset, sizeChildOffset);
    trieBytes[parentOffset] |= savedFlags;
}

Source File: ColumnarMemoryStorePersister.java From kylin-on-parquet-v2 with Apache License 2.0

4 votes

/**
 * Persists one dimension column: every value in {@code dimValueList} is encoded to a
 * fixed-length byte array, handed to the column data writer and to the column's index
 * writer, and the index is then written to {@code indexOut}. A new
 * {@link DimensionMetaInfo} recording the name, start offset, data length, index length,
 * null flag and compression of the column is appended to {@code dimensionMetaList}.
 * <p>
 * NOTE(review): the earlier description of this method gave the on-disk layout as
 * "dictionary, then dimension values, then index"; only the value and index writes are
 * visible in this method, so confirm where (or whether) the dictionary part is written.
 *
 * @param cuboidId          id of the cuboid; used here only in the debug log message
 * @param dimValueList      values of the dimension; each element is cast to {@code String}
 *                          and may be {@code null}
 * @param dimensionMetaList list that receives the meta info created for this dimension
 * @param indexOut          counting stream written to; its byte count is used to derive
 *                          the start offset and the index length
 * @param dimension         the dimension column being persisted
 * @param dictMaps          dictionaries per column, looked up when the dimension is in
 *                          {@code dimensionsUseDictEncoding}
 * @throws IOException declared by the signature; presumably raised by the stream writes
 */
private void persistDimension(long cuboidId, List<Object> dimValueList, List<DimensionMetaInfo> dimensionMetaList,
        CountingOutputStream indexOut, TblColRef dimension, Map<TblColRef, Dictionary<String>> dictMaps)
        throws IOException {
    Stopwatch stopwatch = new Stopwatch();
    stopwatch.start();

    DimensionMetaInfo dimensionMeta = new DimensionMetaInfo();
    dimensionMetaList.add(dimensionMeta);

    // Pick the value encoding and the matching index column descriptor.
    DimensionEncoding encoding;
    IIColumnDescriptor columnDescriptor;
    if (dimensionsUseDictEncoding.contains(dimension)) {
        Dictionary<String> dict = dictMaps.get(dimension);
        encoding = new DictionaryDimEnc(dict);
        if (dict instanceof TrieDictionary) {
            // Trie dictionaries get a descriptor built from the dictionary's min/max id.
            columnDescriptor = new SeqIIColumnDescriptor(dimension.getName(), dict.getMinId(), dict.getMaxId());
        } else {
            columnDescriptor = new FixLenIIColumnDescriptor(dimension.getName(), encoding.getLengthOfEncoding());
        }
    } else {
        // Not dictionary-encoded: build the encoding from the rowkey column description.
        RowKeyColDesc colDesc = cubeDesc.getRowkey().getColDesc(dimension);
        encoding = DimensionEncodingFactory.create(colDesc.getEncodingName(), colDesc.getEncodingArgs(),
                colDesc.getEncodingVersion());
        columnDescriptor = new FixLenIIColumnDescriptor(dimension.getName(), encoding.getLengthOfEncoding());
    }
    dimensionMeta.setName(dimension.getName());
    // Start offset is the number of bytes already counted by indexOut before this column.
    dimensionMeta.setStartOffset((int) indexOut.getCount());
    int fixEncodingLen = encoding.getLengthOfEncoding();

    // dataOut wraps indexOut, so dataOut.size() below counts only bytes written through dataOut.
    DataOutputStream dataOut = new DataOutputStream(indexOut);
    ColumnarStoreDimDesc cStoreDimDesc = getColumnarStoreDimDesc(dimension, encoding);
    ColumnDataWriter columnDataWriter = cStoreDimDesc.getDimWriter(dataOut, dimValueList.size());

    //Raw values are stored on disk files with fixed length encoding to make it easy for inverted index to search and scan.
    for (Object cell : dimValueList) {
        // A fresh buffer per value; the same bytes go to both the index writer and the data writer.
        byte[] fixLenBytes = new byte[fixEncodingLen];
        if (cell != null) {
            encoding.encode((String) cell, fixLenBytes, 0);
        } else {
            // Null cells are still encoded, and the column is flagged as containing nulls.
            encoding.encode(null, fixLenBytes, 0);
            dimensionMeta.setHasNull(true);
        }
        columnDescriptor.getWriter().addValue(fixLenBytes);
        columnDataWriter.write(fixLenBytes);
    }
    columnDataWriter.flush();
    // Must be read after the flush and before the index is written to indexOut.
    dimensionMeta.setDataLength(dataOut.size());
    columnDescriptor.getWriter().write(indexOut);
    // Index length = bytes counted since the start offset, minus the data length.
    dimensionMeta.setIndexLength((int) indexOut.getCount() - dimensionMeta.getStartOffset()
            - dimensionMeta.getDataLength());
    dimensionMeta.setCompression(cStoreDimDesc.getCompression().name());

    stopwatch.stop();
    if (logger.isDebugEnabled()) {
        logger.debug("cuboid-{} saved dimension:{}, took: {}ms", cuboidId, dimension.getName(),
                stopwatch.elapsedMillis());
    }
}

Source File: AppendDictNode.java From kylin with Apache License 2.0

4 votes

/**
 * Writes {@code childOffset} into the child-offset slot at {@code parentOffset} and then
 * OR-s back the last-child / end-of-value bits that the first byte of the slot held
 * before the write.
 *
 * @param parentOffset    position of the parent node's child-offset slot in {@code trieBytes}
 * @param childOffset     offset value to store
 * @param sizeChildOffset number of bytes the slot occupies
 * @param trieBytes       buffer modified in place
 */
private void build_overwriteChildOffset(int parentOffset, int childOffset, int sizeChildOffset, byte[] trieBytes) {
    final int flagMask = TrieDictionary.BIT_IS_LAST_CHILD | TrieDictionary.BIT_IS_END_OF_VALUE;

    // Capture the flag bits first: the offset write below replaces the whole slot.
    final int savedFlags = trieBytes[parentOffset] & flagMask;

    BytesUtil.writeUnsigned(childOffset, trieBytes, parentOffset, sizeChildOffset);
    trieBytes[parentOffset] |= savedFlags;
}

Source File: ColumnarMemoryStorePersister.java From kylin with Apache License 2.0

4 votes

/**
 * Persists one dimension column: every value in {@code dimValueList} is encoded to a
 * fixed-length byte array, handed to the column data writer and to the column's index
 * writer, and the index is then written to {@code indexOut}. A new
 * {@link DimensionMetaInfo} recording the name, start offset, data length, index length,
 * null flag and compression of the column is appended to {@code dimensionMetaList}.
 * <p>
 * NOTE(review): the earlier description of this method gave the on-disk layout as
 * "dictionary, then dimension values, then index"; only the value and index writes are
 * visible in this method, so confirm where (or whether) the dictionary part is written.
 *
 * @param cuboidId          id of the cuboid; used here only in the debug log message
 * @param dimValueList      values of the dimension; each element is cast to {@code String}
 *                          and may be {@code null}
 * @param dimensionMetaList list that receives the meta info created for this dimension
 * @param indexOut          counting stream written to; its byte count is used to derive
 *                          the start offset and the index length
 * @param dimension         the dimension column being persisted
 * @param dictMaps          dictionaries per column, looked up when the dimension is in
 *                          {@code dimensionsUseDictEncoding}
 * @throws IOException declared by the signature; presumably raised by the stream writes
 */
private void persistDimension(long cuboidId, List<Object> dimValueList, List<DimensionMetaInfo> dimensionMetaList,
        CountingOutputStream indexOut, TblColRef dimension, Map<TblColRef, Dictionary<String>> dictMaps)
        throws IOException {
    Stopwatch stopwatch = Stopwatch.createUnstarted();
    stopwatch.start();

    DimensionMetaInfo dimensionMeta = new DimensionMetaInfo();
    dimensionMetaList.add(dimensionMeta);

    // Pick the value encoding and the matching index column descriptor.
    DimensionEncoding encoding;
    IIColumnDescriptor columnDescriptor;
    if (dimensionsUseDictEncoding.contains(dimension)) {
        Dictionary<String> dict = dictMaps.get(dimension);
        encoding = new DictionaryDimEnc(dict);
        if (dict instanceof TrieDictionary) {
            // Trie dictionaries get a descriptor built from the dictionary's min/max id.
            columnDescriptor = new SeqIIColumnDescriptor(dimension.getName(), dict.getMinId(), dict.getMaxId());
        } else {
            columnDescriptor = new FixLenIIColumnDescriptor(dimension.getName(), encoding.getLengthOfEncoding());
        }
    } else {
        // Not dictionary-encoded: build the encoding from the rowkey column description.
        RowKeyColDesc colDesc = cubeDesc.getRowkey().getColDesc(dimension);
        encoding = DimensionEncodingFactory.create(colDesc.getEncodingName(), colDesc.getEncodingArgs(),
                colDesc.getEncodingVersion());
        columnDescriptor = new FixLenIIColumnDescriptor(dimension.getName(), encoding.getLengthOfEncoding());
    }
    dimensionMeta.setName(dimension.getName());
    // Start offset is the number of bytes already counted by indexOut before this column.
    dimensionMeta.setStartOffset((int) indexOut.getCount());
    int fixEncodingLen = encoding.getLengthOfEncoding();

    // dataOut wraps indexOut, so dataOut.size() below counts only bytes written through dataOut.
    DataOutputStream dataOut = new DataOutputStream(indexOut);
    ColumnarStoreDimDesc cStoreDimDesc = getColumnarStoreDimDesc(dimension, encoding);
    ColumnDataWriter columnDataWriter = cStoreDimDesc.getDimWriter(dataOut, dimValueList.size());

    //Raw values are stored on disk files with fixed length encoding to make it easy for inverted index to search and scan.
    for (Object cell : dimValueList) {
        // A fresh buffer per value; the same bytes go to both the index writer and the data writer.
        byte[] fixLenBytes = new byte[fixEncodingLen];
        if (cell != null) {
            encoding.encode((String) cell, fixLenBytes, 0);
        } else {
            // Null cells are still encoded, and the column is flagged as containing nulls.
            encoding.encode(null, fixLenBytes, 0);
            dimensionMeta.setHasNull(true);
        }
        columnDescriptor.getWriter().addValue(fixLenBytes);
        columnDataWriter.write(fixLenBytes);
    }
    columnDataWriter.flush();
    // Must be read after the flush and before the index is written to indexOut.
    dimensionMeta.setDataLength(dataOut.size());
    columnDescriptor.getWriter().write(indexOut);
    // Index length = bytes counted since the start offset, minus the data length.
    dimensionMeta.setIndexLength((int) indexOut.getCount() - dimensionMeta.getStartOffset()
            - dimensionMeta.getDataLength());
    dimensionMeta.setCompression(cStoreDimDesc.getCompression().name());

    stopwatch.stop();
    if (logger.isDebugEnabled()) {
        logger.debug("cuboid-{} saved dimension:{}, took: {}ms", cuboidId, dimension.getName(),
                stopwatch.elapsed(MILLISECONDS));
    }
}

Source File: MergeCuboidMapperTest.java From Kylin with Apache License 2.0

4 votes

/**
 * Prepares the test fixture: creates test metadata, clears the manager caches, sets up
 * the map driver for {@code MergeCuboidMapper}, and gives every segment of the test cube
 * its own two-value dictionary for {@code lfn} plus the shared dictionary for
 * {@code lsi} and {@code ssc}.
 *
 * @throws Exception declared by the signature; any failure during fixture preparation
 */
@Before
public void setUp() throws Exception {
    createTestMetadata();
    logger.info("The metadataUrl is : " + getTestConfig());

    MetadataManager.clearCache();
    CubeManager.clearCache();
    ProjectManager.clearCache();
    DictionaryManager.clearCache();

    // Hack for the distributed cache: remove the job metadata directory so the mapper
    // picks up the latest CubeManager state. (An earlier variant called
    // CubeManager.removeInstance(KylinConfig.createInstanceFromUri("../job/meta")) instead.)
    FileUtils.deleteDirectory(new File("../job/meta"));

    mapDriver = MapDriver.newMapDriver(new MergeCuboidMapper());

    cubeManager = CubeManager.getInstance(getTestConfig());
    cube = cubeManager.getCube("test_kylin_cube_without_slr_left_join_ready_2_segments");
    dictionaryManager = DictionaryManager.getInstance(getTestConfig());
    lfn = cube.getDescriptor().findColumnRef("DEFAULT.TEST_KYLIN_FACT", "LSTG_FORMAT_NAME");
    lsi = cube.getDescriptor().findColumnRef("DEFAULT.TEST_KYLIN_FACT", "CAL_DT");
    ssc = cube.getDescriptor().findColumnRef("DEFAULT.TEST_CATEGORY_GROUPINGS", "META_CATEG_NAME");

    DictionaryInfo commonDict = makeSharedDict();

    boolean onFirstSegment = true;
    for (CubeSegment seg : cube.getSegments()) {
        TableSignature fakeSignature = new TableSignature();
        fakeSignature.setPath("fake_dict_for" + lfn.getName() + seg.getName());
        fakeSignature.setSize(100);
        fakeSignature.setLastModifiedTime(System.currentTimeMillis());

        DictionaryInfo segDictInfo = new DictionaryInfo(lfn.getTable(), lfn.getColumn().getName(),
                lfn.getColumn().getZeroBasedIndex(), "string", fakeSignature, "");

        // Every segment holds "aaa"; the first one adds "ccc", all later ones add "bbb".
        List<byte[]> entries = new ArrayList<byte[]>();
        entries.add(new byte[] { 'a', 'a', 'a' });
        entries.add(onFirstSegment ? new byte[] { 'c', 'c', 'c' } : new byte[] { 'b', 'b', 'b' });

        Dictionary<?> segDict = DictionaryGenerator.buildDictionaryFromValueList(segDictInfo, entries);
        dictionaryManager.trySaveNewDict(segDict, segDictInfo);
        ((TrieDictionary) segDict).dump(System.out);

        seg.putDictResPath(lfn, segDictInfo.getResourcePath());
        seg.putDictResPath(lsi, commonDict.getResourcePath());
        seg.putDictResPath(ssc, commonDict.getResourcePath());

        // The cube is updated after each segment (previously done via
        // cubeManager.saveResource / cubeManager.afterCubeUpdated on the segment's cube instance).
        cubeManager.updateCube(cube);

        onFirstSegment = false;
    }
}

org.apache.kylin.dict.TrieDictionary Java Examples