org.apache.kylin.common.util.ByteArray Java Examples

Source File: CubeScanRangePlanner.java From kylin-on-parquet-v2 with Apache License 2.0

6 votes

/**
 * Turns the per-key fuzzy value sets into a list of fuzzy-key records.
 *
 * <p>Each combination produced by {@code FuzzyValueCombination.calculate} (bounded by
 * {@code maxFuzzyKeys}) becomes one {@code GTRecord}; every entry of the combination is
 * written into the record with {@code GTRecord.set(key, value)}.
 *
 * @param fuzzyValueSet candidate values grouped by Integer key
 * @return the generated records; empty when {@code fuzzyValueSet} is empty or when
 *         {@code BackdoorToggles.getDisableFuzzyKey()} is set
 */
private List<GTRecord> buildFuzzyKeys(Map<Integer, Set<ByteArray>> fuzzyValueSet) {
    ArrayList<GTRecord> fuzzyKeys = Lists.newArrayList();

    if (!fuzzyValueSet.isEmpty()) {
        if (BackdoorToggles.getDisableFuzzyKey()) {
            // debug/profiling purpose
            logger.info("The execution of this query will not use fuzzy key");
        } else {
            List<Map<Integer, ByteArray>> combinations = FuzzyValueCombination.calculate(fuzzyValueSet, maxFuzzyKeys);
            for (Map<Integer, ByteArray> combination : combinations) {
                GTRecord record = new GTRecord(gtInfo);
                for (Map.Entry<Integer, ByteArray> column : combination.entrySet()) {
                    record.set(column.getKey(), column.getValue());
                }
                fuzzyKeys.add(record);
            }
        }
    }
    return fuzzyKeys;
}

Source File: TrieDictionaryForestBuilder.java From kylin with Apache License 2.0

6 votes

/**
 * Feeds one value into the trie under construction.
 *
 * <p>While the input is still considered ordered, a value equal to the previous one is
 * dropped as a duplicate, and a value smaller than the previous one switches the builder
 * to unordered mode. Going unordered after at least one tree has already been produced
 * is rejected. Once the accumulated byte size reaches {@code maxTrieTreeSize} (and the
 * input is still ordered) the current trie is built, stored via {@code addTree}, and the
 * builder state is {@code reset()}.
 *
 * @param valueBytes raw bytes of the value to add
 * @throws IllegalStateException if out-of-order data arrives after a tree was already produced
 */
private void addValue(byte[] valueBytes) {
    ByteArray current = new ByteArray(valueBytes);
    if (isOrdered && previousValue != null) {
        int order = previousValue.compareTo(current);
        if (order == 0) {
            return; //duplicate value
        } else if (order > 0) {
            logger.info("values not in ascending order, previous '{}', current '{}'", previousValue, current);
            isOrdered = false;
            if (!trees.isEmpty()) {
                throw new IllegalStateException("Invalid input data. Unordered data cannot be split into multi trees");
            }
        }
    }

    previousValue = current;
    trieBuilder.addValue(valueBytes);
    curTreeSize += valueBytes.length;

    boolean treeIsFull = curTreeSize >= maxTrieTreeSize;
    if (treeIsFull && isOrdered) {
        addTree(trieBuilder.build(0));
        reset();
    }
}

Source File: RawSerializer.java From kylin-on-parquet-v2 with Apache License 2.0

6 votes

/**
 * Reads a list of byte arrays from {@code in}.
 *
 * <p>The encoding is a variable-length int element count followed by that many byte
 * arrays (as read by {@code BytesUtil.readByteArray}). Elements whose length is zero are
 * consumed from the buffer but not added to the result.
 *
 * @param in buffer positioned at the start of the encoded list
 * @return the non-empty elements, in encounter order
 * @throws RuntimeException if the decoded element count is negative
 */
@Override
public List<ByteArray> deserialize(ByteBuffer in) {
    int count = BytesUtil.readVInt(in);
    if (count < 0) {
        throw new RuntimeException("Read error data size:" + count);
    }

    List<ByteArray> result = new ArrayList<>();
    int remaining = count;
    while (remaining-- > 0) {
        ByteArray element = new ByteArray(BytesUtil.readByteArray(in));
        if (element.length() == 0) {
            continue; // empty entries are skipped
        }
        result.add(element);
    }
    return result;
}

Source File: TrieDictionaryForest.java From kylin-on-parquet-v2 with Apache License 2.0

6 votes

/**
 * Serializes the forest header and writes it to {@code out}, prefixed with its byte length.
 *
 * <p>Header layout, in order: {@code baseId}; the converter class name (empty string when
 * {@code bytesConvert} is null); the {@code accuOffset} entries preceded by their count;
 * the {@code valueDivide} entries preceded by their count, each as length + bytes; and
 * finally the number of trees.
 *
 * @param out destination for the length-prefixed header
 * @throws IOException if writing fails
 */
private void writeHead(DataOutput out) throws IOException {
    ByteArrayOutputStream headBytes = new ByteArrayOutputStream();
    DataOutputStream headOut = new DataOutputStream(headBytes);

    headOut.writeInt(baseId);
    String converterName = "";
    if (bytesConvert != null) {
        converterName = bytesConvert.getClass().getName();
    }
    headOut.writeUTF(converterName);

    // accumulated offsets
    headOut.writeInt(accuOffset.size());
    for (Integer offset : accuOffset) {
        headOut.writeInt(offset);
    }

    // dividing values, each written as length + payload
    headOut.writeInt(valueDivide.size());
    for (ByteArray divider : valueDivide) {
        byte[] raw = divider.toBytes();
        headOut.writeInt(raw.length);
        headOut.write(raw);
    }

    // number of trees
    headOut.writeInt(trees.size());
    headOut.close();

    // emit the buffered header with a length prefix
    byte[] head = headBytes.toByteArray();
    out.writeInt(head.length);
    out.write(head);
}

Source File: RowKeyDecoder.java From kylin-on-parquet-v2 with Apache License 2.0

6 votes

/**
 * Decodes a row key: splits it, initializes the matching cuboid, and collects one value
 * per cuboid column into {@code values} (which is cleared first).
 *
 * @param bytes the raw row key
 * @return the cuboid id reported by the splitter
 * @throws IOException declared by the signature; propagated from the helpers it calls
 */
public long decode(byte[] bytes) throws IOException {
    this.values.clear();

    final long cuboidId = rowKeySplitter.split(bytes);
    initCuboid(cuboidId);

    final ByteArray[] parts = rowKeySplitter.getSplitBuffers();
    int partIdx = rowKeySplitter.getBodySplitOffset(); // skip shard and cuboid id part

    // columns and body splits advance in lock step
    for (TblColRef column : this.cuboid.getColumns()) {
        ByteArray part = parts[partIdx++];
        collectValue(column, part.array(), part.offset(), part.length());
    }

    return cuboidId;
}

Source File: TrieDictionaryForest.java From kylin-on-parquet-v2 with Apache License 2.0

6 votes

/**
 * Prints a human-readable description of this forest to {@code out}: the base id, the
 * dividing values, the accumulated offsets, and then the dump of every tree.
 *
 * @param out stream that receives the description
 */
@Override
public void dump(PrintStream out) {
    out.println("TrieDictionaryForest");
    out.println("baseId:" + baseId);
    StringBuilder sb = new StringBuilder();
    sb.append("value divide:");
    // NOTE(review): bytesConvert is dereferenced unconditionally — confirm it cannot be null here.
    for (ByteArray ba : valueDivide) {
        // Decode from the ByteArray's own offset rather than assuming the content starts at index 0.
        sb.append(bytesConvert.convertFromBytes(ba.array(), ba.offset(), ba.length())).append(" ");
    }
    sb.append("\noffset divide:");
    for (Integer offset : accuOffset) {
        sb.append(offset).append(" ");
    }
    out.println(sb.toString());
    for (int i = 0; i < trees.size(); i++) {
        out.println("----tree " + i + "--------");
        trees.get(i).dump(out);
    }
}

Source File: SegmentGTStartAndEnd.java From kylin with Apache License 2.0

6 votes

/**
 * Formats a timestamp according to the type of column {@code index} and encodes the
 * resulting text with the code system of {@code info}.
 *
 * @param ts timestamp to encode
 * @param index column index used for the type lookup and the encoding
 * @param roundingFlag passed through to {@code encodeColumnValue}
 * @return a copy of the encoded bytes
 * @throws RuntimeException if the column type is not date, time-family, string-family or integer-family
 */
private ByteArray encodeTime(long ts, int index, int roundingFlag) {
    final DataType colType = info.getColumnType(index);
    final String text;
    if (colType.isDate()) {
        text = DateFormat.formatToDateStr(ts);
    } else if (colType.isTimeFamily()) {
        text = DateFormat.formatToTimeWithoutMilliStr(ts);
    } else if (colType.isStringFamily() || colType.isIntegerFamily()) {
        // integer like 20160101
        String pattern = segment.getModel().getPartitionDesc().getPartitionDateFormat();
        text = StringUtils.isEmpty(pattern) ? Long.toString(ts) : DateFormat.formatToDateStr(ts, pattern);
    } else {
        throw new RuntimeException("Type " + colType + " is not valid partition column type");
    }

    ByteBuffer encoded = ByteBuffer.allocate(info.getMaxColumnLength());
    info.getCodeSystem().encodeColumnValue(index, text, roundingFlag, encoded);

    // only the bytes written so far are copied out
    return ByteArray.copyOf(encoded.array(), 0, encoded.position());
}

Source File: FlinkCubingMerge.java From kylin-on-parquet-v2 with Apache License 2.0

6 votes

/**
 * Folds all measure arrays of the group into one and emits a single tuple.
 *
 * <p>The first record's measures seed the result; every later record is combined with the
 * running result through {@code aggregators.aggregate} into a freshly allocated array.
 * The emitted key is the key of the last record iterated.
 *
 * @param values records of the group
 * @param out collector that receives the single merged tuple
 */
@Override
public void reduce(Iterable<Tuple2<ByteArray, Object[]>> values, Collector<Tuple2<ByteArray, Object[]>> out) throws Exception {
    ByteArray lastKey = null;
    Object[] merged = null;

    for (Tuple2<ByteArray, Object[]> record : values) {
        lastKey = record.f0;
        if (merged != null) {
            Object[] next = new Object[merged.length];
            aggregators.aggregate(record.f1, merged, next);
            merged = next;
            continue;
        }
        merged = record.f1;
    }
    out.collect(new Tuple2<>(lastKey, merged));
}

Source File: RemoteDictionaryStore.java From kylin with Apache License 2.0

6 votes

/**
 * Conditionally stores {@code putValue} for the row {@code rowkeyStr} via
 * {@code table.checkAndPut}.
 *
 * <p>The put writes the value (as its decimal string in UTF-8) and the current time in
 * milliseconds. When {@code checkPrevious} is true the expected cell content is
 * {@code expectedValue} as a decimal string; otherwise {@code null} is passed as the
 * expected value.
 *
 * @param columnFamily column family whose backing array is used for both cells
 * @param rowkeyStr row key text, encoded as UTF-8
 * @param expectedValue value expected in the cell when {@code checkPrevious} is true
 * @param putValue value to store
 * @param checkPrevious whether to compare against {@code expectedValue}
 * @return {@code ID_FOR_EMPTY_STR} for an empty row key, {@code putValue} when the put was
 *         applied, otherwise {@code ID_UNKNOWN}
 * @throws IOException if the table operation fails
 */
int checkAndPut(ByteArray columnFamily, String rowkeyStr, int expectedValue, int putValue, boolean checkPrevious)
        throws IOException {
    final byte[] rowkey = rowkeyStr.getBytes(StandardCharsets.UTF_8);
    if (rowkey.length == 0) {
        return ID_FOR_EMPTY_STR;
    }

    final byte[] newValue = Integer.toString(putValue).getBytes(StandardCharsets.UTF_8);
    final byte[] family = columnFamily.array();
    Put put = new Put(rowkey);
    put.addColumn(family, encodeQualifierName, newValue);
    put.addColumn(family, tsQualifierName, Bytes.toBytes(System.currentTimeMillis()));

    byte[] expected = null;
    if (checkPrevious) {
        expected = Integer.toString(expectedValue).getBytes(StandardCharsets.UTF_8);
    }

    if (!table.checkAndPut(rowkey, family, encodeQualifierName, expected, put)) {
        return ID_UNKNOWN;
    }
    if (printValue) {
        logger.debug("Encode {} to {}", rowkeyStr, putValue);
    }
    return putValue;
}

Source File: SparkCubingByLayer.java From kylin with Apache License 2.0

6 votes

/**
 * Sums the measure at {@code countMeasureIndex} over every record of {@code rdd}.
 *
 * <p>Values are first mapped to that single {@code Long} measure and then reduced by
 * addition; the key of the reduced tuple is a placeholder and is discarded.
 *
 * @param rdd records whose measure arrays hold a {@code Long} at {@code countMeasureIndex}
 * @param countMeasureIndex position of the count measure in each array
 * @return the total of the selected measure
 */
private Long getRDDCountSum(JavaPairRDD<ByteArray, Object[]> rdd, final int countMeasureIndex) {
    final ByteArray placeholderKey = new ByteArray();

    Function<Object[], Long> pickCount = new Function<Object[], Long>() {
        @Override
        public Long call(Object[] measures) throws Exception {
            return (Long) measures[countMeasureIndex];
        }
    };

    Function2<Tuple2<ByteArray, Long>, Tuple2<ByteArray, Long>, Tuple2<ByteArray, Long>> addCounts = new Function2<Tuple2<ByteArray, Long>, Tuple2<ByteArray, Long>, Tuple2<ByteArray, Long>>() {
        @Override
        public Tuple2<ByteArray, Long> call(Tuple2<ByteArray, Long> left, Tuple2<ByteArray, Long> right)
                throws Exception {
            return new Tuple2<>(placeholderKey, left._2() + right._2());
        }
    };

    return rdd.mapValues(pickCount).reduce(addCounts)._2();
}

Source File: TopNCounterSerializerTest.java From kylin-on-parquet-v2 with Apache License 2.0

6 votes

/**
 * Serializes a populated counter, deserializes it from a right-sized copy of the written
 * bytes, and checks that both counters have the same string form.
 */
@Test
public void testSerialization() {
    TopNCounter<ByteArray> original = new TopNCounter<ByteArray>(50);
    Integer[] stream = { 1, 1, 2, 9, 1, 2, 3, 7, 7, 1, 3, 1, 1 };
    for (int idx = 0; idx < stream.length; idx++) {
        original.offer(new ByteArray(Bytes.toBytes(stream[idx])));
    }
    original.sortAndRetain();

    ByteBuffer buffer = ByteBuffer.allocate(1024);
    serializer.serialize(original, buffer);

    // keep only the bytes the serializer actually wrote
    int written = buffer.position();
    byte[] serialized = new byte[written];
    System.arraycopy(buffer.array(), 0, serialized, 0, written);

    TopNCounter<ByteArray> restored = serializer.deserialize(ByteBuffer.wrap(serialized));

    Assert.assertEquals(original.toString(), restored.toString());
}

Source File: FlinkCubingByLayer.java From kylin with Apache License 2.0

6 votes

/**
 * Aggregates every measure array of the group into one and emits a single tuple.
 *
 * <p>The first record's measures are taken as-is; each following record is combined with
 * the running result through {@code aggregators.aggregate} into a new array of
 * {@code measureNum} slots. The key of the last record iterated is used for the output.
 *
 * @param iterable records of the group
 * @param collector receives the single aggregated tuple
 */
@Override
public void reduce(Iterable<Tuple2<ByteArray, Object[]>> iterable, Collector<Tuple2<ByteArray, Object[]>> collector) throws Exception {
    ByteArray lastKey = null;
    Object[] aggregated = null;

    for (Tuple2<ByteArray, Object[]> record : iterable) {
        lastKey = record.f0;
        if (aggregated != null) {
            Object[] combined = new Object[measureNum];
            aggregators.aggregate(record.f1, aggregated, combined);
            aggregated = combined;
            continue;
        }
        aggregated = record.f1;
    }

    collector.collect(new Tuple2<>(lastKey, aggregated));
}

Source File: NDCuboidBuilder.java From kylin-on-parquet-v2 with Apache License 2.0

6 votes

/**
 * Assembles the child cuboid's row key inside {@code newKeyBodyBuf}.
 *
 * <p>Walks the parent cuboid id bit by bit, from its highest set bit downwards. Every set
 * parent bit consumes one entry of {@code splitBuffers}; that entry is copied into the
 * output only when the child id has the same bit set. Copying starts after the shard-id
 * and cuboid-id bytes, and the header is filled in last by the child's row key encoder.
 *
 * @param parentCuboid cuboid the split buffers were produced from
 * @param childCuboid cuboid whose key is being built
 * @param splitBuffers split parts of the parent row key
 * @param newKeyBodyBuf output buffer; its backing array is written in place
 */
private void buildKeyInternal(Cuboid parentCuboid, Cuboid childCuboid, ByteArray[] splitBuffers, ByteArray newKeyBodyBuf) {
    RowKeyEncoder rowkeyEncoder = rowKeyEncoderProvider.getRowkeyEncoder(childCuboid);

    // rowkey columns
    final long parentId = parentCuboid.getId();
    final long childId = childCuboid.getId();
    final long bitCount = (long) Long.SIZE - Long.numberOfLeadingZeros(parentId);
    final byte[] target = newKeyBodyBuf.array();

    int splitIdx = rowKeySplitter.getBodySplitOffset(); // skip shard and cuboidId
    int writePos = RowConstants.ROWKEY_SHARDID_LEN + RowConstants.ROWKEY_CUBOIDID_LEN; // skip shard and cuboidId

    long bit = Long.highestOneBit(parentId);
    for (int i = 0; i < bitCount; i++, bit >>= 1) {
        if ((bit & parentId) <= 0) {
            continue; // this bit position is not set in the parent
        }
        if ((bit & childId) > 0) {
            // the child cuboid has this column
            ByteArray part = splitBuffers[splitIdx];
            System.arraycopy(part.array(), part.offset(), target, writePos, part.length());
            writePos += part.length();
        }
        splitIdx++;
    }

    rowkeyEncoder.fillHeader(target);
}

Source File: FlinkCubingByLayer.java From kylin with Apache License 2.0

6 votes

/**
 * Emits one tuple per spanning child cuboid of the incoming record's cuboid.
 *
 * <p>The cuboid id is parsed from the key; when the scheduler reports no children nothing
 * is emitted. Otherwise the key is split once and, for every child, a new key is built
 * with {@code ndCuboidBuilder.buildKey2} and paired with the unchanged measure array.
 *
 * @param tuple2 parent record (row key, measures)
 * @param collector receives one record per child cuboid
 */
@Override
public void flatMap(Tuple2<ByteArray, Object[]> tuple2, Collector<Tuple2<ByteArray, Object[]>> collector) throws Exception {
    final byte[] parentKey = tuple2.f0.array();
    final long parentId = rowKeySplitter.parseCuboid(parentKey);
    final List<Long> children = cubeSegment.getCuboidScheduler().getSpanningCuboid(parentId);

    // if still empty or null
    boolean hasChildren = children != null && children.size() != 0;
    if (!hasChildren) {
        return;
    }

    rowKeySplitter.split(parentKey);
    final Cuboid parentCuboid = Cuboid.findForMandatory(cubeDesc, parentId);

    for (Long childId : children) {
        Cuboid childCuboid = Cuboid.findForMandatory(cubeDesc, childId);
        ByteArray childKey = ndCuboidBuilder.buildKey2(parentCuboid, childCuboid,
                rowKeySplitter.getSplitBuffers());
        collector.collect(new Tuple2<>(childKey, tuple2.f1));
    }
}

Source File: TopNCounterSerializerTest.java From kylin with Apache License 2.0

6 votes

/**
 * Round-trips a populated counter through the serializer and compares the string forms of
 * the original and the deserialized counter.
 */
@Test
public void testSerialization() {
    Integer[] stream = { 1, 1, 2, 9, 1, 2, 3, 7, 7, 1, 3, 1, 1 };
    TopNCounter<ByteArray> expected = new TopNCounter<ByteArray>(50);
    for (Integer item : stream) {
        byte[] encodedItem = Bytes.toBytes(item);
        expected.offer(new ByteArray(encodedItem));
    }
    expected.sortAndRetain();

    ByteBuffer sink = ByteBuffer.allocate(1024);
    serializer.serialize(expected, sink);

    // trim the buffer down to the bytes that were written
    final int usedBytes = sink.position();
    byte[] payload = new byte[usedBytes];
    System.arraycopy(sink.array(), 0, payload, 0, usedBytes);

    ByteBuffer source = ByteBuffer.wrap(payload);
    TopNCounter<ByteArray> actual = serializer.deserialize(source);

    Assert.assertEquals(expected.toString(), actual.toString());
}

Source File: ScanRangePlannerBase.java From kylin with Apache License 2.0

5 votes

/**
 * Null-safe wrapper around {@code ByteArray.toReadableText()}.
 *
 * @param byteArray value to render, may be null
 * @return the readable text, or {@code null} when {@code byteArray} is null
 */
protected String makeReadable(ByteArray byteArray) {
    return byteArray == null ? null : byteArray.toReadableText();
}

Source File: DimEncodingPreserveOrderTest.java From kylin with Apache License 2.0

5 votes

/**
 * Encodes "0000", "0001", "FFF0" and null with a 4-wide {@code FixedLenHexDimEnc} and
 * asserts that the encoded values, in that order, are ordered under
 * {@code DefaultGTComparator}.
 */
@Test
public void testFixedLengthHexDimEncPreserveOrder() {
    FixedLenHexDimEnc enc = new FixedLenHexDimEnc(4);
    List<ByteArray> encodedValues = Lists.newArrayList();
    for (String hex : new String[] { "0000", "0001", "FFF0" }) {
        encodedValues.add(encode(enc, hex));
    }
    // null goes last in the sequence under test
    encodedValues.add(encode(enc, null));

    boolean ordered = Ordering.from(new DefaultGTComparator()).isOrdered(encodedValues);
    assertTrue(ordered);
}

Source File: InvertIndexSearcher.java From kylin-on-parquet-v2 with Apache License 2.0

5 votes

/**
 * Evaluates an IN-style comparison against the inverted index of the filter's column.
 *
 * <p>Each filter value is converted to bytes ({@code ByteArray} backing array,
 * {@code byte[]} as-is, {@code String} via {@code Bytes.toBytes}; any other type yields
 * {@code null}) and looked up; the non-null bitmaps are OR-ed together.
 *
 * @param filter comparison filter supplying the column and the candidate values
 * @return {@code EvalResult.ALL_MATCH} when the column has no index searcher; otherwise a
 *         result whose bitmap is the union of the hits (left unset when nothing matched)
 */
private EvalResult doEvalCompareIn(CompareTupleFilter filter) {
    EvalResult result = new EvalResult();
    ColInvertIndexSearcher colSearcher = colIndexSearchers.get(filter.getColumn().getName());
    if (colSearcher == null) {
        return EvalResult.ALL_MATCH;
    }

    List<ImmutableRoaringBitmap> hits = Lists.newArrayList();
    for (Object candidate : filter.getValues()) {
        final byte[] raw;
        if (candidate instanceof ByteArray) {
            // NOTE(review): uses the whole backing array; presumably offset is 0 and length is full — confirm
            raw = ((ByteArray) candidate).array();
        } else if (candidate instanceof byte[]) {
            raw = (byte[]) candidate;
        } else if (candidate instanceof String) {
            raw = Bytes.toBytes((String) candidate);
        } else {
            raw = null;
        }

        ImmutableRoaringBitmap hit = colSearcher.searchValue(raw);
        if (hit != null) {
            hits.add(hit);
        }
    }

    if (!hits.isEmpty()) {
        ImmutableRoaringBitmap[] hitArray = hits.toArray(new ImmutableRoaringBitmap[hits.size()]);
        result.bitmap = ImmutableRoaringBitmap.or(hitArray);
    }
    return result;
}

Source File: FragmentFileSearcher.java From kylin-on-parquet-v2 with Apache License 2.0

5 votes

/**
 * Encodes a dimension value into the shared buffer {@code buf} and returns a copy of the
 * bytes written.
 *
 * @param col dimension column index
 * @param value value to encode
 * @param roundingFlag passed through to {@code recordCodec.encodeDimension}
 * @return the encoded bytes, or {@code null} when an {@code IllegalArgumentException} is
 *         thrown during encoding
 */
protected ByteArray translate(int col, Object value, int roundingFlag) {
    ByteArray encoded;
    try {
        buf.clear();
        recordCodec.encodeDimension(col, value, roundingFlag, buf);
        encoded = ByteArray.copyOf(buf.array(), 0, buf.position());
    } catch (IllegalArgumentException ex) {
        // a value that cannot be encoded is reported as null
        encoded = null;
    }
    return encoded;
}

Source File: RawSerializer.java From kylin-on-parquet-v2 with Apache License 2.0

5 votes

/**
 * Returns the list held by the {@code current} holder, creating and storing an empty one
 * on first use.
 *
 * @return the held list, never null
 */
private List<ByteArray> current() {
    List<ByteArray> held = (List<ByteArray>) current.get();
    if (held != null) {
        return held;
    }
    List<ByteArray> fresh = new ArrayList<ByteArray>();
    current.set(fresh);
    return fresh;
}

Source File: RawSerializer.java From kylin-on-parquet-v2 with Apache License 2.0

5 votes

/**
 * Writes {@code values} to {@code out} as a variable-length int count followed by each
 * element's bytes (the {@code offset()}..{@code offset() + length()} slice of its array).
 * A {@code null} list is written as a count of zero.
 *
 * @param values list to serialize, may be null
 * @param out destination buffer
 * @throws RuntimeException if the buffer is full or has fewer remaining bytes than the next element's length
 */
@Override
public void serialize(List<ByteArray> values, ByteBuffer out) {
    if (values == null) {
        BytesUtil.writeVInt(0, out);
        return;
    }

    BytesUtil.writeVInt(values.size(), out);
    for (ByteArray value : values) {
        boolean fits = out.hasRemaining() && out.remaining() >= value.length();
        if (!fits) {
            throw new RuntimeException("BufferOverflow! Please use one higher cardinality column for dimension column when build RAW cube!");
        }
        byte[] slice = BytesUtil.subarray(value.array(), value.offset(), value.offset() + value.length());
        BytesUtil.writeByteArray(slice, out);
    }
}

Source File: TopNAggregator.java From kylin with Apache License 2.0

5 votes

/**
 * Merges {@code value} into the running counter {@code sum}.
 *
 * <p>On the first call the capacity is taken from {@code value} and {@code sum} is created
 * with ten times that capacity.
 *
 * @param value counter to merge in
 */
@Override
public void aggregate(TopNCounter<ByteArray> value) {
    if (sum != null) {
        sum.merge(value);
        return;
    }
    // first value: size the accumulator from it
    capacity = value.getCapacity();
    sum = new TopNCounter<>(capacity * 10);
    sum.merge(value);
}

Source File: RowKeySplitter.java From kylin-on-parquet-v2 with Apache License 2.0

5 votes

/**
 * Creates a splitter for the row keys of {@code cubeSeg}.
 *
 * @param cubeSeg segment supplying the sharding flag, cube descriptor and dimension encodings
 * @param splitLen number of split buffers / offsets to allocate
 * @param bytesLen not read by this constructor
 */
public RowKeySplitter(CubeSegment cubeSeg, int splitLen, int bytesLen) {
    this.enableSharding = cubeSeg.isEnableSharding();
    this.cubeDesc = cubeSeg.getCubeDesc();

    IDimensionEncodingMap encodings = new CubeDimEncMap(cubeSeg);
    // Look up every row key column once; the results are discarded.
    // NOTE(review): presumably this pre-populates the encoding map — confirm.
    for (RowKeyColDesc column : cubeDesc.getRowkey().getRowKeyColumns()) {
        encodings.get(column.getColRef());
    }
    this.colIO = new RowKeyColumnIO(encodings);

    this.bufferSize = 0;
    this.splitOffsets = new int[splitLen];
    this.splitBuffers = new ByteArray[splitLen];
}

Source File: GTScanRequest.java From kylin with Apache License 2.0

5 votes

/**
 * Reads a record from {@code in}: a variable-length int column count followed by that many
 * columns, each read with {@code ByteArray.importData}.
 *
 * @param in buffer positioned at the start of the encoded record
 * @param sInfo table info handed to the new record
 * @return the record built from the columns read
 */
private GTRecord deserializeGTRecord(ByteBuffer in, GTInfo sInfo) {
    final ByteArray[] columns = new ByteArray[BytesUtil.readVInt(in)];
    int idx = 0;
    while (idx < columns.length) {
        columns[idx++] = ByteArray.importData(in);
    }
    return new GTRecord(sInfo, columns);
}

Source File: ExtendedColumnMeasureType.java From kylin with Apache License 2.0

5 votes

/**
 * Creates a filler that copies the extended column's value into the result tuple.
 *
 * <p>The filler converts each measure value (a {@code ByteArray}) to a string from its
 * backing array, reports exactly one row, and writes the string at the extended column's
 * index in the tuple.
 *
 * @param function function description used to find the extended column
 * @param returnTupleInfo layout of the returned tuple
 * @param dictionaryMap not read by this method
 * @return the filler
 * @throws RuntimeException if the extended column is not part of {@code returnTupleInfo}
 */
public IAdvMeasureFiller getAdvancedTupleFiller(FunctionDesc function, TupleInfo returnTupleInfo,
        Map<TblColRef, Dictionary<String>> dictionaryMap) {
    final TblColRef extendedCol = getExtendedColumn(function);

    final int tupleIdx;
    if (returnTupleInfo.hasColumn(extendedCol)) {
        tupleIdx = returnTupleInfo.getColumnIndex(extendedCol);
    } else {
        tupleIdx = -1;
    }
    if (tupleIdx == -1) {
        throw new RuntimeException("Extended column is not required in returnTupleInfo");
    }

    return new IAdvMeasureFiller() {
        private String value;

        @Override
        public void reload(Object measureValue) {
            //the array in ByteArray is guaranteed to be completed owned by the ByteArray
            value = (measureValue == null) ? null : Bytes.toString(((ByteArray) measureValue).array());
        }

        @Override
        public int getNumOfRows() {
            return 1;
        }

        @Override
        public void fillTuple(Tuple tuple, int row) {
            tuple.setDimensionValue(tupleIdx, value);
        }
    };
}

Source File: RawSerializer.java From kylin with Apache License 2.0

5 votes

/**
 * Serializes a list of byte arrays into {@code out}: first the element count as a
 * variable-length int, then each element's {@code offset()}..{@code offset() + length()}
 * slice. A {@code null} list produces a count of zero and nothing else.
 *
 * @param values elements to write, may be null
 * @param out buffer to write into
 * @throws RuntimeException if {@code out} has no room left or less room than the next element's length
 */
@Override
public void serialize(List<ByteArray> values, ByteBuffer out) {
    final int count = (values == null) ? 0 : values.size();
    BytesUtil.writeVInt(count, out);
    if (values == null) {
        return;
    }

    for (ByteArray element : values) {
        if (!(out.hasRemaining() && out.remaining() >= element.length())) {
            throw new RuntimeException("BufferOverflow! Please use one higher cardinality column for dimension column when build RAW cube!");
        }
        byte[] payload = BytesUtil.subarray(element.array(), element.offset(), element.offset() + element.length());
        BytesUtil.writeByteArray(payload, out);
    }
}

Source File: SparkCubingByLayer.java From kylin-on-parquet-v2 with Apache License 2.0

5 votes

/**
 * Converts one flat-table row into a (row key, measures) tuple for the base cuboid.
 *
 * <p>On first use the base cuboid builder is created: the {@code initialized} flag is
 * checked, then checked again while holding the {@code SparkCubingByLayer.class} lock, and
 * the Kylin config loaded from HDFS is installed as thread-local config for the duration
 * of the setup.
 * NOTE(review): whether {@code initialized} is declared volatile is not visible here — confirm.
 *
 * @param rowArray column values of one row
 * @return the encoded row key wrapped in a {@code ByteArray}, paired with the measure objects
 */
@Override
public Tuple2<ByteArray, Object[]> call(String[] rowArray) throws Exception {
    if (!initialized) {
        synchronized (SparkCubingByLayer.class) {
            if (!initialized) {
                KylinConfig config = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
                try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
                        .setAndUnsetThreadLocalConfig(config)) {
                    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
                    CubeDesc desc = cube.getDescriptor();
                    CubeSegment segment = cube.getSegmentById(segmentId);
                    CubeJoinedFlatTableEnrich flatTable = new CubeJoinedFlatTableEnrich(
                            EngineFactory.getJoinedFlatTableDesc(segment), desc);
                    long baseId = Cuboid.getBaseCuboidId(desc);
                    Cuboid base = Cuboid.findForMandatory(desc, baseId);
                    baseCuboidBuilder = new BaseCuboidBuilder(config, desc, segment, flatTable,
                            AbstractRowKeyEncoder.createInstance(segment, base),
                            MeasureIngester.create(desc.getMeasures()), segment.buildDictionaryMap());
                    initialized = true;
                }
            }
        }
    }

    baseCuboidBuilder.resetAggrs();
    byte[] encodedKey = baseCuboidBuilder.buildKey(rowArray);
    Object[] measures = baseCuboidBuilder.buildValueObjects(rowArray);
    return new Tuple2<>(new ByteArray(encodedKey), measures);
}

Source File: RowKeyEncoder.java From kylin with Apache License 2.0

5 votes

/**
 * Copies the row key body into {@code outputBuf} after the header area and then fills the
 * header.
 *
 * @param bodyBytes encoded body; its length must equal {@code bodyLength}
 * @param outputBuf destination; its length must equal header length + body length
 * @throws IllegalStateException if either length precondition is violated
 */
@Override
public void encode(ByteArray bodyBytes, ByteArray outputBuf) {
    final int bodyLen = bodyBytes.length();
    Preconditions.checkState(bodyLen == bodyLength);

    final int headerLen = getHeaderLength();
    final int outLen = outputBuf.length();
    Preconditions.checkState(bodyLen + headerLen == outLen, //
            "bodybytes length: " + bodyLen + " outputBuf length: " + outLen + " header length: " + headerLen);

    // body goes right behind the header area
    System.arraycopy(bodyBytes.array(), bodyBytes.offset(), outputBuf.array(), headerLen, bodyLength);

    //fill shard and cuboid
    fillHeader(outputBuf.array());
}

Source File: RawSerializerTest.java From kylin-on-parquet-v2 with Apache License 2.0

5 votes

/**
 * Round-trips a {@code null} list and an empty list through {@code doSAndD}: the former
 * must come back as an empty list, the latter must come back equal to the input.
 */
@Test
public void testNull() {
    List<ByteArray> output = doSAndD(null);
    // assertEquals takes (expected, actual); keeping that order makes failure messages read correctly
    assertEquals(0, output.size());
    List<ByteArray> input = new ArrayList<ByteArray>();
    output = doSAndD(input);
    assertEquals(input, output);
}

Source File: RawSerializerTest.java From kylin with Apache License 2.0

5 votes

/**
 * Checks that {@code doSAndD} turns a {@code null} list into an empty list and returns an
 * empty input list unchanged.
 */
@Test
public void testNull() {
    List<ByteArray> output = doSAndD(null);
    // expected value first, actual second, as assertEquals defines its parameters
    assertEquals(0, output.size());
    List<ByteArray> input = new ArrayList<ByteArray>();
    output = doSAndD(input);
    assertEquals(input, output);
}

org.apache.kylin.common.util.ByteArray Java Examples