org.apache.kylin.common.util.ByteArray Java Examples
The following examples show how to use
org.apache.kylin.common.util.ByteArray.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CubeScanRangePlanner.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
private List<GTRecord> buildFuzzyKeys(Map<Integer, Set<ByteArray>> fuzzyValueSet) { ArrayList<GTRecord> result = Lists.newArrayList(); if (fuzzyValueSet.isEmpty()) return result; // debug/profiling purpose if (BackdoorToggles.getDisableFuzzyKey()) { logger.info("The execution of this query will not use fuzzy key"); return result; } List<Map<Integer, ByteArray>> fuzzyValueCombinations = FuzzyValueCombination.calculate(fuzzyValueSet, maxFuzzyKeys); for (Map<Integer, ByteArray> fuzzyValue : fuzzyValueCombinations) { GTRecord fuzzy = new GTRecord(gtInfo); for (Map.Entry<Integer, ByteArray> entry : fuzzyValue.entrySet()) { fuzzy.set(entry.getKey(), entry.getValue()); } result.add(fuzzy); } return result; }
Example #2
Source File: TrieDictionaryForestBuilder.java From kylin with Apache License 2.0 | 6 votes |
private void addValue(byte[] valueBytes) { ByteArray valueByteArray = new ByteArray(valueBytes); if (previousValue != null && isOrdered) { int comp = previousValue.compareTo(valueByteArray); if (comp == 0) { return; //duplicate value } if (comp > 0) { logger.info("values not in ascending order, previous '{}', current '{}'", previousValue, valueByteArray); isOrdered = false; if (!trees.isEmpty()) { throw new IllegalStateException("Invalid input data. Unordered data cannot be split into multi trees"); } } } previousValue = valueByteArray; trieBuilder.addValue(valueBytes); curTreeSize += valueBytes.length; if (curTreeSize >= maxTrieTreeSize && isOrdered) { TrieDictionary<T> tree = trieBuilder.build(0); addTree(tree); reset(); } }
Example #3
Source File: RawSerializer.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
/**
 * Reads a var-int element count followed by that many byte arrays.
 * Zero-length entries are dropped from the result.
 *
 * @param in buffer positioned at the start of the serialized list
 * @return the non-empty entries, in read order
 * @throws RuntimeException if the element count read from the buffer is negative
 */
@Override
public List<ByteArray> deserialize(ByteBuffer in) {
    List<ByteArray> result = new ArrayList<>();
    int count = BytesUtil.readVInt(in);
    if (count < 0) {
        throw new RuntimeException("Read error data size:" + count);
    }

    for (int remaining = count; remaining > 0; remaining--) {
        ByteArray item = new ByteArray(BytesUtil.readByteArray(in));
        if (item.length() == 0) {
            continue;
        }
        result.add(item);
    }
    return result;
}
Example #4
Source File: TrieDictionaryForest.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
private void writeHead(DataOutput out) throws IOException { ByteArrayOutputStream byteBuf = new ByteArrayOutputStream(); DataOutputStream headOut = new DataOutputStream(byteBuf); headOut.writeInt(baseId); headOut.writeUTF(bytesConvert == null ? "" : bytesConvert.getClass().getName()); //write accuOffset headOut.writeInt(accuOffset.size()); for (int i = 0; i < accuOffset.size(); i++) headOut.writeInt(accuOffset.get(i)); //write valueDivide headOut.writeInt(valueDivide.size()); for (int i = 0; i < valueDivide.size(); i++) { ByteArray ba = valueDivide.get(i); byte[] byteStr = ba.toBytes(); headOut.writeInt(byteStr.length); headOut.write(byteStr); } //write tree size headOut.writeInt(trees.size()); headOut.close(); //output byte[] head = byteBuf.toByteArray(); out.writeInt(head.length); out.write(head); }
Example #5
Source File: RowKeyDecoder.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
public long decode(byte[] bytes) throws IOException { this.values.clear(); long cuboidId = rowKeySplitter.split(bytes); initCuboid(cuboidId); ByteArray[] splits = rowKeySplitter.getSplitBuffers(); int offset = rowKeySplitter.getBodySplitOffset(); // skip shard and cuboid id part for (int i = 0; i < this.cuboid.getColumns().size(); i++) { TblColRef col = this.cuboid.getColumns().get(i); collectValue(col, splits[offset].array(), splits[offset].offset(), splits[offset].length()); offset++; } return cuboidId; }
Example #6
Source File: TrieDictionaryForest.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
/**
 * Prints a human-readable description of this forest: the base id, the value
 * dividers, the accumulated offsets, and then each tree's own dump.
 *
 * @param out stream to print to
 */
@Override
public void dump(PrintStream out) {
    out.println("TrieDictionaryForest");
    out.println("baseId:" + baseId);

    StringBuilder sb = new StringBuilder();
    sb.append("value divide:");
    for (ByteArray ba : valueDivide) {
        // Decode from the ByteArray's own offset rather than assuming its data
        // starts at index 0 of the backing array.
        sb.append(bytesConvert.convertFromBytes(ba.array(), ba.offset(), ba.length())).append(" ");
    }

    sb.append("\noffset divide:");
    for (Integer offset : accuOffset) {
        sb.append(offset).append(" ");
    }
    out.println(sb.toString());

    for (int i = 0; i < trees.size(); i++) {
        out.println("----tree " + i + "--------");
        trees.get(i).dump(out);
    }
}
Example #7
Source File: SegmentGTStartAndEnd.java From kylin with Apache License 2.0 | 6 votes |
private ByteArray encodeTime(long ts, int index, int roundingFlag) { String value; DataType partitionColType = info.getColumnType(index); if (partitionColType.isDate()) { value = DateFormat.formatToDateStr(ts); } else if (partitionColType.isTimeFamily()) { value = DateFormat.formatToTimeWithoutMilliStr(ts); } else if (partitionColType.isStringFamily() || partitionColType.isIntegerFamily()) {//integer like 20160101 String partitionDateFormat = segment.getModel().getPartitionDesc().getPartitionDateFormat(); if (StringUtils.isEmpty(partitionDateFormat)) { value = "" + ts; } else { value = DateFormat.formatToDateStr(ts, partitionDateFormat); } } else { throw new RuntimeException("Type " + partitionColType + " is not valid partition column type"); } ByteBuffer buffer = ByteBuffer.allocate(info.getMaxColumnLength()); info.getCodeSystem().encodeColumnValue(index, value, roundingFlag, buffer); return ByteArray.copyOf(buffer.array(), 0, buffer.position()); }
Example #8
Source File: FlinkCubingMerge.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
/**
 * Folds all measure arrays of the group into one via {@code aggregators} and
 * emits a single tuple carrying the key of the last item seen.
 *
 * @param values the grouped (key, measures) tuples
 * @param out    collector receiving the single merged tuple
 */
@Override
public void reduce(Iterable<Tuple2<ByteArray, Object[]>> values, Collector<Tuple2<ByteArray, Object[]>> out)
        throws Exception {
    ByteArray lastKey = null;
    Object[] merged = null;

    for (Tuple2<ByteArray, Object[]> tuple : values) {
        lastKey = tuple.f0;
        if (merged != null) {
            // aggregate into a fresh array so the inputs are left untouched
            Object[] next = new Object[merged.length];
            aggregators.aggregate(tuple.f1, merged, next);
            merged = next;
        } else {
            merged = tuple.f1;
        }
    }

    out.collect(new Tuple2<>(lastKey, merged));
}
Example #9
Source File: RemoteDictionaryStore.java From kylin with Apache License 2.0 | 6 votes |
/**
 * Writes {@code putValue} for the given row key through the table's
 * check-and-put call.
 *
 * @param columnFamily  column family whose backing array is used for the put
 * @param rowkeyStr     row key text, encoded as UTF-8
 * @param expectedValue value the encode qualifier must hold when {@code checkPrevious} is true
 * @param putValue      value to store
 * @param checkPrevious when false, {@code null} is passed as the expected value
 * @return {@code ID_FOR_EMPTY_STR} for an empty row key, {@code putValue} when
 *         the put was applied, otherwise {@code ID_UNKNOWN}
 * @throws IOException if the table operation fails
 */
int checkAndPut(ByteArray columnFamily, String rowkeyStr, int expectedValue, int putValue, boolean checkPrevious)
        throws IOException {
    final byte[] rowkey = rowkeyStr.getBytes(StandardCharsets.UTF_8);
    if (rowkey.length == 0) {
        return ID_FOR_EMPTY_STR;
    }

    final byte[] encodedValue = Integer.toString(putValue).getBytes(StandardCharsets.UTF_8);
    final Put put = new Put(rowkey);
    put.addColumn(columnFamily.array(), encodeQualifierName, encodedValue);
    put.addColumn(columnFamily.array(), tsQualifierName, Bytes.toBytes(System.currentTimeMillis()));

    byte[] expected = null;
    if (checkPrevious) {
        expected = Integer.toString(expectedValue).getBytes(StandardCharsets.UTF_8);
    }

    if (!table.checkAndPut(rowkey, columnFamily.array(), encodeQualifierName, expected, put)) {
        return ID_UNKNOWN;
    }

    if (printValue) {
        logger.debug("Encode {} to {}", rowkeyStr, putValue);
    }
    return putValue;
}
Example #10
Source File: SparkCubingByLayer.java From kylin with Apache License 2.0 | 6 votes |
/**
 * Sums the count measure over every record of the RDD.
 *
 * @param rdd               pairs of row key and measure array
 * @param countMeasureIndex index of the count measure inside each measure array
 * @return the sum of the count measure across all records
 */
private Long getRDDCountSum(JavaPairRDD<ByteArray, Object[]> rdd, final int countMeasureIndex) {
    // key placed in every reduced tuple; only the summed value is read afterwards
    final ByteArray placeholderKey = new ByteArray();

    Function<Object[], Long> extractCount = new Function<Object[], Long>() {
        @Override
        public Long call(Object[] measures) throws Exception {
            return (Long) measures[countMeasureIndex];
        }
    };

    Function2<Tuple2<ByteArray, Long>, Tuple2<ByteArray, Long>, Tuple2<ByteArray, Long>> addCounts =
            new Function2<Tuple2<ByteArray, Long>, Tuple2<ByteArray, Long>, Tuple2<ByteArray, Long>>() {
                @Override
                public Tuple2<ByteArray, Long> call(Tuple2<ByteArray, Long> left, Tuple2<ByteArray, Long> right)
                        throws Exception {
                    return new Tuple2<>(placeholderKey, left._2() + right._2());
                }
            };

    return rdd.mapValues(extractCount).reduce(addCounts)._2();
}
Example #11
Source File: TopNCounterSerializerTest.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
/**
 * Round-trips a populated {@code TopNCounter} through the serializer and checks
 * that the restored counter has the same string form as the original.
 */
@Test
public void testSerialization() {
    TopNCounter<ByteArray> original = new TopNCounter<ByteArray>(50);
    Integer[] samples = { 1, 1, 2, 9, 1, 2, 3, 7, 7, 1, 3, 1, 1 };
    for (Integer sample : samples) {
        original.offer(new ByteArray(Bytes.toBytes(sample)));
    }
    original.sortAndRetain();

    ByteBuffer buffer = ByteBuffer.allocate(1024);
    serializer.serialize(original, buffer);

    // keep only the bytes that were actually written
    int written = buffer.position();
    byte[] serialized = new byte[written];
    System.arraycopy(buffer.array(), 0, serialized, 0, written);

    TopNCounter<ByteArray> restored = serializer.deserialize(ByteBuffer.wrap(serialized));
    Assert.assertEquals(original.toString(), restored.toString());
}
Example #12
Source File: FlinkCubingByLayer.java From kylin with Apache License 2.0 | 6 votes |
/**
 * Merges the measure arrays of one group with {@code aggregators} and emits a
 * single tuple carrying the key of the last item seen.
 *
 * @param iterable  the grouped (key, measures) tuples
 * @param collector collector receiving the single merged tuple
 */
@Override
public void reduce(Iterable<Tuple2<ByteArray, Object[]>> iterable,
        Collector<Tuple2<ByteArray, Object[]>> collector) throws Exception {
    ByteArray lastKey = null;
    Object[] merged = null;

    for (Tuple2<ByteArray, Object[]> tuple : iterable) {
        lastKey = tuple.f0;
        if (merged != null) {
            // each merge step writes into a fresh array of measureNum slots
            Object[] next = new Object[measureNum];
            aggregators.aggregate(tuple.f1, merged, next);
            merged = next;
        } else {
            merged = tuple.f1;
        }
    }

    collector.collect(new Tuple2<>(lastKey, merged));
}
Example #13
Source File: NDCuboidBuilder.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
private void buildKeyInternal(Cuboid parentCuboid, Cuboid childCuboid, ByteArray[] splitBuffers, ByteArray newKeyBodyBuf) { RowKeyEncoder rowkeyEncoder = rowKeyEncoderProvider.getRowkeyEncoder(childCuboid); // rowkey columns long mask = Long.highestOneBit(parentCuboid.getId()); long parentCuboidId = parentCuboid.getId(); long childCuboidId = childCuboid.getId(); long parentCuboidIdActualLength = (long)Long.SIZE - Long.numberOfLeadingZeros(parentCuboid.getId()); int index = rowKeySplitter.getBodySplitOffset(); // skip shard and cuboidId int offset = RowConstants.ROWKEY_SHARDID_LEN + RowConstants.ROWKEY_CUBOIDID_LEN; // skip shard and cuboidId for (int i = 0; i < parentCuboidIdActualLength; i++) { if ((mask & parentCuboidId) > 0) {// if the this bit position equals // 1 if ((mask & childCuboidId) > 0) {// if the child cuboid has this // column System.arraycopy(splitBuffers[index].array(), splitBuffers[index].offset(), newKeyBodyBuf.array(), offset, splitBuffers[index].length()); offset += splitBuffers[index].length(); } index++; } mask = mask >> 1; } rowkeyEncoder.fillHeader(newKeyBodyBuf.array()); }
Example #14
Source File: FlinkCubingByLayer.java From kylin with Apache License 2.0 | 6 votes |
@Override public void flatMap(Tuple2<ByteArray, Object[]> tuple2, Collector<Tuple2<ByteArray, Object[]>> collector) throws Exception { byte[] key = tuple2.f0.array(); long cuboidId = rowKeySplitter.parseCuboid(key); final List<Long> myChildren = cubeSegment.getCuboidScheduler().getSpanningCuboid(cuboidId); // if still empty or null if (myChildren == null || myChildren.size() == 0) { return; } rowKeySplitter.split(key); final Cuboid parentCuboid = Cuboid.findForMandatory(cubeDesc, cuboidId); for (Long child : myChildren) { Cuboid childCuboid = Cuboid.findForMandatory(cubeDesc, child); ByteArray result = ndCuboidBuilder.buildKey2(parentCuboid, childCuboid, rowKeySplitter.getSplitBuffers()); collector.collect(new Tuple2<>(result, tuple2.f1)); } }
Example #15
Source File: TopNCounterSerializerTest.java From kylin with Apache License 2.0 | 6 votes |
/**
 * Serializes a populated {@code TopNCounter}, deserializes the written bytes,
 * and checks that both counters have the same string form.
 */
@Test
public void testSerialization() {
    TopNCounter<ByteArray> source = new TopNCounter<ByteArray>(50);
    Integer[] inputs = { 1, 1, 2, 9, 1, 2, 3, 7, 7, 1, 3, 1, 1 };
    for (Integer input : inputs) {
        source.offer(new ByteArray(Bytes.toBytes(input)));
    }
    source.sortAndRetain();

    ByteBuffer sink = ByteBuffer.allocate(1024);
    serializer.serialize(source, sink);

    // copy exactly the written prefix of the buffer
    int length = sink.position();
    byte[] payload = new byte[length];
    System.arraycopy(sink.array(), 0, payload, 0, length);

    TopNCounter<ByteArray> decoded = serializer.deserialize(ByteBuffer.wrap(payload));
    Assert.assertEquals(source.toString(), decoded.toString());
}
Example #16
Source File: ScanRangePlannerBase.java From kylin with Apache License 2.0 | 5 votes |
/**
 * Null-safe wrapper around {@code ByteArray.toReadableText()}.
 *
 * @param byteArray the value to render, may be null
 * @return the readable text, or {@code null} when {@code byteArray} is null
 */
protected String makeReadable(ByteArray byteArray) {
    return byteArray == null ? null : byteArray.toReadableText();
}
Example #17
Source File: DimEncodingPreserveOrderTest.java From kylin with Apache License 2.0 | 5 votes |
/**
 * Encodes "0000", "0001", "FFF0" and null with a 4-character fixed-length hex
 * encoding and checks that the encoded values are ordered under
 * {@code DefaultGTComparator}.
 */
@Test
public void testFixedLengthHexDimEncPreserveOrder() {
    FixedLenHexDimEnc enc = new FixedLenHexDimEnc(4);

    String[] inputs = { "0000", "0001", "FFF0", null };
    List<ByteArray> encodedValues = Lists.newArrayList();
    for (String input : inputs) {
        encodedValues.add(encode(enc, input));
    }

    assertTrue(Ordering.from(new DefaultGTComparator()).isOrdered(encodedValues));
}
Example #18
Source File: InvertIndexSearcher.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
/**
 * Evaluates an IN-style comparison by OR-ing the bitmaps of every filter value
 * found in the column's inverted index.
 *
 * @param filter the comparison filter; its column name selects the index searcher
 * @return {@code EvalResult.ALL_MATCH} when the column has no index searcher;
 *         otherwise a result whose bitmap is the union of all value bitmaps
 *         (the bitmap is left unset when no value produced one)
 */
private EvalResult doEvalCompareIn(CompareTupleFilter filter) {
    EvalResult result = new EvalResult();
    String column = filter.getColumn().getName();
    ColInvertIndexSearcher colSearcher = colIndexSearchers.get(column);
    if (colSearcher == null) {
        return EvalResult.ALL_MATCH;
    }

    List<ImmutableRoaringBitmap> bitmaps = Lists.newArrayList();
    for (Object value : filter.getValues()) {
        byte[] bytes = null;
        if (value instanceof ByteArray) {
            // toBytes() yields exactly the bytes this ByteArray covers; array()
            // would hand over the whole backing array regardless of offset/length.
            bytes = ((ByteArray) value).toBytes();
        } else if (value instanceof byte[]) {
            bytes = (byte[]) value;
        } else if (value instanceof String) {
            bytes = Bytes.toBytes((String) value);
        }
        // NOTE(review): values of any other type reach searchValue as null — confirm it tolerates that.
        ImmutableRoaringBitmap bitmap = colSearcher.searchValue(bytes);
        if (bitmap != null) {
            bitmaps.add(bitmap);
        }
    }

    if (bitmaps.isEmpty()) {
        return result;
    }

    result.bitmap = ImmutableRoaringBitmap.or(bitmaps.toArray(new ImmutableRoaringBitmap[bitmaps.size()]));
    return result;
}
Example #19
Source File: FragmentFileSearcher.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
/**
 * Encodes a dimension value through {@code recordCodec} into the shared buffer
 * and returns a copy of the written bytes.
 *
 * @param col          dimension column index
 * @param value        value to encode
 * @param roundingFlag rounding flag forwarded to the codec
 * @return a copy of the encoded bytes, or {@code null} when an
 *         {@link IllegalArgumentException} is thrown while encoding
 */
protected ByteArray translate(int col, Object value, int roundingFlag) {
    ByteArray encoded;
    try {
        buf.clear();
        recordCodec.encodeDimension(col, value, roundingFlag, buf);
        encoded = ByteArray.copyOf(buf.array(), 0, buf.position());
    } catch (IllegalArgumentException ex) {
        encoded = null;
    }
    return encoded;
}
Example #20
Source File: RawSerializer.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
/**
 * Returns the list held by {@code current}, creating and storing an empty one
 * on first use.
 *
 * @return the existing list, or a newly stored empty list
 */
private List<ByteArray> current() {
    List<ByteArray> existing = (List<ByteArray>) current.get();
    if (existing != null) {
        return existing;
    }

    List<ByteArray> created = new ArrayList<ByteArray>();
    current.set(created);
    return created;
}
Example #21
Source File: RawSerializer.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
/**
 * Writes the element count as a var-int followed by each element's bytes.
 * A {@code null} list is written as a count of zero.
 *
 * @param values the byte arrays to write, may be null
 * @param out    destination buffer
 * @throws RuntimeException if the buffer has no room left, or fewer remaining
 *         bytes than the length of the element about to be written
 */
@Override
public void serialize(List<ByteArray> values, ByteBuffer out) {
    if (values == null) {
        BytesUtil.writeVInt(0, out);
        return;
    }

    BytesUtil.writeVInt(values.size(), out);
    for (ByteArray array : values) {
        boolean fits = out.hasRemaining() && out.remaining() >= array.length();
        if (!fits) {
            throw new RuntimeException("BufferOverflow! Please use one higher cardinality column for dimension column when build RAW cube!");
        }

        byte[] slice = BytesUtil.subarray(array.array(), array.offset(), array.offset() + array.length());
        BytesUtil.writeByteArray(slice, out);
    }
}
Example #22
Source File: TopNAggregator.java From kylin with Apache License 2.0 | 5 votes |
/**
 * Merges {@code value} into the running sum. On the first call the sum is
 * created with ten times the capacity of the incoming counter.
 *
 * @param value the counter to merge in
 */
@Override
public void aggregate(TopNCounter<ByteArray> value) {
    if (sum != null) {
        sum.merge(value);
        return;
    }

    // first value seen: size the accumulator from the incoming counter
    capacity = value.getCapacity();
    sum = new TopNCounter<>(capacity * 10);
    sum.merge(value);
}
Example #23
Source File: RowKeySplitter.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
/**
 * Creates a splitter for the given cube segment.
 *
 * @param cubeSeg  segment supplying the sharding flag, cube descriptor and dimension encodings
 * @param splitLen number of slots allocated for the split buffers and split offsets
 * @param bytesLen not read by this constructor
 */
public RowKeySplitter(CubeSegment cubeSeg, int splitLen, int bytesLen) {
    this.enableSharding = cubeSeg.isEnableSharding();
    this.cubeDesc = cubeSeg.getCubeDesc();

    IDimensionEncodingMap encodings = new CubeDimEncMap(cubeSeg);
    // look up the encoding of every rowkey column before handing the map to the column IO
    for (RowKeyColDesc rowKeyCol : cubeDesc.getRowkey().getRowKeyColumns()) {
        encodings.get(rowKeyCol.getColRef());
    }
    this.colIO = new RowKeyColumnIO(encodings);

    this.splitBuffers = new ByteArray[splitLen];
    this.splitOffsets = new int[splitLen];
    this.bufferSize = 0;
}
Example #24
Source File: GTScanRequest.java From kylin with Apache License 2.0 | 5 votes |
/**
 * Reads a var-int column count followed by that many imported byte arrays and
 * wraps them in a {@link GTRecord}.
 *
 * @param in    buffer positioned at the start of the serialized record
 * @param sInfo table info passed to the new record
 * @return the deserialized record
 */
private GTRecord deserializeGTRecord(ByteBuffer in, GTInfo sInfo) {
    final ByteArray[] columns = new ByteArray[BytesUtil.readVInt(in)];
    for (int col = 0; col < columns.length; col++) {
        columns[col] = ByteArray.importData(in);
    }
    return new GTRecord(sInfo, columns);
}
Example #25
Source File: ExtendedColumnMeasureType.java From kylin with Apache License 2.0 | 5 votes |
public IAdvMeasureFiller getAdvancedTupleFiller(FunctionDesc function, TupleInfo returnTupleInfo, Map<TblColRef, Dictionary<String>> dictionaryMap) { final TblColRef extended = getExtendedColumn(function); final int extendedColumnInTupleIdx = returnTupleInfo.hasColumn(extended) ? returnTupleInfo.getColumnIndex(extended) : -1; if (extendedColumnInTupleIdx == -1) { throw new RuntimeException("Extended column is not required in returnTupleInfo"); } return new IAdvMeasureFiller() { private String value; @Override public void reload(Object measureValue) { if (measureValue == null) { value = null; return; } ByteArray byteArray = (ByteArray) measureValue; //the array in ByteArray is guaranteed to be completed owned by the ByteArray value = Bytes.toString(byteArray.array()); } @Override public int getNumOfRows() { return 1; } @Override public void fillTuple(Tuple tuple, int row) { tuple.setDimensionValue(extendedColumnInTupleIdx, value); } }; }
Example #26
Source File: RawSerializer.java From kylin with Apache License 2.0 | 5 votes |
/**
 * Writes the number of elements as a var-int and then the bytes of each
 * element. A {@code null} list is written as a count of zero.
 *
 * @param values the byte arrays to write, may be null
 * @param out    destination buffer
 * @throws RuntimeException if the buffer has no room left, or fewer remaining
 *         bytes than the length of the element about to be written
 */
@Override
public void serialize(List<ByteArray> values, ByteBuffer out) {
    if (values == null) {
        BytesUtil.writeVInt(0, out);
        return;
    }

    BytesUtil.writeVInt(values.size(), out);
    for (ByteArray array : values) {
        boolean hasRoom = out.hasRemaining() && out.remaining() >= array.length();
        if (!hasRoom) {
            throw new RuntimeException("BufferOverflow! Please use one higher cardinality column for dimension column when build RAW cube!");
        }

        byte[] content = BytesUtil.subarray(array.array(), array.offset(), array.offset() + array.length());
        BytesUtil.writeByteArray(content, out);
    }
}
Example #27
Source File: SparkCubingByLayer.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
/**
 * Encodes one flat-table row into a base-cuboid record.
 *
 * <p>On first use the base cuboid builder is created while holding the
 * {@code SparkCubingByLayer} class lock, with the Kylin config loaded from HDFS.
 *
 * @param rowArray the column values of one flat-table row
 * @return the row key wrapped in a {@code ByteArray}, paired with the measure values
 */
@Override
public Tuple2<ByteArray, Object[]> call(String[] rowArray) throws Exception {
    if (!initialized) {
        synchronized (SparkCubingByLayer.class) {
            if (!initialized) {
                KylinConfig kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
                try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
                        .setAndUnsetThreadLocalConfig(kConfig)) {
                    CubeInstance cube = CubeManager.getInstance(kConfig).getCube(cubeName);
                    CubeDesc desc = cube.getDescriptor();
                    CubeSegment segment = cube.getSegmentById(segmentId);
                    CubeJoinedFlatTableEnrich flatTable = new CubeJoinedFlatTableEnrich(
                            EngineFactory.getJoinedFlatTableDesc(segment), desc);
                    long baseCuboidId = Cuboid.getBaseCuboidId(desc);
                    Cuboid baseCuboid = Cuboid.findForMandatory(desc, baseCuboidId);

                    baseCuboidBuilder = new BaseCuboidBuilder(kConfig, desc, segment, flatTable,
                            AbstractRowKeyEncoder.createInstance(segment, baseCuboid),
                            MeasureIngester.create(desc.getMeasures()), segment.buildDictionaryMap());
                    initialized = true;
                }
            }
        }
    }

    baseCuboidBuilder.resetAggrs();
    byte[] encodedKey = baseCuboidBuilder.buildKey(rowArray);
    Object[] measures = baseCuboidBuilder.buildValueObjects(rowArray);
    return new Tuple2<>(new ByteArray(encodedKey), measures);
}
Example #28
Source File: RowKeyEncoder.java From kylin with Apache License 2.0 | 5 votes |
@Override public void encode(ByteArray bodyBytes, ByteArray outputBuf) { Preconditions.checkState(bodyBytes.length() == bodyLength); Preconditions.checkState(bodyBytes.length() + getHeaderLength() == outputBuf.length(), // "bodybytes length: " + bodyBytes.length() + " outputBuf length: " + outputBuf.length() + " header length: " + getHeaderLength()); System.arraycopy(bodyBytes.array(), bodyBytes.offset(), outputBuf.array(), getHeaderLength(), bodyLength); //fill shard and cuboid fillHeader(outputBuf.array()); }
Example #29
Source File: RawSerializerTest.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
/**
 * Checks that a null input round-trips to an empty list and that an empty list
 * round-trips to an equal list.
 */
@Test
public void testNull() {
    List<ByteArray> output = doSAndD(null);
    // expected value first, so a failure reports "expected 0 but was N"
    assertEquals(0, output.size());

    List<ByteArray> input = new ArrayList<ByteArray>();
    output = doSAndD(input);
    assertEquals(input, output);
}
Example #30
Source File: RawSerializerTest.java From kylin with Apache License 2.0 | 5 votes |
/**
 * Checks that serializing and deserializing a null list yields an empty list,
 * and that an empty list survives the round trip unchanged.
 */
@Test
public void testNull() {
    List<ByteArray> output = doSAndD(null);
    // JUnit's argument order is (expected, actual); keep it so failure messages read correctly
    assertEquals(0, output.size());

    List<ByteArray> input = new ArrayList<ByteArray>();
    output = doSAndD(input);
    assertEquals(input, output);
}