Java Code Examples for org.apache.flink.core.memory.MemorySegment#getLong()
The following examples show how to use org.apache.flink.core.memory.MemorySegment#getLong().
You can go to the original project or source file by following the links above each example.
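Before the examples taken from the Flink codebase, here is a minimal standalone sketch (not from any of the linked files) of the basic read/write round trip on a segment. It assumes that MemorySegmentFactory.allocateUnpooledSegment from the same package is available for creating a heap-backed segment; the class and offsets below are purely illustrative.

import org.apache.flink.core.memory.MemorySegment;
import org.apache.flink.core.memory.MemorySegmentFactory;

public class GetLongSketch {

    public static void main(String[] args) {
        // Allocate a small heap-backed segment; 16 bytes holds two longs.
        MemorySegment segment = MemorySegmentFactory.allocateUnpooledSegment(16);

        // Write two longs at byte offsets 0 and 8, then read them back with getLong().
        segment.putLong(0, 42L);
        segment.putLong(8, -1L);

        long first = segment.getLong(0);   // 42
        long second = segment.getLong(8);  // -1

        System.out.println(first + ", " + second);
    }
}

The examples below use the same getLong(int offset) call to read 8-byte pointers, keys, and addresses out of hash-table bucket segments.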
Example 1
Source File: InPlaceMutableHashTable.java From flink with Apache License 2.0 | 6 votes |
/**
 * Inserts the given record into the hash table.
 * Note: this method doesn't care about whether a record with the same key is already present.
 *
 * @param record The record to insert.
 * @throws IOException (EOFException specifically, if memory ran out)
 */
@Override
public void insert(T record) throws IOException {
    if (closed) {
        return;
    }

    final int hashCode = MathUtils.jenkinsHash(buildSideComparator.hash(record));
    final int bucket = hashCode & numBucketsMask;
    final int bucketSegmentIndex = bucket >>> numBucketsPerSegmentBits; // which segment contains the bucket
    final MemorySegment bucketSegment = bucketSegments[bucketSegmentIndex];
    final int bucketOffset = (bucket & numBucketsPerSegmentMask) << bucketSizeBits; // offset of the bucket in the segment
    final long firstPointer = bucketSegment.getLong(bucketOffset);

    try {
        final long newFirstPointer = recordArea.appendPointerAndRecord(firstPointer, record);
        bucketSegment.putLong(bucketOffset, newFirstPointer);
    } catch (EOFException ex) {
        compactOrThrow();
        insert(record);
        return;
    }

    numElements++;
    resizeTableIfNecessary();
}
Example 2
Source File: InPlaceMutableHashTable.java From flink with Apache License 2.0 | 5 votes |
/**
 * Searches the hash table for the record with the given key.
 * (If there would be multiple matches, only one is returned.)
 *
 * @param record The record whose key we are searching for
 * @param targetForMatch If a match is found, it will be written here
 * @return targetForMatch if a match is found, otherwise null.
 */
@Override
public T getMatchFor(PT record, T targetForMatch) {
    if (closed) {
        return null;
    }

    final int hashCode = MathUtils.jenkinsHash(probeTypeComparator.hash(record));
    final int bucket = hashCode & numBucketsMask;

    bucketSegmentIndex = bucket >>> numBucketsPerSegmentBits; // which segment contains the bucket
    final MemorySegment bucketSegment = bucketSegments[bucketSegmentIndex];

    bucketOffset = (bucket & numBucketsPerSegmentMask) << bucketSizeBits; // offset of the bucket in the segment

    curElemPtr = bucketSegment.getLong(bucketOffset);

    pairComparator.setReference(record);

    T currentRecordInList = targetForMatch;

    prevElemPtr = INVALID_PREV_POINTER;
    try {
        while (curElemPtr != END_OF_LIST && !closed) {
            recordArea.setReadPosition(curElemPtr);
            nextPtr = recordArea.readPointer();
            currentRecordInList = recordArea.readRecord(currentRecordInList);
            recordEnd = recordArea.getReadPosition();
            if (pairComparator.equalToReference(currentRecordInList)) {
                // we found an element with a matching key, and not just a hash collision
                return currentRecordInList;
            }

            prevElemPtr = curElemPtr;
            curElemPtr = nextPtr;
        }
    } catch (IOException ex) {
        throw new RuntimeException("Error deserializing record from the hashtable: " + ex.getMessage(), ex);
    }
    return null;
}
Example 3
Source File: LongHashPartition.java From flink with Apache License 2.0 | 5 votes |
void iteratorToDenseBucket(MemorySegment[] denseBuckets, long addressOffset, long globalMinKey) {
    int bucketOffset = 0;
    MemorySegment segment = buckets[bucketOffset];
    int segOffset = 0;
    for (int i = 0; i < numBuckets; i++) {
        long address = segment.getLong(segOffset + 8);
        if (address != INVALID_ADDRESS) {
            long key = segment.getLong(segOffset);
            long denseBucket = key - globalMinKey;
            long denseBucketOffset = denseBucket << 3;
            int denseSegIndex = (int) (denseBucketOffset >>> segmentSizeBits);
            int denseSegOffset = (int) (denseBucketOffset & segmentSizeMask);
            denseBuckets[denseSegIndex].putLong(denseSegOffset, address + addressOffset);
        }

        // not last bucket, move to next.
        if (i != numBuckets - 1) {
            if (segOffset + 16 < segmentSize) {
                segOffset += 16;
            } else {
                segment = buckets[++bucketOffset];
                segOffset = 0;
            }
        }
    }
}
Example 4
Source File: StringNormalizedKeyComputer.java From flink with Apache License 2.0 | 5 votes |
@Override
public void swapKey(MemorySegment segI, int offsetI, MemorySegment segJ, int offsetJ) {
    long temp0 = segI.getLong(offsetI);
    segI.putLong(offsetI, segJ.getLong(offsetJ));
    segJ.putLong(offsetJ, temp0);
}
Example 5
Source File: MutableHashTable.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
private void buildBloomFilterForExtraOverflowSegments(int bucketInSegmentPos, MemorySegment bucket, HashPartition<BT, PT> p) {
    int totalCount = 0;
    boolean skip = false;
    long forwardPointer = bucket.getLong(bucketInSegmentPos + HEADER_FORWARD_OFFSET);
    while (forwardPointer != BUCKET_FORWARD_POINTER_NOT_SET) {
        final int overflowSegNum = (int) (forwardPointer >>> 32);
        if (overflowSegNum < 0 || overflowSegNum >= p.numOverflowSegments) {
            skip = true;
            break;
        }
        MemorySegment overflowSegment = p.overflowSegments[overflowSegNum];
        int bucketInOverflowSegmentOffset = (int) forwardPointer;

        final int count = overflowSegment.getShort(bucketInOverflowSegmentOffset + HEADER_COUNT_OFFSET);
        totalCount += count;

        // The bits size of bloom filter per bucket is 112 * 8, while expected input entries is greater than 2048, the fpp would higher than 0.9,
        // which make the bloom filter an overhead instead of optimization.
        if (totalCount > 2048) {
            skip = true;
            break;
        }

        for (int i = 0; i < count; i++) {
            int hashCode = overflowSegment.getInt(bucketInOverflowSegmentOffset + BUCKET_HEADER_LENGTH + i * HASH_CODE_LEN);
            this.bloomFilter.addHash(hashCode);
        }

        forwardPointer = overflowSegment.getLong(bucketInOverflowSegmentOffset + HEADER_FORWARD_OFFSET);
    }

    if (!skip) {
        bucket.put(bucketInSegmentPos + HEADER_STATUS_OFFSET, BUCKET_STATUS_IN_FILTER);
    }
}
Example 6
Source File: BinaryIndexedSortable.java From flink with Apache License 2.0 | 5 votes |
@Override
public void swap(int segmentNumberI, int segmentOffsetI, int segmentNumberJ, int segmentOffsetJ) {
    final MemorySegment segI = this.sortIndex.get(segmentNumberI);
    final MemorySegment segJ = this.sortIndex.get(segmentNumberJ);

    // swap offset
    long index = segI.getLong(segmentOffsetI);
    segI.putLong(segmentOffsetI, segJ.getLong(segmentOffsetJ));
    segJ.putLong(segmentOffsetJ, index);

    // swap key
    normalizedKeyComputer.swapKey(segI, segmentOffsetI + OFFSET_LEN, segJ, segmentOffsetJ + OFFSET_LEN);
}
Example 7
Source File: InPlaceMutableHashTable.java From flink with Apache License 2.0 | 5 votes |
/**
 * Searches the hash table for the record with the given key.
 * (If there would be multiple matches, only one is returned.)
 *
 * @param record The record whose key we are searching for
 * @param targetForMatch If a match is found, it will be written here
 * @return targetForMatch if a match is found, otherwise null.
 */
@Override
public T getMatchFor(PT record, T targetForMatch) {
    if (closed) {
        return null;
    }

    final int hashCode = MathUtils.jenkinsHash(probeTypeComparator.hash(record));
    final int bucket = hashCode & numBucketsMask;

    bucketSegmentIndex = bucket >>> numBucketsPerSegmentBits; // which segment contains the bucket
    final MemorySegment bucketSegment = bucketSegments[bucketSegmentIndex];

    bucketOffset = (bucket & numBucketsPerSegmentMask) << bucketSizeBits; // offset of the bucket in the segment

    curElemPtr = bucketSegment.getLong(bucketOffset);

    pairComparator.setReference(record);

    T currentRecordInList = targetForMatch;

    prevElemPtr = INVALID_PREV_POINTER;
    try {
        while (curElemPtr != END_OF_LIST && !closed) {
            recordArea.setReadPosition(curElemPtr);
            nextPtr = recordArea.readPointer();
            currentRecordInList = recordArea.readRecord(currentRecordInList);
            recordEnd = recordArea.getReadPosition();
            if (pairComparator.equalToReference(currentRecordInList)) {
                // we found an element with a matching key, and not just a hash collision
                return currentRecordInList;
            }

            prevElemPtr = curElemPtr;
            curElemPtr = nextPtr;
        }
    } catch (IOException ex) {
        throw new RuntimeException("Error deserializing record from the hashtable: " + ex.getMessage(), ex);
    }
    return null;
}
Example 8
Source File: LongHashPartition.java From flink with Apache License 2.0 | 5 votes |
/**
 * Returns an iterator for all the values for the given key, or null if no value found.
 */
public MatchIterator get(long key, int hashCode) {
    int bucket = hashCode & numBucketsMask;
    int bucketOffset = bucket << 4;
    MemorySegment segment = buckets[bucketOffset >>> segmentSizeBits];
    int segOffset = bucketOffset & segmentSizeMask;

    while (true) {
        long address = segment.getLong(segOffset + 8);
        if (address != INVALID_ADDRESS) {
            if (segment.getLong(segOffset) == key) {
                return valueIter(address);
            } else {
                bucket = (bucket + 1) & numBucketsMask;
                if (segOffset + 16 < segmentSize) {
                    segOffset += 16;
                } else {
                    bucketOffset = bucket << 4;
                    segOffset = bucketOffset & segmentSizeMask;
                    segment = buckets[bucketOffset >>> segmentSizeBits];
                }
            }
        } else {
            return valueIter(INVALID_ADDRESS);
        }
    }
}
Example 9
Source File: InPlaceMutableHashTable.java From flink with Apache License 2.0 | 5 votes |
/** Same as above, but the number of bucket segments of the new table can be specified. */
private void rebuild(long newNumBucketSegments) throws IOException {
    // Get new bucket segments
    releaseBucketSegments();
    allocateBucketSegments((int) newNumBucketSegments);

    T record = buildSideSerializer.createInstance();
    try {
        EntryIterator iter = getEntryIterator();
        recordArea.resetAppendPosition();
        recordArea.setWritePosition(0);
        while ((record = iter.next(record)) != null && !closed) {
            final int hashCode = MathUtils.jenkinsHash(buildSideComparator.hash(record));
            final int bucket = hashCode & numBucketsMask;
            final int bucketSegmentIndex = bucket >>> numBucketsPerSegmentBits; // which segment contains the bucket
            final MemorySegment bucketSegment = bucketSegments[bucketSegmentIndex];
            final int bucketOffset = (bucket & numBucketsPerSegmentMask) << bucketSizeBits; // offset of the bucket in the segment
            final long firstPointer = bucketSegment.getLong(bucketOffset);

            long ptrToAppended = recordArea.noSeekAppendPointerAndRecord(firstPointer, record);
            bucketSegment.putLong(bucketOffset, ptrToAppended);
        }
        recordArea.freeSegmentsAfterAppendPosition();
        holes = 0;
    } catch (EOFException ex) {
        throw new RuntimeException("Bug in InPlaceMutableHashTable: we shouldn't get out of memory during a rebuild, " +
            "because we aren't allocating any new memory.");
    }
}
Example 10
Source File: NormalizedKeySorter.java From flink with Apache License 2.0 | 4 votes |
@Override
public void writeToOutput(ChannelWriterOutputView output, LargeRecordHandler<T> largeRecordsOutput) throws IOException {
    if (LOG.isDebugEnabled()) {
        if (largeRecordsOutput == null) {
            LOG.debug("Spilling sort buffer without large record handling.");
        } else {
            LOG.debug("Spilling sort buffer with large record handling.");
        }
    }

    final int numRecords = this.numRecords;
    int currentMemSeg = 0;
    int currentRecord = 0;

    while (currentRecord < numRecords) {
        final MemorySegment currentIndexSegment = this.sortIndex.get(currentMemSeg++);

        // go through all records in the memory segment
        for (int offset = 0; currentRecord < numRecords && offset <= this.lastIndexEntryOffset; currentRecord++, offset += this.indexEntrySize) {
            final long pointer = currentIndexSegment.getLong(offset);

            // small records go into the regular spill file, large records into the special code path
            if (pointer >= 0 || largeRecordsOutput == null) {
                this.recordBuffer.setReadPosition(pointer);
                this.serializer.copy(this.recordBuffer, output);
            } else {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Spilling large record to large record fetch file.");
                }

                this.recordBuffer.setReadPosition(pointer & POINTER_MASK);
                T record = this.serializer.deserialize(this.recordBuffer);
                largeRecordsOutput.addRecord(record);
            }
        }
    }
}
Example 11
Source File: LongHashPartition.java From flink with Apache License 2.0 | 4 votes |
/**
 * Update the address in array for given key.
 */
private void updateIndex(
        long key,
        int hashCode,
        long address,
        int size,
        MemorySegment dataSegment,
        int currentPositionInSegment) throws IOException {
    assert (numKeys <= numBuckets / 2);
    int bucketId = hashCode & numBucketsMask;

    // each bucket occupied 16 bytes (long key + long pointer to data address)
    int bucketOffset = bucketId * SPARSE_BUCKET_ELEMENT_SIZE_IN_BYTES;
    MemorySegment segment = buckets[bucketOffset >>> segmentSizeBits];
    int segOffset = bucketOffset & segmentSizeMask;
    long currAddress;

    while (true) {
        currAddress = segment.getLong(segOffset + 8);
        if (segment.getLong(segOffset) != key && currAddress != INVALID_ADDRESS) {
            // hash conflicts, the bucket is occupied by another key

            // TODO test Conflict resolution:
            // now: +1 +1 +1... cache friendly but more conflict, so we set factor to 0.5
            // other1: +1 +2 +3... less conflict, factor can be 0.75
            // other2: Secondary hashCode... less and less conflict, but need compute hash again
            bucketId = (bucketId + 1) & numBucketsMask;
            if (segOffset + SPARSE_BUCKET_ELEMENT_SIZE_IN_BYTES < segmentSize) {
                // if the new bucket still in current segment, we only need to update offset
                // within this segment
                segOffset += SPARSE_BUCKET_ELEMENT_SIZE_IN_BYTES;
            } else {
                // otherwise, we should re-calculate segment and offset
                bucketOffset = bucketId * 16;
                segment = buckets[bucketOffset >>> segmentSizeBits];
                segOffset = bucketOffset & segmentSizeMask;
            }
        } else {
            break;
        }
    }

    if (currAddress == INVALID_ADDRESS) {
        // this is the first value for this key, put the address in array.
        segment.putLong(segOffset, key);
        segment.putLong(segOffset + 8, address);
        numKeys += 1;

        // dataSegment may be null if we only have to rehash bucket area
        if (dataSegment != null) {
            dataSegment.putLong(currentPositionInSegment, toAddrAndLen(INVALID_ADDRESS, size));
        }
        if (numKeys * 2 > numBuckets) {
            resize();
        }
    } else {
        // there are some values for this key, put the address in the front of them.
        dataSegment.putLong(currentPositionInSegment, toAddrAndLen(currAddress, size));
        segment.putLong(segOffset + 8, address);
    }
}
Example 12
Source File: CompactingHashTable.java From flink with Apache License 2.0 | 4 votes |
/**
 * utility function that inserts all entries from a bucket and its overflow buckets into the cache
 *
 * @return true if last bucket was not reached yet
 * @throws IOException
 */
private boolean fillCache() throws IOException {
    if (currentBucketIndex >= table.numBuckets) {
        return false;
    }
    MemorySegment bucket = table.buckets[currentSegmentIndex];

    // get the basic characteristics of the bucket
    final int partitionNumber = bucket.get(currentBucketOffset + HEADER_PARTITION_OFFSET);
    final InMemoryPartition<T> partition = table.partitions.get(partitionNumber);
    final MemorySegment[] overflowSegments = partition.overflowSegments;

    int countInSegment = bucket.getInt(currentBucketOffset + HEADER_COUNT_OFFSET);
    int numInSegment = 0;
    int posInSegment = currentBucketOffset + BUCKET_POINTER_START_OFFSET;
    int bucketOffset = currentBucketOffset;

    // loop over all segments that are involved in the bucket (original bucket plus overflow buckets)
    while (true) {
        while (numInSegment < countInSegment) {
            long pointer = bucket.getLong(posInSegment);
            posInSegment += POINTER_LEN;
            numInSegment++;
            T target = table.buildSideSerializer.createInstance();
            try {
                target = partition.readRecordAt(pointer, target);
                cache.add(target);
            } catch (IOException e) {
                throw new RuntimeException("Error deserializing record from the Hash Table: " + e.getMessage(), e);
            }
        }

        // this segment is done. check if there is another chained bucket
        final long forwardPointer = bucket.getLong(bucketOffset + HEADER_FORWARD_OFFSET);
        if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
            break;
        }

        final int overflowSegNum = (int) (forwardPointer >>> 32);
        bucket = overflowSegments[overflowSegNum];
        bucketOffset = (int) forwardPointer;
        countInSegment = bucket.getInt(bucketOffset + HEADER_COUNT_OFFSET);
        posInSegment = bucketOffset + BUCKET_POINTER_START_OFFSET;
        numInSegment = 0;
    }

    currentBucketIndex++;
    if (currentBucketIndex % bucketsPerSegment == 0) {
        currentSegmentIndex++;
        currentBucketOffset = 0;
    } else {
        currentBucketOffset += HASH_BUCKET_SIZE;
    }

    return true;
}
Example 13
Source File: CompactingHashTable.java From flink with Apache License 2.0 | 4 votes |
public T getMatchFor(PT probeSideRecord, T reuse) {
    if (closed) {
        return null;
    }

    final int searchHashCode = MathUtils.jenkinsHash(this.probeTypeComparator.hash(probeSideRecord));
    final int posHashCode = searchHashCode % numBuckets;

    // get the bucket for the given hash code
    MemorySegment bucket = buckets[posHashCode >> bucketsPerSegmentBits];
    int bucketInSegmentOffset = (posHashCode & bucketsPerSegmentMask) << NUM_INTRA_BUCKET_BITS;

    // get the basic characteristics of the bucket
    final int partitionNumber = bucket.get(bucketInSegmentOffset + HEADER_PARTITION_OFFSET);
    final InMemoryPartition<T> p = partitions.get(partitionNumber);
    final MemorySegment[] overflowSegments = p.overflowSegments;

    this.pairComparator.setReference(probeSideRecord);

    int countInSegment = bucket.getInt(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
    int numInSegment = 0;
    int posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;

    // loop over all segments that are involved in the bucket (original bucket plus overflow buckets)
    while (true) {

        while (numInSegment < countInSegment) {

            final int thisCode = bucket.getInt(posInSegment);
            posInSegment += HASH_CODE_LEN;

            // check if the hash code matches
            if (thisCode == searchHashCode) {
                // get the pointer to the pair
                final int pointerOffset = bucketInSegmentOffset + BUCKET_POINTER_START_OFFSET + (numInSegment * POINTER_LEN);
                final long pointer = bucket.getLong(pointerOffset);
                numInSegment++;

                // deserialize the key to check whether it is really equal, or whether we had only a hash collision
                try {
                    reuse = p.readRecordAt(pointer, reuse);

                    if (this.pairComparator.equalToReference(reuse)) {
                        this.partition = p;
                        this.bucket = bucket;
                        this.pointerOffsetInBucket = pointerOffset;
                        return reuse;
                    }
                } catch (IOException e) {
                    throw new RuntimeException("Error deserializing record from the hashtable: " + e.getMessage(), e);
                }
            } else {
                numInSegment++;
            }
        }

        // this segment is done. check if there is another chained bucket
        final long forwardPointer = bucket.getLong(bucketInSegmentOffset + HEADER_FORWARD_OFFSET);
        if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
            return null;
        }

        final int overflowSegNum = (int) (forwardPointer >>> 32);
        bucket = overflowSegments[overflowSegNum];
        bucketInSegmentOffset = (int) forwardPointer;
        countInSegment = bucket.getInt(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
        posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;
        numInSegment = 0;
    }
}
Example 14
Source File: CompactingHashTable.java From flink with Apache License 2.0 | 4 votes |
/**
 * utility function that inserts all entries from a bucket and its overflow buckets into the cache
 *
 * @return true if last bucket was not reached yet
 * @throws IOException
 */
private boolean fillCache() throws IOException {
    if (currentBucketIndex >= table.numBuckets) {
        return false;
    }
    MemorySegment bucket = table.buckets[currentSegmentIndex];

    // get the basic characteristics of the bucket
    final int partitionNumber = bucket.get(currentBucketOffset + HEADER_PARTITION_OFFSET);
    final InMemoryPartition<T> partition = table.partitions.get(partitionNumber);
    final MemorySegment[] overflowSegments = partition.overflowSegments;

    int countInSegment = bucket.getInt(currentBucketOffset + HEADER_COUNT_OFFSET);
    int numInSegment = 0;
    int posInSegment = currentBucketOffset + BUCKET_POINTER_START_OFFSET;
    int bucketOffset = currentBucketOffset;

    // loop over all segments that are involved in the bucket (original bucket plus overflow buckets)
    while (true) {
        while (numInSegment < countInSegment) {
            long pointer = bucket.getLong(posInSegment);
            posInSegment += POINTER_LEN;
            numInSegment++;
            T target = table.buildSideSerializer.createInstance();
            try {
                target = partition.readRecordAt(pointer, target);
                cache.add(target);
            } catch (IOException e) {
                throw new RuntimeException("Error deserializing record from the Hash Table: " + e.getMessage(), e);
            }
        }

        // this segment is done. check if there is another chained bucket
        final long forwardPointer = bucket.getLong(bucketOffset + HEADER_FORWARD_OFFSET);
        if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
            break;
        }

        final int overflowSegNum = (int) (forwardPointer >>> 32);
        bucket = overflowSegments[overflowSegNum];
        bucketOffset = (int) forwardPointer;
        countInSegment = bucket.getInt(bucketOffset + HEADER_COUNT_OFFSET);
        posInSegment = bucketOffset + BUCKET_POINTER_START_OFFSET;
        numInSegment = 0;
    }

    currentBucketIndex++;
    if (currentBucketIndex % bucketsPerSegment == 0) {
        currentSegmentIndex++;
        currentBucketOffset = 0;
    } else {
        currentBucketOffset += HASH_BUCKET_SIZE;
    }

    return true;
}
Example 15
Source File: CompactingHashTable.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Compacts (garbage collects) partition with copy-compact strategy using compaction partition
 *
 * @param partitionNumber partition to compact
 * @throws IOException
 */
private void compactPartition(final int partitionNumber) throws IOException {
    // do nothing if table was closed, parameter is invalid or no garbage exists
    if (this.closed || partitionNumber >= this.partitions.size() || this.partitions.get(partitionNumber).isCompacted()) {
        return;
    }
    // release all segments owned by compaction partition
    this.compactionMemory.clearAllMemory(availableMemory);
    this.compactionMemory.allocateSegments(1);
    this.compactionMemory.pushDownPages();
    T tempHolder = this.buildSideSerializer.createInstance();
    final int numPartitions = this.partitions.size();
    InMemoryPartition<T> partition = this.partitions.remove(partitionNumber);
    MemorySegment[] overflowSegments = partition.overflowSegments;
    long pointer;
    int pointerOffset;
    int bucketOffset;
    final int bucketsPerSegment = this.bucketsPerSegmentMask + 1;
    for (int i = 0, bucket = partitionNumber; i < this.buckets.length && bucket < this.numBuckets; i++) {
        MemorySegment segment = this.buckets[i];
        // go over all buckets in the segment belonging to the partition
        for (int k = bucket % bucketsPerSegment; k < bucketsPerSegment && bucket < this.numBuckets; k += numPartitions, bucket += numPartitions) {
            bucketOffset = k * HASH_BUCKET_SIZE;
            if ((int) segment.get(bucketOffset + HEADER_PARTITION_OFFSET) != partitionNumber) {
                throw new IOException("Accessed wrong bucket! wanted: " + partitionNumber + " got: " + segment.get(bucketOffset + HEADER_PARTITION_OFFSET));
            }
            // loop over all segments that are involved in the bucket (original bucket plus overflow buckets)
            int countInSegment = segment.getInt(bucketOffset + HEADER_COUNT_OFFSET);
            int numInSegment = 0;
            pointerOffset = bucketOffset + BUCKET_POINTER_START_OFFSET;
            while (true) {
                while (numInSegment < countInSegment) {
                    pointer = segment.getLong(pointerOffset);
                    tempHolder = partition.readRecordAt(pointer, tempHolder);
                    pointer = this.compactionMemory.appendRecord(tempHolder);
                    segment.putLong(pointerOffset, pointer);
                    pointerOffset += POINTER_LEN;
                    numInSegment++;
                }
                // this segment is done. check if there is another chained bucket
                final long forwardPointer = segment.getLong(bucketOffset + HEADER_FORWARD_OFFSET);
                if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
                    break;
                }
                final int overflowSegNum = (int) (forwardPointer >>> 32);
                segment = overflowSegments[overflowSegNum];
                bucketOffset = (int) forwardPointer;
                countInSegment = segment.getInt(bucketOffset + HEADER_COUNT_OFFSET);
                pointerOffset = bucketOffset + BUCKET_POINTER_START_OFFSET;
                numInSegment = 0;
            }
            segment = this.buckets[i];
        }
    }
    // swap partition with compaction partition
    this.compactionMemory.setPartitionNumber(partitionNumber);
    this.partitions.add(partitionNumber, compactionMemory);
    this.partitions.get(partitionNumber).overflowSegments = partition.overflowSegments;
    this.partitions.get(partitionNumber).numOverflowSegments = partition.numOverflowSegments;
    this.partitions.get(partitionNumber).nextOverflowBucket = partition.nextOverflowBucket;
    this.partitions.get(partitionNumber).setIsCompacted(true);
    //this.partitions.get(partitionNumber).pushDownPages();
    this.compactionMemory = partition;
    this.compactionMemory.resetRecordCounter();
    this.compactionMemory.setPartitionNumber(-1);
    this.compactionMemory.overflowSegments = null;
    this.compactionMemory.numOverflowSegments = 0;
    this.compactionMemory.nextOverflowBucket = 0;
    // try to allocate maximum segment count
    this.compactionMemory.clearAllMemory(this.availableMemory);
    int maxSegmentNumber = this.getMaxPartition();
    this.compactionMemory.allocateSegments(maxSegmentNumber);
    this.compactionMemory.resetRWViews();
    this.compactionMemory.pushDownPages();
}
Example 16
Source File: LongHashPartition.java From flink with Apache License 2.0 | 4 votes |
/**
 * Update the address in array for given key.
 */
private void updateIndex(
        long key,
        int hashCode,
        long address,
        int size,
        MemorySegment dataSegment,
        int currentPositionInSegment) throws IOException {
    assert (numKeys <= numBuckets / 2);
    int bucketId = findBucket(hashCode);

    // each bucket occupied 16 bytes (long key + long pointer to data address)
    int bucketOffset = bucketId * SPARSE_BUCKET_ELEMENT_SIZE_IN_BYTES;
    MemorySegment segment = buckets[bucketOffset >>> segmentSizeBits];
    int segOffset = bucketOffset & segmentSizeMask;
    long currAddress;

    while (true) {
        currAddress = segment.getLong(segOffset + 8);
        if (segment.getLong(segOffset) != key && currAddress != INVALID_ADDRESS) {
            // hash conflicts, the bucket is occupied by another key

            // TODO test Conflict resolution:
            // now: +1 +1 +1... cache friendly but more conflict, so we set factor to 0.5
            // other1: +1 +2 +3... less conflict, factor can be 0.75
            // other2: Secondary hashCode... less and less conflict, but need compute hash again
            bucketId = (bucketId + 1) & numBucketsMask;
            if (segOffset + SPARSE_BUCKET_ELEMENT_SIZE_IN_BYTES < segmentSize) {
                // if the new bucket still in current segment, we only need to update offset
                // within this segment
                segOffset += SPARSE_BUCKET_ELEMENT_SIZE_IN_BYTES;
            } else {
                // otherwise, we should re-calculate segment and offset
                bucketOffset = bucketId * 16;
                segment = buckets[bucketOffset >>> segmentSizeBits];
                segOffset = bucketOffset & segmentSizeMask;
            }
        } else {
            break;
        }
    }

    if (currAddress == INVALID_ADDRESS) {
        // this is the first value for this key, put the address in array.
        segment.putLong(segOffset, key);
        segment.putLong(segOffset + 8, address);
        numKeys += 1;

        // dataSegment may be null if we only have to rehash bucket area
        if (dataSegment != null) {
            dataSegment.putLong(currentPositionInSegment, toAddrAndLen(INVALID_ADDRESS, size));
        }
        if (numKeys * 2 > numBuckets) {
            resize();
        }
    } else {
        // there are some values for this key, put the address in the front of them.
        dataSegment.putLong(currentPositionInSegment, toAddrAndLen(currAddress, size));
        segment.putLong(segOffset + 8, address);
    }
}
Example 17
Source File: CompactingHashTable.java From flink with Apache License 2.0 | 4 votes |
public T getMatchFor(PT probeSideRecord) {
    if (closed) {
        return null;
    }

    final int searchHashCode = MathUtils.jenkinsHash(this.probeTypeComparator.hash(probeSideRecord));
    final int posHashCode = searchHashCode % numBuckets;

    // get the bucket for the given hash code
    MemorySegment bucket = buckets[posHashCode >> bucketsPerSegmentBits];
    int bucketInSegmentOffset = (posHashCode & bucketsPerSegmentMask) << NUM_INTRA_BUCKET_BITS;

    // get the basic characteristics of the bucket
    final int partitionNumber = bucket.get(bucketInSegmentOffset + HEADER_PARTITION_OFFSET);
    final InMemoryPartition<T> p = partitions.get(partitionNumber);
    final MemorySegment[] overflowSegments = p.overflowSegments;

    this.pairComparator.setReference(probeSideRecord);

    int countInSegment = bucket.getInt(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
    int numInSegment = 0;
    int posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;

    // loop over all segments that are involved in the bucket (original bucket plus overflow buckets)
    while (true) {

        while (numInSegment < countInSegment) {

            final int thisCode = bucket.getInt(posInSegment);
            posInSegment += HASH_CODE_LEN;

            // check if the hash code matches
            if (thisCode == searchHashCode) {
                // get the pointer to the pair
                final int pointerOffset = bucketInSegmentOffset + BUCKET_POINTER_START_OFFSET + (numInSegment * POINTER_LEN);
                final long pointer = bucket.getLong(pointerOffset);
                numInSegment++;

                // deserialize the key to check whether it is really equal, or whether we had only a hash collision
                try {
                    T result = p.readRecordAt(pointer);

                    if (this.pairComparator.equalToReference(result)) {
                        this.partition = p;
                        this.bucket = bucket;
                        this.pointerOffsetInBucket = pointerOffset;
                        return result;
                    }
                } catch (IOException e) {
                    throw new RuntimeException("Error deserializing record from the hashtable: " + e.getMessage(), e);
                }
            } else {
                numInSegment++;
            }
        }

        // this segment is done. check if there is another chained bucket
        final long forwardPointer = bucket.getLong(bucketInSegmentOffset + HEADER_FORWARD_OFFSET);
        if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
            return null;
        }

        final int overflowSegNum = (int) (forwardPointer >>> 32);
        bucket = overflowSegments[overflowSegNum];
        bucketInSegmentOffset = (int) forwardPointer;
        countInSegment = bucket.getInt(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
        posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;
        numInSegment = 0;
    }
}
Example 18
Source File: BytesHashMap.java From flink with Apache License 2.0 | 4 votes |
/**
 * @throws EOFException if the map can't allocate much more memory.
 */
private void growAndRehash() throws EOFException {
    // allocate the new data structures
    int required = 2 * bucketSegments.size();
    if (required * (long) numBucketsPerSegment > Integer.MAX_VALUE) {
        LOG.warn("We can't handle more than Integer.MAX_VALUE buckets (eg. because hash functions return int)");
        throw new EOFException();
    }
    List<MemorySegment> newBucketSegments = new ArrayList<>(required);

    try {
        int numAllocatedSegments = required - memoryPool.freePages();
        if (numAllocatedSegments > 0) {
            throw new MemoryAllocationException();
        }
        int needNumFromFreeSegments = required - newBucketSegments.size();
        for (int end = needNumFromFreeSegments; end > 0; end--) {
            newBucketSegments.add(memoryPool.nextSegment());
        }
        setBucketVariables(newBucketSegments);
    } catch (MemoryAllocationException e) {
        LOG.warn("BytesHashMap can't allocate {} pages, and now used {} pages",
                required, reservedNumBuffers);
        throw new EOFException();
    }
    long reHashStartTime = System.currentTimeMillis();
    resetBucketSegments(newBucketSegments);
    // Re-mask (we don't recompute the hashcode because we stored all 32 bits of it)
    for (MemorySegment memorySegment : bucketSegments) {
        for (int j = 0; j < numBucketsPerSegment; j++) {
            final long recordPointer = memorySegment.getLong(j * BUCKET_SIZE);
            if (recordPointer != END_OF_LIST) {
                final int hashCode1 = memorySegment.getInt(j * BUCKET_SIZE + ELEMENT_POINT_LENGTH);
                int newPos = hashCode1 & numBucketsMask;
                int bucketSegmentIndex = newPos >>> numBucketsPerSegmentBits;
                int bucketOffset = (newPos & numBucketsPerSegmentMask) << BUCKET_SIZE_BITS;
                int step = STEP_INCREMENT;
                long hashCode2 = 0;
                while (newBucketSegments.get(bucketSegmentIndex).getLong(bucketOffset) != END_OF_LIST) {
                    if (step == 1) {
                        hashCode2 = calcSecondHashCode(hashCode1);
                    }
                    newPos = (int) ((hashCode1 + step * hashCode2) & numBucketsMask);
                    // which segment contains the bucket
                    bucketSegmentIndex = newPos >>> numBucketsPerSegmentBits;
                    // offset of the bucket in the segment
                    bucketOffset = (newPos & numBucketsPerSegmentMask) << BUCKET_SIZE_BITS;
                    step += STEP_INCREMENT;
                }
                newBucketSegments.get(bucketSegmentIndex).putLong(bucketOffset, recordPointer);
                newBucketSegments.get(bucketSegmentIndex).putInt(bucketOffset + ELEMENT_POINT_LENGTH, hashCode1);
            }
        }
    }
    LOG.info("The rehash take {} ms for {} segments",
            (System.currentTimeMillis() - reHashStartTime), required);
    this.memoryPool.returnAll(this.bucketSegments);
    this.bucketSegments = newBucketSegments;
}
Example 19
Source File: SkipListUtils.java From flink with Apache License 2.0 | 2 votes |
/**
 * Returns the value pointer.
 *
 * @param memorySegment memory segment for key space.
 * @param offset offset of key space in the memory segment.
 */
public static long getValuePointer(MemorySegment memorySegment, int offset) {
    return memorySegment.getLong(offset + VALUE_POINTER_OFFSET);
}
Example 20
Source File: SkipListUtils.java From flink with Apache License 2.0 | 2 votes |
/**
 * Return the pointer to key space.
 *
 * @param memorySegment memory segment for value space.
 * @param offset offset of value space in memory segment.
 */
public static long getKeyPointer(MemorySegment memorySegment, int offset) {
    return memorySegment.getLong(offset + KEY_POINTER_OFFSET);
}