Java Code Examples for org.apache.flink.core.memory.MemorySegment#getLong()
The following examples show how to use org.apache.flink.core.memory.MemorySegment#getLong().
You can go to the original project or source file by following the links above each example.
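Before the examples taken from the Flink codebase, here is a minimal standalone sketch (not from any of the linked files) of the basic read/write round trip on a segment. It assumes that MemorySegmentFactory.allocateUnpooledSegment from the same package is available for creating a heap-backed segment; the class and offsets below are purely illustrative.

import org.apache.flink.core.memory.MemorySegment;
import org.apache.flink.core.memory.MemorySegmentFactory;

public class GetLongSketch {

    public static void main(String[] args) {
        // Allocate a small heap-backed segment; 16 bytes holds two longs.
        MemorySegment segment = MemorySegmentFactory.allocateUnpooledSegment(16);

        // Write two longs at byte offsets 0 and 8, then read them back with getLong().
        segment.putLong(0, 42L);
        segment.putLong(8, -1L);

        long first = segment.getLong(0);   // 42
        long second = segment.getLong(8);  // -1

        System.out.println(first + ", " + second);
    }
}

The examples below use the same getLong(int offset) call to read 8-byte pointers, keys, and addresses out of hash-table bucket segments.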
Example 1
Source File: InPlaceMutableHashTable.java From flink with Apache License 2.0 | 6 votes |
/**
 * Inserts the given record into the hash table.
 * Note: this method doesn't care about whether a record with the same key is already present.
 *
 * @param record The record to insert.
 * @throws IOException (EOFException specifically, if memory ran out)
 */
@Override
public void insert(T record) throws IOException {
    if (closed) {
        return;
    }

    final int hashCode = MathUtils.jenkinsHash(buildSideComparator.hash(record));
    final int bucket = hashCode & numBucketsMask;
    final int bucketSegmentIndex = bucket >>> numBucketsPerSegmentBits; // which segment contains the bucket
    final MemorySegment bucketSegment = bucketSegments[bucketSegmentIndex];
    final int bucketOffset = (bucket & numBucketsPerSegmentMask) << bucketSizeBits; // offset of the bucket in the segment
    final long firstPointer = bucketSegment.getLong(bucketOffset);

    try {
        final long newFirstPointer = recordArea.appendPointerAndRecord(firstPointer, record);
        bucketSegment.putLong(bucketOffset, newFirstPointer);
    } catch (EOFException ex) {
        compactOrThrow();
        insert(record);
        return;
    }

    numElements++;
    resizeTableIfNecessary();
}
Example 2
Source File: InPlaceMutableHashTable.java From flink with Apache License 2.0 | 5 votes |
/**
 * Searches the hash table for the record with the given key.
 * (If there would be multiple matches, only one is returned.)
 *
 * @param record The record whose key we are searching for
 * @param targetForMatch If a match is found, it will be written here
 * @return targetForMatch if a match is found, otherwise null.
 */
@Override
public T getMatchFor(PT record, T targetForMatch) {
    if (closed) {
        return null;
    }

    final int hashCode = MathUtils.jenkinsHash(probeTypeComparator.hash(record));
    final int bucket = hashCode & numBucketsMask;

    bucketSegmentIndex = bucket >>> numBucketsPerSegmentBits; // which segment contains the bucket
    final MemorySegment bucketSegment = bucketSegments[bucketSegmentIndex];

    bucketOffset = (bucket & numBucketsPerSegmentMask) << bucketSizeBits; // offset of the bucket in the segment

    curElemPtr = bucketSegment.getLong(bucketOffset);

    pairComparator.setReference(record);

    T currentRecordInList = targetForMatch;

    prevElemPtr = INVALID_PREV_POINTER;
    try {
        while (curElemPtr != END_OF_LIST && !closed) {
            recordArea.setReadPosition(curElemPtr);
            nextPtr = recordArea.readPointer();
            currentRecordInList = recordArea.readRecord(currentRecordInList);
            recordEnd = recordArea.getReadPosition();
            if (pairComparator.equalToReference(currentRecordInList)) {
                // we found an element with a matching key, and not just a hash collision
                return currentRecordInList;
            }

            prevElemPtr = curElemPtr;
            curElemPtr = nextPtr;
        }
    } catch (IOException ex) {
        throw new RuntimeException("Error deserializing record from the hashtable: " + ex.getMessage(), ex);
    }
    return null;
}
Example 3
Source File: LongHashPartition.java From flink with Apache License 2.0 | 5 votes |
void iteratorToDenseBucket(MemorySegment[] denseBuckets, long addressOffset, long globalMinKey) {
    int bucketOffset = 0;
    MemorySegment segment = buckets[bucketOffset];
    int segOffset = 0;
    for (int i = 0; i < numBuckets; i++) {
        long address = segment.getLong(segOffset + 8);
        if (address != INVALID_ADDRESS) {
            long key = segment.getLong(segOffset);
            long denseBucket = key - globalMinKey;
            long denseBucketOffset = denseBucket << 3;
            int denseSegIndex = (int) (denseBucketOffset >>> segmentSizeBits);
            int denseSegOffset = (int) (denseBucketOffset & segmentSizeMask);
            denseBuckets[denseSegIndex].putLong(denseSegOffset, address + addressOffset);
        }

        // not last bucket, move to next.
        if (i != numBuckets - 1) {
            if (segOffset + 16 < segmentSize) {
                segOffset += 16;
            } else {
                segment = buckets[++bucketOffset];
                segOffset = 0;
            }
        }
    }
}
Example 4
Source File: StringNormalizedKeyComputer.java From flink with Apache License 2.0 | 5 votes |
@Override
public void swapKey(MemorySegment segI, int offsetI, MemorySegment segJ, int offsetJ) {
    long temp0 = segI.getLong(offsetI);
    segI.putLong(offsetI, segJ.getLong(offsetJ));
    segJ.putLong(offsetJ, temp0);
}
Example 5
Source File: MutableHashTable.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
private void buildBloomFilterForExtraOverflowSegments(int bucketInSegmentPos, MemorySegment bucket, HashPartition<BT, PT> p) {
    int totalCount = 0;
    boolean skip = false;
    long forwardPointer = bucket.getLong(bucketInSegmentPos + HEADER_FORWARD_OFFSET);
    while (forwardPointer != BUCKET_FORWARD_POINTER_NOT_SET) {
        final int overflowSegNum = (int) (forwardPointer >>> 32);
        if (overflowSegNum < 0 || overflowSegNum >= p.numOverflowSegments) {
            skip = true;
            break;
        }
        MemorySegment overflowSegment = p.overflowSegments[overflowSegNum];
        int bucketInOverflowSegmentOffset = (int) forwardPointer;

        final int count = overflowSegment.getShort(bucketInOverflowSegmentOffset + HEADER_COUNT_OFFSET);
        totalCount += count;

        // The bits size of bloom filter per bucket is 112 * 8, while expected input entries is greater than 2048, the fpp would higher than 0.9,
        // which make the bloom filter an overhead instead of optimization.
        if (totalCount > 2048) {
            skip = true;
            break;
        }

        for (int i = 0; i < count; i++) {
            int hashCode = overflowSegment.getInt(bucketInOverflowSegmentOffset + BUCKET_HEADER_LENGTH + i * HASH_CODE_LEN);
            this.bloomFilter.addHash(hashCode);
        }

        forwardPointer = overflowSegment.getLong(bucketInOverflowSegmentOffset + HEADER_FORWARD_OFFSET);
    }

    if (!skip) {
        bucket.put(bucketInSegmentPos + HEADER_STATUS_OFFSET, BUCKET_STATUS_IN_FILTER);
    }
}
Example 6
Source File: BinaryIndexedSortable.java From flink with Apache License 2.0 | 5 votes |
@Override
public void swap(int segmentNumberI, int segmentOffsetI, int segmentNumberJ, int segmentOffsetJ) {
    final MemorySegment segI = this.sortIndex.get(segmentNumberI);
    final MemorySegment segJ = this.sortIndex.get(segmentNumberJ);

    // swap offset
    long index = segI.getLong(segmentOffsetI);
    segI.putLong(segmentOffsetI, segJ.getLong(segmentOffsetJ));
    segJ.putLong(segmentOffsetJ, index);

    // swap key
    normalizedKeyComputer.swapKey(segI, segmentOffsetI + OFFSET_LEN, segJ, segmentOffsetJ + OFFSET_LEN);
}
Example 7
Source File: InPlaceMutableHashTable.java From flink with Apache License 2.0 | 5 votes |
/**
 * Searches the hash table for the record with the given key.
 * (If there would be multiple matches, only one is returned.)
 *
 * @param record The record whose key we are searching for
 * @param targetForMatch If a match is found, it will be written here
 * @return targetForMatch if a match is found, otherwise null.
 */
@Override
public T getMatchFor(PT record, T targetForMatch) {
    if (closed) {
        return null;
    }

    final int hashCode = MathUtils.jenkinsHash(probeTypeComparator.hash(record));
    final int bucket = hashCode & numBucketsMask;

    bucketSegmentIndex = bucket >>> numBucketsPerSegmentBits; // which segment contains the bucket
    final MemorySegment bucketSegment = bucketSegments[bucketSegmentIndex];

    bucketOffset = (bucket & numBucketsPerSegmentMask) << bucketSizeBits; // offset of the bucket in the segment

    curElemPtr = bucketSegment.getLong(bucketOffset);

    pairComparator.setReference(record);

    T currentRecordInList = targetForMatch;

    prevElemPtr = INVALID_PREV_POINTER;
    try {
        while (curElemPtr != END_OF_LIST && !closed) {
            recordArea.setReadPosition(curElemPtr);
            nextPtr = recordArea.readPointer();
            currentRecordInList = recordArea.readRecord(currentRecordInList);
            recordEnd = recordArea.getReadPosition();
            if (pairComparator.equalToReference(currentRecordInList)) {
                // we found an element with a matching key, and not just a hash collision
                return currentRecordInList;
            }

            prevElemPtr = curElemPtr;
            curElemPtr = nextPtr;
        }
    } catch (IOException ex) {
        throw new RuntimeException("Error deserializing record from the hashtable: " + ex.getMessage(), ex);
    }
    return null;
}
Example 8
Source File: LongHashPartition.java From flink with Apache License 2.0 | 5 votes |
/**
 * Returns an iterator for all the values for the given key, or null if no value found.
 */
public MatchIterator get(long key, int hashCode) {
    int bucket = hashCode & numBucketsMask;
    int bucketOffset = bucket << 4;
    MemorySegment segment = buckets[bucketOffset >>> segmentSizeBits];
    int segOffset = bucketOffset & segmentSizeMask;

    while (true) {
        long address = segment.getLong(segOffset + 8);
        if (address != INVALID_ADDRESS) {
            if (segment.getLong(segOffset) == key) {
                return valueIter(address);
            } else {
                bucket = (bucket + 1) & numBucketsMask;
                if (segOffset + 16 < segmentSize) {
                    segOffset += 16;
                } else {
                    bucketOffset = bucket << 4;
                    segOffset = bucketOffset & segmentSizeMask;
                    segment = buckets[bucketOffset >>> segmentSizeBits];
                }
            }
        } else {
            return valueIter(INVALID_ADDRESS);
        }
    }
}
Example 9
Source File: InPlaceMutableHashTable.java From flink with Apache License 2.0 | 5 votes |
/** Same as above, but the number of bucket segments of the new table can be specified. */
private void rebuild(long newNumBucketSegments) throws IOException {
    // Get new bucket segments
    releaseBucketSegments();
    allocateBucketSegments((int) newNumBucketSegments);

    T record = buildSideSerializer.createInstance();
    try {
        EntryIterator iter = getEntryIterator();
        recordArea.resetAppendPosition();
        recordArea.setWritePosition(0);
        while ((record = iter.next(record)) != null && !closed) {
            final int hashCode = MathUtils.jenkinsHash(buildSideComparator.hash(record));
            final int bucket = hashCode & numBucketsMask;
            final int bucketSegmentIndex = bucket >>> numBucketsPerSegmentBits; // which segment contains the bucket
            final MemorySegment bucketSegment = bucketSegments[bucketSegmentIndex];
            final int bucketOffset = (bucket & numBucketsPerSegmentMask) << bucketSizeBits; // offset of the bucket in the segment
            final long firstPointer = bucketSegment.getLong(bucketOffset);

            long ptrToAppended = recordArea.noSeekAppendPointerAndRecord(firstPointer, record);
            bucketSegment.putLong(bucketOffset, ptrToAppended);
        }
        recordArea.freeSegmentsAfterAppendPosition();
        holes = 0;
    } catch (EOFException ex) {
        throw new RuntimeException("Bug in InPlaceMutableHashTable: we shouldn't get out of memory during a rebuild, " +
            "because we aren't allocating any new memory.");
    }
}
Example 10
Source File: NormalizedKeySorter.java From flink with Apache License 2.0 | 4 votes |
@Override
public void writeToOutput(ChannelWriterOutputView output, LargeRecordHandler<T> largeRecordsOutput) throws IOException {
    if (LOG.isDebugEnabled()) {
        if (largeRecordsOutput == null) {
            LOG.debug("Spilling sort buffer without large record handling.");
        } else {
            LOG.debug("Spilling sort buffer with large record handling.");
        }
    }

    final int numRecords = this.numRecords;
    int currentMemSeg = 0;
    int currentRecord = 0;

    while (currentRecord < numRecords) {
        final MemorySegment currentIndexSegment = this.sortIndex.get(currentMemSeg++);

        // go through all records in the memory segment
        for (int offset = 0; currentRecord < numRecords && offset <= this.lastIndexEntryOffset; currentRecord++, offset += this.indexEntrySize) {
            final long pointer = currentIndexSegment.getLong(offset);

            // small records go into the regular spill file, large records into the special code path
            if (pointer >= 0 || largeRecordsOutput == null) {
                this.recordBuffer.setReadPosition(pointer);
                this.serializer.copy(this.recordBuffer, output);
            } else {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Spilling large record to large record fetch file.");
                }

                this.recordBuffer.setReadPosition(pointer & POINTER_MASK);
                T record = this.serializer.deserialize(this.recordBuffer);
                largeRecordsOutput.addRecord(record);
            }
        }
    }
}
Example 11
Source File: LongHashPartition.java From flink with Apache License 2.0 | 4 votes |
/**
 * Update the address in array for given key.
 */
private void updateIndex(
        long key,
        int hashCode,
        long address,
        int size,
        MemorySegment dataSegment,
        int currentPositionInSegment) throws IOException {
    assert (numKeys <= numBuckets / 2);
    int bucketId = hashCode & numBucketsMask;

    // each bucket occupied 16 bytes (long key + long pointer to data address)
    int bucketOffset = bucketId * SPARSE_BUCKET_ELEMENT_SIZE_IN_BYTES;
    MemorySegment segment = buckets[bucketOffset >>> segmentSizeBits];
    int segOffset = bucketOffset & segmentSizeMask;
    long currAddress;

    while (true) {
        currAddress = segment.getLong(segOffset + 8);
        if (segment.getLong(segOffset) != key && currAddress != INVALID_ADDRESS) {
            // hash conflicts, the bucket is occupied by another key

            // TODO test Conflict resolution:
            // now: +1 +1 +1... cache friendly but more conflict, so we set factor to 0.5
            // other1: +1 +2 +3... less conflict, factor can be 0.75
            // other2: Secondary hashCode... less and less conflict, but need compute hash again
            bucketId = (bucketId + 1) & numBucketsMask;
            if (segOffset + SPARSE_BUCKET_ELEMENT_SIZE_IN_BYTES < segmentSize) {
                // if the new bucket still in current segment, we only need to update offset
                // within this segment
                segOffset += SPARSE_BUCKET_ELEMENT_SIZE_IN_BYTES;
            } else {
                // otherwise, we should re-calculate segment and offset
                bucketOffset = bucketId * 16;
                segment = buckets[bucketOffset >>> segmentSizeBits];
                segOffset = bucketOffset & segmentSizeMask;
            }
        } else {
            break;
        }
    }

    if (currAddress == INVALID_ADDRESS) {
        // this is the first value for this key, put the address in array.
        segment.putLong(segOffset, key);
        segment.putLong(segOffset + 8, address);
        numKeys += 1;

        // dataSegment may be null if we only have to rehash bucket area
        if (dataSegment != null) {
            dataSegment.putLong(currentPositionInSegment, toAddrAndLen(INVALID_ADDRESS, size));
        }
        if (numKeys * 2 > numBuckets) {
            resize();
        }
    } else {
        // there are some values for this key, put the address in the front of them.
        dataSegment.putLong(currentPositionInSegment, toAddrAndLen(currAddress, size));
        segment.putLong(segOffset + 8, address);
    }
}
Example 12
Source File: CompactingHashTable.java From flink with Apache License 2.0 | 4 votes |
/**
 * utility function that inserts all entries from a bucket and its overflow buckets into the cache
 *
 * @return true if last bucket was not reached yet
 * @throws IOException
 */
private boolean fillCache() throws IOException {
    if (currentBucketIndex >= table.numBuckets) {
        return false;
    }
    MemorySegment bucket = table.buckets[currentSegmentIndex];

    // get the basic characteristics of the bucket
    final int partitionNumber = bucket.get(currentBucketOffset + HEADER_PARTITION_OFFSET);
    final InMemoryPartition<T> partition = table.partitions.get(partitionNumber);
    final MemorySegment[] overflowSegments = partition.overflowSegments;

    int countInSegment = bucket.getInt(currentBucketOffset + HEADER_COUNT_OFFSET);
    int numInSegment = 0;
    int posInSegment = currentBucketOffset + BUCKET_POINTER_START_OFFSET;
    int bucketOffset = currentBucketOffset;

    // loop over all segments that are involved in the bucket (original bucket plus overflow buckets)
    while (true) {
        while (numInSegment < countInSegment) {
            long pointer = bucket.getLong(posInSegment);
            posInSegment += POINTER_LEN;
            numInSegment++;
            T target = table.buildSideSerializer.createInstance();
            try {
                target = partition.readRecordAt(pointer, target);
                cache.add(target);
            } catch (IOException e) {
                throw new RuntimeException("Error deserializing record from the Hash Table: " + e.getMessage(), e);
            }
        }

        // this segment is done. check if there is another chained bucket
        final long forwardPointer = bucket.getLong(bucketOffset + HEADER_FORWARD_OFFSET);
        if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
            break;
        }

        final int overflowSegNum = (int) (forwardPointer >>> 32);
        bucket = overflowSegments[overflowSegNum];
        bucketOffset = (int) forwardPointer;
        countInSegment = bucket.getInt(bucketOffset + HEADER_COUNT_OFFSET);
        posInSegment = bucketOffset + BUCKET_POINTER_START_OFFSET;
        numInSegment = 0;
    }

    currentBucketIndex++;
    if (currentBucketIndex % bucketsPerSegment == 0) {
        currentSegmentIndex++;
        currentBucketOffset = 0;
    } else {
        currentBucketOffset += HASH_BUCKET_SIZE;
    }

    return true;
}
Example 13
Source File: CompactingHashTable.java From flink with Apache License 2.0 | 4 votes |
public T getMatchFor(PT probeSideRecord, T reuse) {
    if (closed) {
        return null;
    }

    final int searchHashCode = MathUtils.jenkinsHash(this.probeTypeComparator.hash(probeSideRecord));
    final int posHashCode = searchHashCode % numBuckets;

    // get the bucket for the given hash code
    MemorySegment bucket = buckets[posHashCode >> bucketsPerSegmentBits];
    int bucketInSegmentOffset = (posHashCode & bucketsPerSegmentMask) << NUM_INTRA_BUCKET_BITS;

    // get the basic characteristics of the bucket
    final int partitionNumber = bucket.get(bucketInSegmentOffset + HEADER_PARTITION_OFFSET);
    final InMemoryPartition<T> p = partitions.get(partitionNumber);
    final MemorySegment[] overflowSegments = p.overflowSegments;

    this.pairComparator.setReference(probeSideRecord);

    int countInSegment = bucket.getInt(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
    int numInSegment = 0;
    int posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;

    // loop over all segments that are involved in the bucket (original bucket plus overflow buckets)
    while (true) {

        while (numInSegment < countInSegment) {

            final int thisCode = bucket.getInt(posInSegment);
            posInSegment += HASH_CODE_LEN;

            // check if the hash code matches
            if (thisCode == searchHashCode) {
                // get the pointer to the pair
                final int pointerOffset = bucketInSegmentOffset + BUCKET_POINTER_START_OFFSET + (numInSegment * POINTER_LEN);
                final long pointer = bucket.getLong(pointerOffset);
                numInSegment++;

                // deserialize the key to check whether it is really equal, or whether we had only a hash collision
                try {
                    reuse = p.readRecordAt(pointer, reuse);

                    if (this.pairComparator.equalToReference(reuse)) {
                        this.partition = p;
                        this.bucket = bucket;
                        this.pointerOffsetInBucket = pointerOffset;
                        return reuse;
                    }
                } catch (IOException e) {
                    throw new RuntimeException("Error deserializing record from the hashtable: " + e.getMessage(), e);
                }
            } else {
                numInSegment++;
            }
        }

        // this segment is done. check if there is another chained bucket
        final long forwardPointer = bucket.getLong(bucketInSegmentOffset + HEADER_FORWARD_OFFSET);
        if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
            return null;
        }

        final int overflowSegNum = (int) (forwardPointer >>> 32);
        bucket = overflowSegments[overflowSegNum];
        bucketInSegmentOffset = (int) forwardPointer;
        countInSegment = bucket.getInt(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
        posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;
        numInSegment = 0;
    }
}
Example 14
Source File: CompactingHashTable.java From flink with Apache License 2.0 | 4 votes |
/**
 * utility function that inserts all entries from a bucket and its overflow buckets into the cache
 *
 * @return true if last bucket was not reached yet
 * @throws IOException
 */
private boolean fillCache() throws IOException {
    if (currentBucketIndex >= table.numBuckets) {
        return false;
    }
    MemorySegment bucket = table.buckets[currentSegmentIndex];

    // get the basic characteristics of the bucket
    final int partitionNumber = bucket.get(currentBucketOffset + HEADER_PARTITION_OFFSET);
    final InMemoryPartition<T> partition = table.partitions.get(partitionNumber);
    final MemorySegment[] overflowSegments = partition.overflowSegments;

    int countInSegment = bucket.getInt(currentBucketOffset + HEADER_COUNT_OFFSET);
    int numInSegment = 0;
    int posInSegment = currentBucketOffset + BUCKET_POINTER_START_OFFSET;
    int bucketOffset = currentBucketOffset;

    // loop over all segments that are involved in the bucket (original bucket plus overflow buckets)
    while (true) {
        while (numInSegment < countInSegment) {
            long pointer = bucket.getLong(posInSegment);
            posInSegment += POINTER_LEN;
            numInSegment++;
            T target = table.buildSideSerializer.createInstance();
            try {
                target = partition.readRecordAt(pointer, target);
                cache.add(target);
            } catch (IOException e) {
                throw new RuntimeException("Error deserializing record from the Hash Table: " + e.getMessage(), e);
            }
        }

        // this segment is done. check if there is another chained bucket
        final long forwardPointer = bucket.getLong(bucketOffset + HEADER_FORWARD_OFFSET);
        if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
            break;
        }

        final int overflowSegNum = (int) (forwardPointer >>> 32);
        bucket = overflowSegments[overflowSegNum];
        bucketOffset = (int) forwardPointer;
        countInSegment = bucket.getInt(bucketOffset + HEADER_COUNT_OFFSET);
        posInSegment = bucketOffset + BUCKET_POINTER_START_OFFSET;
        numInSegment = 0;
    }

    currentBucketIndex++;
    if (currentBucketIndex % bucketsPerSegment == 0) {
        currentSegmentIndex++;
        currentBucketOffset = 0;
    } else {
        currentBucketOffset += HASH_BUCKET_SIZE;
    }

    return true;
}
Example 15
Source File: CompactingHashTable.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Compacts (garbage collects) partition with copy-compact strategy using compaction partition
 *
 * @param partitionNumber partition to compact
 * @throws IOException
 */
private void compactPartition(final int partitionNumber) throws IOException {
    // do nothing if table was closed, parameter is invalid or no garbage exists
    if (this.closed || partitionNumber >= this.partitions.size() || this.partitions.get(partitionNumber).isCompacted()) {
        return;
    }
    // release all segments owned by compaction partition
    this.compactionMemory.clearAllMemory(availableMemory);
    this.compactionMemory.allocateSegments(1);
    this.compactionMemory.pushDownPages();
    T tempHolder = this.buildSideSerializer.createInstance();
    final int numPartitions = this.partitions.size();
    InMemoryPartition<T> partition = this.partitions.remove(partitionNumber);
    MemorySegment[] overflowSegments = partition.overflowSegments;
    long pointer;
    int pointerOffset;
    int bucketOffset;
    final int bucketsPerSegment = this.bucketsPerSegmentMask + 1;
    for (int i = 0, bucket = partitionNumber; i < this.buckets.length && bucket < this.numBuckets; i++) {
        MemorySegment segment = this.buckets[i];
        // go over all buckets in the segment belonging to the partition
        for (int k = bucket % bucketsPerSegment; k < bucketsPerSegment && bucket < this.numBuckets; k += numPartitions, bucket += numPartitions) {
            bucketOffset = k * HASH_BUCKET_SIZE;
            if ((int) segment.get(bucketOffset + HEADER_PARTITION_OFFSET) != partitionNumber) {
                throw new IOException("Accessed wrong bucket! wanted: " + partitionNumber + " got: " + segment.get(bucketOffset + HEADER_PARTITION_OFFSET));
            }
            // loop over all segments that are involved in the bucket (original bucket plus overflow buckets)
            int countInSegment = segment.getInt(bucketOffset + HEADER_COUNT_OFFSET);
            int numInSegment = 0;
            pointerOffset = bucketOffset + BUCKET_POINTER_START_OFFSET;
            while (true) {
                while (numInSegment < countInSegment) {
                    pointer = segment.getLong(pointerOffset);
                    tempHolder = partition.readRecordAt(pointer, tempHolder);
                    pointer = this.compactionMemory.appendRecord(tempHolder);
                    segment.putLong(pointerOffset, pointer);
                    pointerOffset += POINTER_LEN;
                    numInSegment++;
                }
                // this segment is done. check if there is another chained bucket
                final long forwardPointer = segment.getLong(bucketOffset + HEADER_FORWARD_OFFSET);
                if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
                    break;
                }
                final int overflowSegNum = (int) (forwardPointer >>> 32);
                segment = overflowSegments[overflowSegNum];
                bucketOffset = (int) forwardPointer;
                countInSegment = segment.getInt(bucketOffset + HEADER_COUNT_OFFSET);
                pointerOffset = bucketOffset + BUCKET_POINTER_START_OFFSET;
                numInSegment = 0;
            }
            segment = this.buckets[i];
        }
    }
    // swap partition with compaction partition
    this.compactionMemory.setPartitionNumber(partitionNumber);
    this.partitions.add(partitionNumber, compactionMemory);
    this.partitions.get(partitionNumber).overflowSegments = partition.overflowSegments;
    this.partitions.get(partitionNumber).numOverflowSegments = partition.numOverflowSegments;
    this.partitions.get(partitionNumber).nextOverflowBucket = partition.nextOverflowBucket;
    this.partitions.get(partitionNumber).setIsCompacted(true);
    //this.partitions.get(partitionNumber).pushDownPages();
    this.compactionMemory = partition;
    this.compactionMemory.resetRecordCounter();
    this.compactionMemory.setPartitionNumber(-1);
    this.compactionMemory.overflowSegments = null;
    this.compactionMemory.numOverflowSegments = 0;
    this.compactionMemory.nextOverflowBucket = 0;
    // try to allocate maximum segment count
    this.compactionMemory.clearAllMemory(this.availableMemory);
    int maxSegmentNumber = this.getMaxPartition();
    this.compactionMemory.allocateSegments(maxSegmentNumber);
    this.compactionMemory.resetRWViews();
    this.compactionMemory.pushDownPages();
}
Example 16
Source File: LongHashPartition.java From flink with Apache License 2.0 | 4 votes |
/**
 * Update the address in array for given key.
 */
private void updateIndex(
        long key,
        int hashCode,
        long address,
        int size,
        MemorySegment dataSegment,
        int currentPositionInSegment) throws IOException {
    assert (numKeys <= numBuckets / 2);
    int bucketId = findBucket(hashCode);

    // each bucket occupied 16 bytes (long key + long pointer to data address)
    int bucketOffset = bucketId * SPARSE_BUCKET_ELEMENT_SIZE_IN_BYTES;
    MemorySegment segment = buckets[bucketOffset >>> segmentSizeBits];
    int segOffset = bucketOffset & segmentSizeMask;
    long currAddress;

    while (true) {
        currAddress = segment.getLong(segOffset + 8);
        if (segment.getLong(segOffset) != key && currAddress != INVALID_ADDRESS) {
            // hash conflicts, the bucket is occupied by another key

            // TODO test Conflict resolution:
            // now: +1 +1 +1... cache friendly but more conflict, so we set factor to 0.5
            // other1: +1 +2 +3... less conflict, factor can be 0.75
            // other2: Secondary hashCode... less and less conflict, but need compute hash again
            bucketId = (bucketId + 1) & numBucketsMask;
            if (segOffset + SPARSE_BUCKET_ELEMENT_SIZE_IN_BYTES < segmentSize) {
                // if the new bucket still in current segment, we only need to update offset
                // within this segment
                segOffset += SPARSE_BUCKET_ELEMENT_SIZE_IN_BYTES;
            } else {
                // otherwise, we should re-calculate segment and offset
                bucketOffset = bucketId * 16;
                segment = buckets[bucketOffset >>> segmentSizeBits];
                segOffset = bucketOffset & segmentSizeMask;
            }
        } else {
            break;
        }
    }

    if (currAddress == INVALID_ADDRESS) {
        // this is the first value for this key, put the address in array.
        segment.putLong(segOffset, key);
        segment.putLong(segOffset + 8, address);
        numKeys += 1;

        // dataSegment may be null if we only have to rehash bucket area
        if (dataSegment != null) {
            dataSegment.putLong(currentPositionInSegment, toAddrAndLen(INVALID_ADDRESS, size));
        }
        if (numKeys * 2 > numBuckets) {
            resize();
        }
    } else {
        // there are some values for this key, put the address in the front of them.
        dataSegment.putLong(currentPositionInSegment, toAddrAndLen(currAddress, size));
        segment.putLong(segOffset + 8, address);
    }
}
Example 17
Source File: CompactingHashTable.java From flink with Apache License 2.0 | 4 votes |
public T getMatchFor(PT probeSideRecord) {
    if (closed) {
        return null;
    }

    final int searchHashCode = MathUtils.jenkinsHash(this.probeTypeComparator.hash(probeSideRecord));
    final int posHashCode = searchHashCode % numBuckets;

    // get the bucket for the given hash code
    MemorySegment bucket = buckets[posHashCode >> bucketsPerSegmentBits];
    int bucketInSegmentOffset = (posHashCode & bucketsPerSegmentMask) << NUM_INTRA_BUCKET_BITS;

    // get the basic characteristics of the bucket
    final int partitionNumber = bucket.get(bucketInSegmentOffset + HEADER_PARTITION_OFFSET);
    final InMemoryPartition<T> p = partitions.get(partitionNumber);
    final MemorySegment[] overflowSegments = p.overflowSegments;

    this.pairComparator.setReference(probeSideRecord);

    int countInSegment = bucket.getInt(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
    int numInSegment = 0;
    int posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;

    // loop over all segments that are involved in the bucket (original bucket plus overflow buckets)
    while (true) {

        while (numInSegment < countInSegment) {

            final int thisCode = bucket.getInt(posInSegment);
            posInSegment += HASH_CODE_LEN;

            // check if the hash code matches
            if (thisCode == searchHashCode) {
                // get the pointer to the pair
                final int pointerOffset = bucketInSegmentOffset + BUCKET_POINTER_START_OFFSET + (numInSegment * POINTER_LEN);
                final long pointer = bucket.getLong(pointerOffset);
                numInSegment++;

                // deserialize the key to check whether it is really equal, or whether we had only a hash collision
                try {
                    T result = p.readRecordAt(pointer);

                    if (this.pairComparator.equalToReference(result)) {
                        this.partition = p;
                        this.bucket = bucket;
                        this.pointerOffsetInBucket = pointerOffset;
                        return result;
                    }
                } catch (IOException e) {
                    throw new RuntimeException("Error deserializing record from the hashtable: " + e.getMessage(), e);
                }
            } else {
                numInSegment++;
            }
        }

        // this segment is done. check if there is another chained bucket
        final long forwardPointer = bucket.getLong(bucketInSegmentOffset + HEADER_FORWARD_OFFSET);
        if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
            return null;
        }

        final int overflowSegNum = (int) (forwardPointer >>> 32);
        bucket = overflowSegments[overflowSegNum];
        bucketInSegmentOffset = (int) forwardPointer;
        countInSegment = bucket.getInt(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
        posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;
        numInSegment = 0;
    }
}
Example 18
Source File: BytesHashMap.java From flink with Apache License 2.0 | 4 votes |
/**
 * @throws EOFException if the map can't allocate much more memory.
 */
private void growAndRehash() throws EOFException {
    // allocate the new data structures
    int required = 2 * bucketSegments.size();
    if (required * (long) numBucketsPerSegment > Integer.MAX_VALUE) {
        LOG.warn("We can't handle more than Integer.MAX_VALUE buckets (eg. because hash functions return int)");
        throw new EOFException();
    }
    List<MemorySegment> newBucketSegments = new ArrayList<>(required);

    try {
        int numAllocatedSegments = required - memoryPool.freePages();
        if (numAllocatedSegments > 0) {
            throw new MemoryAllocationException();
        }
        int needNumFromFreeSegments = required - newBucketSegments.size();
        for (int end = needNumFromFreeSegments; end > 0; end--) {
            newBucketSegments.add(memoryPool.nextSegment());
        }
        setBucketVariables(newBucketSegments);
    } catch (MemoryAllocationException e) {
        LOG.warn("BytesHashMap can't allocate {} pages, and now used {} pages",
                required, reservedNumBuffers);
        throw new EOFException();
    }
    long reHashStartTime = System.currentTimeMillis();
    resetBucketSegments(newBucketSegments);
    // Re-mask (we don't recompute the hashcode because we stored all 32 bits of it)
    for (MemorySegment memorySegment : bucketSegments) {
        for (int j = 0; j < numBucketsPerSegment; j++) {
            final long recordPointer = memorySegment.getLong(j * BUCKET_SIZE);
            if (recordPointer != END_OF_LIST) {
                final int hashCode1 = memorySegment.getInt(j * BUCKET_SIZE + ELEMENT_POINT_LENGTH);
                int newPos = hashCode1 & numBucketsMask;
                int bucketSegmentIndex = newPos >>> numBucketsPerSegmentBits;
                int bucketOffset = (newPos & numBucketsPerSegmentMask) << BUCKET_SIZE_BITS;
                int step = STEP_INCREMENT;
                long hashCode2 = 0;
                while (newBucketSegments.get(bucketSegmentIndex).getLong(bucketOffset) != END_OF_LIST) {
                    if (step == 1) {
                        hashCode2 = calcSecondHashCode(hashCode1);
                    }
                    newPos = (int) ((hashCode1 + step * hashCode2) & numBucketsMask);
                    // which segment contains the bucket
                    bucketSegmentIndex = newPos >>> numBucketsPerSegmentBits;
                    // offset of the bucket in the segment
                    bucketOffset = (newPos & numBucketsPerSegmentMask) << BUCKET_SIZE_BITS;
                    step += STEP_INCREMENT;
                }
                newBucketSegments.get(bucketSegmentIndex).putLong(bucketOffset, recordPointer);
                newBucketSegments.get(bucketSegmentIndex).putInt(bucketOffset + ELEMENT_POINT_LENGTH, hashCode1);
            }
        }
    }
    LOG.info("The rehash take {} ms for {} segments",
            (System.currentTimeMillis() - reHashStartTime), required);
    this.memoryPool.returnAll(this.bucketSegments);
    this.bucketSegments = newBucketSegments;
}
Example 19
Source File: SkipListUtils.java From flink with Apache License 2.0 | 2 votes |
/**
 * Returns the value pointer.
 *
 * @param memorySegment memory segment for key space.
 * @param offset offset of key space in the memory segment.
 */
public static long getValuePointer(MemorySegment memorySegment, int offset) {
    return memorySegment.getLong(offset + VALUE_POINTER_OFFSET);
}
Example 20
Source File: SkipListUtils.java From flink with Apache License 2.0 | 2 votes |
/**
 * Return the pointer to key space.
 *
 * @param memorySegment memory segment for value space.
 * @param offset offset of value space in memory segment.
 */
public static long getKeyPointer(MemorySegment memorySegment, int offset) {
    return memorySegment.getLong(offset + KEY_POINTER_OFFSET);
}