org.apache.accumulo.core.data.Range#getEndKey

Source File: AccumuloClient.java From presto with Apache License 2.0

6 votes

/**
 * Splits each supplied range along the tablet boundaries of the given table.
 * <p>
 * A range whose start and end keys are both present and equal is kept as-is; every other range is handed
 * to {@code tableOperations().splitRangeByTablets} with no limit on the number of splits.
 *
 * @param tableName table whose tablets define the split points
 * @param ranges ranges to split
 * @return the resulting ranges, collected into an immutable set
 */
private Collection<Range> splitByTabletBoundaries(String tableName, Collection<Range> ranges)
        throws org.apache.accumulo.core.client.TableNotFoundException, AccumuloException, AccumuloSecurityException
{
    ImmutableSet.Builder<Range> splitRanges = ImmutableSet.builder();
    for (Range candidate : ranges) {
        boolean startEqualsEnd = candidate.getStartKey() != null
                && candidate.getEndKey() != null
                && candidate.getStartKey().equals(candidate.getEndKey());
        if (startEqualsEnd) {
            // identical end points: nothing to split
            splitRanges.add(candidate);
            continue;
        }
        // let Accumulo break the range up per tablet
        splitRanges.addAll(connector.tableOperations().splitRangeByTablets(tableName, candidate, Integer.MAX_VALUE));
    }
    return splitRanges.build();
}

Source File: MatchingKeySkippingIterator.java From datawave with Apache License 2.0

6 votes

/**
 * Seeks the underlying source, remembering the arguments for later internal seeks.
 * <p>
 * The lower bound handed to the parent is a key built from only the start key's row (always inclusive);
 * the upper bound is taken from {@code range} unchanged. If the resulting top key lies before the start of
 * the requested range, {@code consume()} is invoked.
 */
@Override
public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
    // remember the arguments so later internal seeks can reuse them
    latestRange = range;
    latestColumnFamilies = columnFamilies;
    latestInclusive = inclusive;
    lastKeyFound = null;

    Key rowOnlyStart = null;
    if (range.getStartKey() != null) {
        rowOnlyStart = new Key(range.getStartKey().getRow());
    }
    Range widenedRange = new Range(rowOnlyStart, true, range.getEndKey(), range.isEndKeyInclusive());
    super.seek(widenedRange, columnFamilies, inclusive);
    finished = false;

    if (!getSource().hasTop()) {
        return;
    }
    lastKeyFound = getSource().getTopKey();
    if (range.beforeStartKey(getSource().getTopKey())) {
        consume();
    }
}

Source File: SeekingAggregator.java From datawave with Apache License 2.0

6 votes

/**
 * Advance an iterator until skip(...) returns false. May be a combination of seek() and next() calls.
 * <p>
 * If the iterator has no top key when this method is entered there is nothing to advance and the method
 * returns immediately, rather than failing while reading the row of a missing key.
 * 
 * @param itr
 *            the iterator to advance
 * @param pointer
 *            passed through to skip(...) and getSeekStartKey(...)
 * @param currentRange
 *            supplies the end key and end-key inclusivity used when re-seeking
 * @param columnFamilies
 *            column families forwarded to seek()
 * @param includeColumnFamilies
 *            inclusive flag forwarded to seek()
 * @throws IOException
 *             if the underlying next() or seek() call fails
 */
protected void advanceItr(SortedKeyValueIterator<Key,Value> itr, ByteSequence pointer, Range currentRange, Collection<ByteSequence> columnFamilies,
                boolean includeColumnFamilies) throws IOException {
    // same hasTop() guard the loop uses below, so an exhausted iterator is never dereferenced
    Key current = itr.hasTop() ? itr.getTopKey() : null;
    if (current == null) {
        return;
    }
    
    // the row is captured once from the first key and passed to every skip(...) call
    Text row = current.getRow();
    int nextCount = 0;
    while (current != null && skip(current, row, pointer)) {
        if (maxNextCount == -1 || nextCount < maxNextCount) {
            // still under the next() budget (or no budget configured): step forward one key
            itr.next();
            nextCount++;
        } else {
            // budget exhausted: jump ahead with a seek and restart the next() counter
            Key startKey = getSeekStartKey(current, pointer);
            Range newRange = new Range(startKey, false, currentRange.getEndKey(), currentRange.isEndKeyInclusive());
            itr.seek(newRange, columnFamilies, includeColumnFamilies);
            nextCount = 0;
        }
        
        current = itr.hasTop() ? itr.getTopKey() : null;
    }
}

Source File: RangeSplit.java From datawave with Apache License 2.0

6 votes

/**
 * Adds a range to this split and widens the tracked start/end keys so they cover it.
 * <p>
 * The tracked start key becomes the smaller of the current value and the range's start key; the tracked
 * end key becomes the larger of the current value and the range's end key. When a tracked key is still
 * unset it is simply taken from the range.
 * <p>
 * NOTE(review): once a tracked key is set, a range whose corresponding key is null will fail on the
 * compareTo call - confirm callers never pass such ranges.
 *
 * @param range
 *            the range to add
 */
protected void addRange(Range range) {
    if (startKey == null || range.getStartKey().compareTo(startKey) < 0) {
        startKey = range.getStartKey();
    }
    
    if (endKey == null || range.getEndKey().compareTo(endKey) > 0) {
        endKey = range.getEndKey();
    }
    
    updateProgressDepth();
    this.ranges.add(range);
}

Source File: DatawaveFieldIndexCachingIteratorJexl.java From datawave with Apache License 2.0

6 votes

/**
 * Reports whether the most recently seeked range covers the passed in range.
 * <p>
 * Each end point of {@code r} must either equal the matching end point of the last seeked range or be
 * contained in that range; a null (unbounded) end point is only accepted when the matching end point of
 * the last seeked range is null as well.
 * 
 * @param r
 *            the range to test
 * @return true if there is a last seeked range and it contains the passed in range
 */
protected boolean lastRangeSeekedContains(Range r) {
    if (this.lastRangeSeeked == null) {
        return false;
    }
    
    Key candidateStart = r.getStartKey();
    Key candidateEnd = r.getEndKey();
    
    // start of the candidate
    if (candidateStart == null && this.lastRangeSeeked.getStartKey() != null) {
        return false;
    }
    if (!Objects.equal(candidateStart, this.lastRangeSeeked.getStartKey()) && !this.lastRangeSeeked.contains(candidateStart)) {
        return false;
    }
    
    // end of the candidate
    if (candidateEnd == null && this.lastRangeSeeked.getEndKey() != null) {
        return false;
    }
    return Objects.equal(candidateEnd, this.lastRangeSeeked.getEndKey()) || this.lastRangeSeeked.contains(candidateEnd);
}

Source File: FirstNEntriesInRowIterator.java From accumulo-recipes with Apache License 2.0

6 votes

/**
 * Seeks the source, remembering the arguments for later internal seeks.
 * <p>
 * The source is seeked from a key built from only the start key's row (inclusive) up to the original end
 * of {@code range}. When this is not the first seek and the source's top key lies before the start of
 * {@code range}, the current row is skipped. Finally the keys are prepared via {@code prepKeys()}.
 */
@Override
public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
    // remember the arguments so later internal seeks can reuse them
    latestRange = range;
    latestColumnFamilies = columnFamilies;
    latestInclusive = inclusive;
    lastRowFound = null;

    Key rowOnlyStart = null;
    if (range.getStartKey() != null) {
        rowOnlyStart = new Key(range.getStartKey().getRow());
    }
    Range widenedRange = new Range(rowOnlyStart, true, range.getEndKey(), range.isEndKeyInclusive());
    getSource().seek(widenedRange, columnFamilies, inclusive);
    finished = false;

    if (getSource().hasTop()) {
        lastRowFound = getSource().getTopKey().getRow();
        boolean topPrecedesRange = hasSeeked && range.beforeStartKey(getSource().getTopKey());
        if (topPrecedesRange) {
            skipRow();
        }
    }

    hasSeeked = true;

    prepKeys();
}

Source File: ShardIndexKeyFunctor.java From datawave with Apache License 2.0

5 votes

/**
 * Decides whether the bloom filter should be consulted for this range.
 * 
 * @param range
 *            the range being scanned
 * @return true if it is to be considered, false otherwise
 */
static boolean isRangeInBloomFilter(Range range) {
    // a range that is open on either side, or whose start key is not covered by the bloom filter, is ignored
    boolean bounded = range.getStartKey() != null && range.getEndKey() != null;
    if (!bounded || !isKeyInBloomFilter(range.getStartKey())) {
        return false;
    }
    
    // start and end agree on row and column family: consider the bloom filter
    boolean sameRowAndFamily = range.getStartKey().equals(range.getEndKey(), PartialKey.ROW_COLFAM);
    if (sameRowAndFamily) {
        return true;
    }
    
    // otherwise only an exclusive end key that is exactly the key following the start key (at row/column family depth) qualifies
    boolean endIsFollowingKey = range.getStartKey().followingKey(PartialKey.ROW_COLFAM).equals(range.getEndKey(),
                    PartialKey.ROW_COLFAM_COLQUAL_COLVIS_TIME);
    return endIsFollowingKey && !range.isEndKeyInclusive();
}

Source File: AccumuloGraphLogger.java From vertexium with Apache License 2.0

5 votes

/**
 * Writes a trace entry announcing the start of an Accumulo iterator over the given table.
 * A null range, or a range without start and end keys, is reported as "all items"; otherwise both keys are printed.
 */
private void logStartIterator(String table, Range range, SortedSet<Column> fetchedColumns) {
    String fetchedColumnsString = fetchedColumnsToString(fetchedColumns);
    boolean coversEverything = range == null || (range.getStartKey() == null && range.getEndKey() == null);
    if (coversEverything) {
        queryLogger.trace("begin accumulo iterator %s: (%s): all items", table, fetchedColumnsString);
        return;
    }
    String startText = keyToString(range.getStartKey());
    String endText = keyToString(range.getEndKey());
    queryLogger.trace("begin accumulo iterator %s: (%s): %s - %s", table, fetchedColumnsString, startText, endText);
}

Source File: AccumuloQueryRuleset.java From rya with Apache License 2.0

5 votes

/**
 * Checks whether {@code r1} fully covers {@code r2}.
 * <p>
 * Each side is checked independently. An unbounded side of r1 covers anything. A bounded side of r1 cannot
 * cover an unbounded side of r2. Otherwise an inclusive end point of r2 must be contained in r1, while an
 * exclusive end point of r2 only has to not lie beyond r1's corresponding key (equality is allowed).
 */
private static boolean rangeContainsRange(final Range r1, final Range r2) {
    // lower end points
    if (!r1.isInfiniteStartKey()) {
        if (r2.isInfiniteStartKey()) {
            return false;
        }
        final Key innerStart = r2.getStartKey();
        final boolean startCovered = r2.isStartKeyInclusive()
                ? r1.contains(innerStart)
                : innerStart.compareTo(r1.getStartKey()) >= 0;
        if (!startCovered) {
            return false;
        }
    }
    // upper end points, mirrored
    if (!r1.isInfiniteStopKey()) {
        if (r2.isInfiniteStopKey()) {
            return false;
        }
        final Key innerEnd = r2.getEndKey();
        final boolean endCovered = r2.isEndKeyInclusive()
                ? r1.contains(innerEnd)
                : innerEnd.compareTo(r1.getEndKey()) <= 0;
        if (!endCovered) {
            return false;
        }
    }
    return true;
}

Source File: CompositeSeeker.java From datawave with Apache License 2.0

5 votes

/**
 * Computes the key to seek to next within the current range.
 * <p>
 * Only the part of each column qualifier before the first null byte is considered. The current value and
 * the range bounds are split on {@code separator} and handed to {@code nextLowerBound(...)}. If the
 * result is greater than the current value and does not exceed the range's upper bound, a new key is built
 * from it (with a trailing null byte); otherwise the range's start key is returned.
 */
@Override
public Key nextSeekKey(List<String> fields, Key currentKey, Range currentRange, String separator) {
    Key rangeStart = currentRange.getStartKey();
    Key rangeEnd = currentRange.getEndKey();
    
    // keep only what precedes the first null byte of each column qualifier
    String currentValue = currentKey.getColumnQualifier().toString().split("\0")[0];
    String lowerBound = rangeStart.getColumnQualifier().toString().split("\0")[0];
    String upperBound = rangeEnd.getColumnQualifier().toString().split("\0")[0];
    
    List<String> currentParts = Arrays.asList(currentValue.split(separator));
    List<String> lowerParts = Arrays.asList(lowerBound.split(separator));
    List<String> upperParts = Arrays.asList(upperBound.split(separator));
    
    String candidate = nextLowerBound(fields, currentParts, separator, lowerParts, currentRange.isStartKeyInclusive(), upperParts,
                    currentRange.isEndKeyInclusive());
    
    // fall back to the range start unless the candidate moves past the current value while staying within the upper bound
    if (candidate.compareTo(currentValue) <= 0 || candidate.compareTo(upperBound) > 0) {
        return rangeStart;
    }
    
    Text seekQualifier = new Text(candidate + "\0");
    return new Key(currentKey.getRow(), currentKey.getColumnFamily(), seekQualifier, rangeStart.getColumnVisibility(), 0L);
}

Source File: CreateUidsIterator.java From datawave with Apache License 2.0

5 votes

/**
 * Builds the range to use when the current key has to be skipped.
 * <p>
 * The new start key keeps the row and column family of the range's start key and uses its column
 * qualifier with "\u0000\uffff" appended; it is inclusive. The end of the range is carried over unchanged.
 * 
 * @param range
 *            the range whose start key is to be skipped
 * @return a range starting at the derived key and ending where {@code range} ends
 */
protected Range skipKey(Range range) {
    Key skipped = range.getStartKey();
    Text resumeQualifier = new Text(skipped.getColumnQualifier() + "\u0000\uffff");
    Key resumeKey = new Key(skipped.getRow(), skipped.getColumnFamily(), resumeQualifier);
    return new Range(resumeKey, true, range.getEndKey(), range.isEndKeyInclusive());
}

Source File: ShardKeyFunctor.java From datawave with Apache License 2.0

5 votes

/**
 * Decides whether the bloom filter should be consulted for this range.
 * 
 * @param range
 *            the range being scanned
 * @return true if it is to be considered, false otherwise
 */
static boolean isRangeInBloomFilter(Range range) {
    // a range that is open on either side, or whose start key is not covered by the bloom filter, is ignored
    boolean bounded = range.getStartKey() != null && range.getEndKey() != null;
    if (!bounded || !isKeyInBloomFilter(range.getStartKey())) {
        return false;
    }
    
    // start and end agree on row, column family and column qualifier: consider the bloom filter
    boolean sameThroughQualifier = range.getStartKey().equals(range.getEndKey(), PartialKey.ROW_COLFAM_COLQUAL);
    if (sameThroughQualifier) {
        return true;
    }
    
    // otherwise only an exclusive end key that is exactly the key following the start key (at qualifier depth) qualifies
    boolean endIsFollowingKey = range.getStartKey().followingKey(PartialKey.ROW_COLFAM_COLQUAL).equals(range.getEndKey(),
                    PartialKey.ROW_COLFAM_COLQUAL_COLVIS_TIME);
    return endIsFollowingKey && !range.isEndKeyInclusive();
}

Source File: AndIterator.java From accumulo-recipes with Apache License 2.0

4 votes

/**
 * Seeks every source for the given range and then advances to the first intersection.
 * <p>
 * A copy of {@code range} is stored in {@code overallRange}. When the range has a start key, each source
 * is seeked from a key built by {@code buildKey(...)} with a null end key; otherwise each source is
 * seeked with {@code range} itself. After {@code advanceToIntersection()}, a top key that falls outside
 * {@code overallRange} is discarded.
 *
 * @param range the range to seek to
 * @throws IOException if seeking a source fails
 */
private void doSeek(Range range) throws IOException {

        overallRange = new Range(range);

        // remember the row of the end key, when there is one
        if (range.getEndKey() != null && range.getEndKey().getRow() != null) {
            this.parentEndRow = range.getEndKey().getRow();
        }

        // seek each of the sources to the right column family within the row given by key
        for (int i = 0; i < sourcesCount; i++) {
            Key sourceKey;
            // sources without a data location fall back to nullText
            Text dataLocation = (sources[i].dataLocation == null) ? nullText : sources[i].dataLocation;
            if (range.getStartKey() != null) {
                // Build a key with the DocID if one is given
                if (range.getStartKey().getColumnFamily() != null) {
                    sourceKey = buildKey(getPartition(range.getStartKey()), dataLocation,
                        (sources[i].term == null) ? nullText : new Text(sources[i].term + NULL_BYTE + range.getStartKey().getColumnFamily()));
                } // Build a key with just the term.
                else {
                    sourceKey = buildKey(getPartition(range.getStartKey()), dataLocation,
                        (sources[i].term == null) ? nullText : sources[i].term);
                }
                // an exclusive start is emulated by moving to the key following sourceKey
                if (!range.isStartKeyInclusive())
                    sourceKey = sourceKey.followingKey(PartialKey.ROW_COLFAM_COLQUAL);
                // the source is seeked with a null end key; the end of the requested range is enforced below via overallRange
                sources[i].iter.seek(new Range(sourceKey, true, null, false), sources[i].seekColumnFamilies, SEEK_INCLUSIVE);
            } else {
                // no start key: pass the requested range through unchanged
                sources[i].iter.seek(range, sources[i].seekColumnFamilies, SEEK_INCLUSIVE);
            }
        }

        advanceToIntersection();

        // drop a top key that lies outside the requested range
        if (hasTop()) {
            // NOTE(review): overallRange was assigned above, so the null check looks redundant unless advanceToIntersection() can reset it - confirm
            if (overallRange != null && !overallRange.contains(topKey)) {
                topKey = null;
                if (log.isDebugEnabled()) {
                    log.debug("doSeek, topKey is outside of overall range: " + overallRange);
                }
            }
        }
    }

Source File: OptimizedQueryIterator.java From accumulo-recipes with Apache License 2.0

4 votes

/**
 * Seeks either directly to a single event or through the index, depending on the range's end key.
 * <p>
 * When the end key carries a non-empty column family the range is treated as event specific and the event
 * iterator is seeked with it directly. Otherwise the index iterator is seeked first and, if it has a top
 * key, the event iterator is seeked to that key's row restricted to its column family; when the event
 * iterator then has no top, {@code next()} is called.
 */
public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
    if (log.isDebugEnabled()) {
        log.debug("seek, range:" + range);
    }

    // Test the range to see if it is event specific.
    Key endKey = range.getEndKey();
    boolean targetsSingleEvent = endKey != null && endKey.getColumnFamily() != null && endKey.getColumnFamily().getLength() != 0;

    if (targetsSingleEvent) {
        if (log.isDebugEnabled()) {
            log.debug("Jumping straight to the event");
        }
        // No need for the index iterator here: seek the event iterator directly and evaluate what it finds.
        eventSpecificRange = true;
        event.seek(range, columnFamilies, inclusive);
        if (event.hasTop()) {
            key = event.getTopKey();
            value = event.getTopValue();
        }
        return;
    }

    if (log.isDebugEnabled()) {
        log.debug("Using BooleanLogicIteratorJexl");
    }
    // Seek the boolean logic iterator
    index.seek(range, columnFamilies, inclusive);
    if (!index.hasTop()) {
        return;
    }

    // The index matched: seek the event iterator to the matched row, limited to the matched column family
    Key matchedKey = index.getTopKey();
    Range matchedRowRange = new Range(matchedKey.getRow());
    HashSet<ByteSequence> matchedFamilies = new HashSet<ByteSequence>();
    matchedFamilies.add(matchedKey.getColumnFamilyData());
    event.seek(matchedRowRange, matchedFamilies, true);
    if (event.hasTop()) {
        key = event.getTopKey();
        value = event.getTopValue();
    } else {
        next();
    }
}

Source File: AccumuloSplitsProvider.java From geowave with Apache License 2.0

4 votes

/**
 * Converts an Accumulo range into a {@code GeoWaveRowRange}.
 * <p>
 * With a non-positive {@code partitionKeyLength} the row bytes of the start and end keys are passed
 * through without a partition key. Otherwise the first {@code partitionKeyLength} bytes of the start row
 * (or of the end row when there is no start key) become the partition key and the remaining bytes become
 * the sort-key bounds. If the end row's partition prefix differs from the start row's, the end bound is
 * dropped and the end is reported as inclusive. A range with neither start nor end key yields null in
 * this mode.
 */
public static GeoWaveRowRange fromAccumuloRange(final Range range, final int partitionKeyLength) {
  final boolean hasStart = range.getStartKey() != null;
  final boolean hasEnd = range.getEndKey() != null;
  final byte[] startRow = hasStart ? range.getStartKey().getRowData().getBackingArray() : null;
  final byte[] endRow = hasEnd ? range.getEndKey().getRowData().getBackingArray() : null;

  if (partitionKeyLength <= 0) {
    return new GeoWaveRowRange(
        null,
        startRow,
        endRow,
        range.isStartKeyInclusive(),
        range.isEndKeyInclusive());
  }

  if (!hasStart && !hasEnd) {
    return null;
  }

  final byte[] partitionKey;
  boolean partitionKeyDiffers = false;
  if (hasStart) {
    partitionKey = ArrayUtils.subarray(startRow, 0, partitionKeyLength);
    if (hasEnd) {
      // the end row belongs to another partition when its prefix does not match
      partitionKeyDiffers =
          !Arrays.equals(partitionKey, ArrayUtils.subarray(endRow, 0, partitionKeyLength));
    }
  } else {
    partitionKey = ArrayUtils.subarray(endRow, 0, partitionKeyLength);
  }

  final byte[] startSortKey =
      hasStart ? ArrayUtils.subarray(startRow, partitionKeyLength, startRow.length) : null;
  final byte[] endSortKey =
      (partitionKeyDiffers || !hasEnd) ? null
          : ArrayUtils.subarray(endRow, partitionKeyLength, endRow.length);

  return new GeoWaveRowRange(
      partitionKey,
      startSortKey,
      endSortKey,
      range.isStartKeyInclusive(),
      partitionKeyDiffers || range.isEndKeyInclusive());
}

Source File: CreateUidsIteratorTest.java From datawave with Apache License 2.0

4 votes

/**
 * Verifies that, for a known data set, repeatedly re-seeking from the current top key (exclusive) walks the
 * iterator through every expected column qualifier.
 *
 * @throws IOException
 */
@Test
public void testReseek() throws IOException {
    // Value shared by every entry: a non-ignored UID list holding four document ids.
    List<String> docIds = Arrays.asList("doc1", "doc2", "doc3", "doc4");
    Uid.List.Builder uidList = Uid.List.newBuilder();
    uidList.addAllUID(docIds);
    uidList.setCOUNT(docIds.size());
    uidList.setIGNORE(false);
    Value hasDocs = new Value(uidList.build().toByteArray());
    
    // One "A" entry per date_1 .. date_49, plus a second ("B") entry for date_2.
    TreeMap<Key,Value> data = new TreeMap<>();
    List<String> expectedDocs = new LinkedList<>();
    for (int ii = 1; ii <= 49; ii++) {
        String datePrefix = "date_" + ii;
        expectedDocs.add(datePrefix);
        data.put(new Key("row", "cf", datePrefix + "\u0000A"), hasDocs);
    }
    data.put(new Key("row", "cf", "date_2\u0000B"), hasDocs);
    
    // Iterator under test, reading from the in-memory map.
    CreateUidsIterator iterator = new CreateUidsIterator();
    iterator.init(new SortedMapIterator(data), null, null);
    
    Range range = new Range(new Key("row", "cf", "date_0"), true, new Key("row", "cf", "date_\uffff"), false);
    
    iterator.seek(range, Collections.emptySet(), false);
    assertTrue(iterator.hasTop());
    
    // The top value must be readable as an IndexInfo, and the first hit must be a date_1* qualifier.
    IndexInfo indexInfo = new IndexInfo();
    indexInfo.readFields(new DataInputStream(new ByteArrayInputStream(iterator.getTopValue().get())));
    assertTrue(iterator.getTopKey().getColumnQualifier().toString().startsWith("date_1"));
    
    expectedDocs.remove(iterator.getTopKey().getColumnQualifier().toString());
    
    // Re-seek just past the current top key, once for each of the remaining 48 expected entries.
    for (int ii = 2; ii < 50; ii++) {
        Range seekRange = new Range(iterator.getTopKey(), false, range.getEndKey(), range.isEndKeyInclusive());
        iterator.seek(seekRange, Collections.emptySet(), false);
        if (!iterator.hasTop()) {
            continue;
        }
        expectedDocs.remove(iterator.getTopKey().getColumnQualifier().toString());
    }
    assertEquals("Items remaining " + expectedDocs, 0, expectedDocs.size());
}

Source File: AccumuloMrsPyramidInputFormat.java From mrgeo with Apache License 2.0

4 votes

/**
 * Retrieves the splits for a job and wraps each of them in a {@code TiledInputSplit}.
 * <p>
 * The splits come from the parent input format. For every split the rows of the range's start and end
 * keys are converted to tile ids; a missing start key maps to 0 and a missing end key to
 * {@code Long.MAX_VALUE >> 8}. Splits whose start or end row equals the metadata row are left out.
 *
 * @param context - the Job context.
 * @return The list of splits from the table.
 * @throws IOException when there is an issue getting the splits for the job.
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException
{
  // configuration and image input context for this job
  Configuration conf = context.getConfiguration();
  ImageInputFormatContext tifc = ImageInputFormatContext.load(conf);

  // splits as produced by the parent input format
  List<InputSplit> parentSplits = super.getSplits(context);
  List<InputSplit> tiledSplits = new ArrayList<InputSplit>(parentSplits.size());

  // TODO: make this pull back integer pairs - so there is no need to smooth and go through things again

  for (InputSplit parentSplit : parentSplits)
  {
    // an Accumulo split is a RangeInputSplit
    org.apache.accumulo.core.client.mapreduce.RangeInputSplit rangeSplit =
        (org.apache.accumulo.core.client.mapreduce.RangeInputSplit) parentSplit;

    Range splitRange = rangeSplit.getRange();
    log.info("Range: " + splitRange.toString());

    Key firstKey = splitRange.getStartKey();
    Key lastKey = splitRange.getEndKey();

    // tile ids used when the range is open on that side
    long startTileId = 0;
    long endTileId = Long.MAX_VALUE >> 8;

    if (firstKey != null)
    {
      Text startRow = firstKey.getRow();
      if (startRow.toString().equals(MrGeoAccumuloConstants.MRGEO_ACC_METADATA))
      {
        // metadata row: not a tile, leave this split out
        continue;
      }
      startTileId = AccumuloUtils.toLong(startRow);
    }

    if (lastKey != null)
    {
      Text endRow = lastKey.getRow();
      if (endRow.toString().equals(MrGeoAccumuloConstants.MRGEO_ACC_METADATA))
      {
        // metadata row: not a tile, leave this split out
        continue;
      }
      endTileId = AccumuloUtils.toLong(endRow);
    }

    // build the split used by core
    tiledSplits.add(new TiledInputSplit(
        parentSplit,
        startTileId,
        endTileId,
        tifc.getZoomLevel(),
        tifc.getTileSize()));

    Tile startTile = TMSUtils.tileid(startTileId, tifc.getZoomLevel());
    Tile endTile = TMSUtils.tileid(endTileId, tifc.getZoomLevel());

    log.info("\tSplit starting at " + startTileId + " (" + startTile.tx + "," + startTile.ty + ")" + " and ending at " + endTileId + " (" +
        endTile.tx + "," + endTile.ty + ")");
  }

  return tiledSplits;
}

Source File: RangeStreamScanner.java From datawave with Apache License 2.0

3 votes

/**
 * Builds the range to continue scanning with after {@code lastKey} has been returned.
 * <p>
 * This implementation assumes a particular key layout: the row is the term, the column family is the
 * field name and the last component is the shard, e.g.
 * 
 * bar FOO:20130101_0
 * 
 * The new start key keeps the row and column family of {@code lastKey} and appends "\uffff" to its column
 * qualifier, so that the scan does not visit the entries of that same shard again and again. The start
 * is inclusive; the end key and its inclusivity are taken from {@code previousRange}.
 * 
 * @param lastKey
 *            the last key that was seen
 * @param previousRange
 *            the range that was being scanned
 */
@Override
public Range buildNextRange(final Key lastKey, final Range previousRange) {
    final Text resumeQualifier = new Text(lastKey.getColumnQualifier() + "\uffff");
    final Key resumeKey = new Key(lastKey.getRow(), lastKey.getColumnFamily(), resumeQualifier);
    return new Range(resumeKey, true, previousRange.getEndKey(), previousRange.isEndKeyInclusive());
}

Source File: ScannerSession.java From datawave with Apache License 2.0

2 votes

/**
 * Builds the range to continue scanning with after {@code lastKey}. Override this for your specific
 * implementation.
 * <p>
 * The new range starts (inclusively) at the key following {@code lastKey} at
 * {@code ROW_COLFAM_COLQUAL_COLVIS_TIME} depth and keeps the end key and end inclusivity of
 * {@code previousRange}.
 * 
 * @param lastKey
 *            the last key that was seen
 * @param previousRange
 *            the range that was being scanned
 */
public Range buildNextRange(final Key lastKey, final Range previousRange) {
    final Key resumeKey = lastKey.followingKey(PartialKey.ROW_COLFAM_COLQUAL_COLVIS_TIME);
    return new Range(resumeKey, true, previousRange.getEndKey(), previousRange.isEndKeyInclusive());
}

Source File: Scan.java From datawave with Apache License 2.0

2 votes

/**
 * Builds the range to continue scanning with after {@code lastKey}. Override this for your specific
 * implementation.
 * <p>
 * The start of the new range is the key that follows {@code lastKey} at
 * {@code ROW_COLFAM_COLQUAL_COLVIS_TIME} depth, inclusive; the end is copied from {@code previousRange}.
 * 
 * @param lastKey
 *            the last key that was seen
 * @param previousRange
 *            the range that was being scanned
 */
public Range buildNextRange(final Key lastKey, final Range previousRange) {
    final Key nextStart = lastKey.followingKey(PartialKey.ROW_COLFAM_COLQUAL_COLVIS_TIME);
    final Key sameEnd = previousRange.getEndKey();
    final boolean sameEndInclusive = previousRange.isEndKeyInclusive();
    return new Range(nextStart, true, sameEnd, sameEndInclusive);
}

Java Code Examples for org.apache.accumulo.core.data.Range#getEndKey()