org.apache.accumulo.core.iterators.SortedKeyValueIterator#getTopKey

Source File: SeekingAggregator.java From datawave with Apache License 2.0

6 votes

/**
 * Advance an iterator until skip(...) returns false. May be a combination of seek() and next() calls: next() is used until {@code maxNextCount}
 * consecutive calls have been made, after which a single seek() is issued and the counter is reset. A {@code maxNextCount} of -1 means next() is always
 * used.
 * <p>
 * Returns immediately, without touching the iterator, when the iterator has no top key on entry.
 * 
 * @param itr
 *            the iterator to advance
 * @param pointer
 *            the pointer handed to skip(...) and getSeekStartKey(...)
 * @param currentRange
 *            the range being scanned; its end key and end-key inclusivity bound any seek range built here
 * @param columnFamilies
 *            column families passed through to seek()
 * @param includeColumnFamilies
 *            inclusive flag passed through to seek()
 * @throws IOException
 *             if the underlying next() or seek() call fails
 */
protected void advanceItr(SortedKeyValueIterator<Key,Value> itr, ByteSequence pointer, Range currentRange, Collection<ByteSequence> columnFamilies,
                boolean includeColumnFamilies) throws IOException {
    // check for a top key before dereferencing it; the loop below applies the same hasTop() test after every move
    Key current = itr.hasTop() ? itr.getTopKey() : null;
    if (current == null) {
        return;
    }
    
    // the row of the starting key is used for every skip(...) test
    Text row = current.getRow();
    int nextCount = 0;
    while (current != null && skip(current, row, pointer)) {
        if (maxNextCount == -1 || nextCount < maxNextCount) {
            itr.next();
            nextCount++;
        } else {
            // next() budget used up: seek to just after the start key computed from the current key
            Key startKey = getSeekStartKey(current, pointer);
            Range newRange = new Range(startKey, false, currentRange.getEndKey(), currentRange.isEndKeyInclusive());
            itr.seek(newRange, columnFamilies, includeColumnFamilies);
            nextCount = 0;
        }
        
        current = itr.hasTop() ? itr.getTopKey() : null;
    }
}

Source File: QueryFilterIterator.java From geowave with Apache License 2.0

6 votes

/**
 * Moves {@code source} forward until its top key is either marked deleted or accepted by
 * {@code filter}, or until the source has no top entry left.
 *
 * @param source the iterator to advance
 * @param filter the filter each non-deleted top entry is tested against
 * @throws RuntimeException wrapping any IOException raised while advancing the source
 */
protected static void findTopEnhanced(
    final SortedKeyValueIterator<Key, Value> source,
    final Filter filter) {
  if (!source.hasTop()) {
    return;
  }
  Key candidate = source.getTopKey();
  // stop on a deleted key or on the first entry the filter accepts
  while (!(candidate.isDeleted() || filter.accept(candidate, source.getTopValue()))) {
    try {
      source.next();
    } catch (final IOException e) {
      throw new RuntimeException(e);
    }
    if (!source.hasTop()) {
      return;
    }
    candidate = source.getTopKey();
  }
}

Source File: TLDFieldIndexAggregator.java From datawave with Apache License 2.0

5 votes

/**
 * Adds an attribute to the document for the current top key and for every following key that skip(...) accepts, emitting a document key attribute
 * whenever the pointer parsed from the column qualifier changes.
 * 
 * @param itr
 *            the iterator to read from; must have a top key on entry
 * @param d
 *            the document that receives the attributes
 * @param af
 *            factory used to build each attribute
 * @return the result of getResult(...) for the last key processed and the root pointer of the first key
 * @throws IOException
 *             if advancing the iterator fails
 */
public Key apply(SortedKeyValueIterator<Key,Value> itr, Document d, AttributeFactory af) throws IOException {
    final Key first = itr.getTopKey();
    final ByteSequence parentId = parseRootPointerFromFI(first.getColumnQualifierData());
    final Text row = first.getRow();
    
    ByteSequence lastDocId = null;
    Key processed;
    Key upcoming = first;
    do {
        processed = upcoming;
        
        // the field name is the column family minus its first three characters; the value is the qualifier up to the first null byte
        final String field = processed.getColumnFamily().toString().substring(3);
        final String qualifier = processed.getColumnQualifier().toString();
        final String value = qualifier.substring(0, qualifier.indexOf('\0'));
        
        final Attribute<?> fieldAttr = af.create(field, value, processed, true);
        // in addition to keeping fields that the filter indicates should be kept, also keep fields that the filter applies. This is due to inconsistent
        // behavior between event/tld queries where an index only field index will be kept except when it is a child of a tld
        final boolean keep = (fieldsToAggregate == null || fieldsToAggregate.contains(JexlASTHelper.removeGroupingContext(field)))
                        && (attrFilter == null || attrFilter.keep(processed));
        fieldAttr.setToKeep(keep);
        d.put(field, fieldAttr);
        
        // record a document key each time the pointer differs from the previous one
        final ByteSequence currentId = parsePointerFromFI(processed.getColumnQualifierData());
        if (lastDocId == null || !lastDocId.equals(currentId)) {
            lastDocId = currentId;
            final Key docKey = new Key(processed.getRow(), new Text(lastDocId.toArray()), new Text(),
                            ColumnVisibilityCache.get(processed.getColumnVisibilityData()), processed.getTimestamp());
            final Attribute<?> docKeyAttr = new DocumentKey(docKey, false);
            d.put(Document.DOCKEY_FIELD_NAME, docKeyAttr);
        }
        
        itr.next();
        upcoming = itr.hasTop() ? itr.getTopKey() : null;
    } while (skip(upcoming, row, parentId));
    
    return getResult(processed, parentId);
}

Source File: TLDFieldIndexAggregator.java From datawave with Apache License 2.0

5 votes

/**
 * Moves the iterator past the current top key and every following key that skip(...) accepts, without collecting anything.
 * 
 * @param itr
 *            the iterator to advance; must have a top key on entry
 * @return the result of getResult(...) for the last key stepped over and the pointer parsed from the first key
 * @throws IOException
 *             if advancing the iterator fails
 */
public Key apply(SortedKeyValueIterator<Key,Value> itr) throws IOException {
    Key last = itr.getTopKey();
    final ByteSequence parentId = parsePointer(last);
    final Text row = last.getRow();
    
    while (true) {
        itr.next();
        final Key upcoming = itr.hasTop() ? itr.getTopKey() : null;
        if (!skip(upcoming, row, parentId)) {
            break;
        }
        last = upcoming;
    }
    
    return getResult(last, parentId);
}

Source File: BooleanLogicTreeNode.java From accumulo-recipes with Apache License 2.0

5 votes

/**
 * Resets this node, seeks the iterator stored as its user object and records the resulting top key.
 * If the iterator has no top after the seek the node is marked done; if there is no iterator the top
 * key is simply left null.
 *
 * @param range          range passed to the iterator's seek
 * @param columnFamilies column families passed to the iterator's seek
 * @param inclusive      inclusive flag passed to the iterator's seek
 * @throws IOException if the iterator's seek fails
 */
public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
    // clear state left over from any earlier call
    this.setTopKey(null);
    this.setDone(false);

    // the user object of this node should be an iterator
    SortedKeyValueIterator<?, ?> iter = (SortedKeyValueIterator<?, ?>) this.getUserObject();
    if (iter == null) {
        if (log.isDebugEnabled()) {
            log.debug("BLTNODE.seek(), The iterator was null!");
        }
        this.setTopKey(null);
        return;
    }

    iter.seek(range, columnFamilies, inclusive);

    if (!iter.hasTop()) {
        if (log.isDebugEnabled()) {
            log.debug("BLTNODE.seek() -> hasTop::false");
        }
        this.setDone(true);
        return;
    }

    Key found = buildKey((Key) iter.getTopKey());
    this.setTopKey(found);
    if (log.isDebugEnabled()) {
        log.debug("BLTNODE.seek() -> found: " + this.getTopKey());
    }
}

Source File: SeekingAggregator.java From datawave with Apache License 2.0

5 votes

/**
 * Builds the result for the iterator's current top key, then advances the iterator via advanceItr(...).
 * 
 * @param itr
 *            the iterator to read from and advance; must have a top key on entry
 * @param current
 *            the current range, passed through to advanceItr(...)
 * @param columnFamilies
 *            column families passed through to advanceItr(...)
 * @param includeColumnFamilies
 *            inclusive flag passed through to advanceItr(...)
 * @return the result computed from the top key as it was before the iterator was advanced
 * @throws IOException
 *             if advancing the iterator fails
 */
@Override
public Key apply(SortedKeyValueIterator<Key,Value> itr, Range current, Collection<ByteSequence> columnFamilies, boolean includeColumnFamilies)
                throws IOException {
    // compute the answer first; the iterator is moved afterwards
    final Key topKey = itr.getTopKey();
    final ByteSequence pointer = parsePointer(topKey);
    final Key aggregated = getResult(topKey, pointer);
    
    advanceItr(itr, pointer, current, columnFamilies, includeColumnFamilies);
    
    return aggregated;
}

Source File: SecondaryIndexQueryFilterIterator.java From geowave with Apache License 2.0

5 votes

/**
 * Decides whether a row is accepted by testing each of its entries against the configured filter.
 * Entries whose column qualifier equals {@code primaryIndexId} are not tested. The row is accepted
 * as soon as one tested entry passes the filter.
 *
 * @param rowIterator iterator over the entries of a single row; advanced as entries are examined
 * @return {@code true} if no filter is set or some tested entry passes the filter, {@code false}
 *         if the row is exhausted without a match
 * @throws IOException if advancing the row iterator fails
 */
@Override
public boolean acceptRow(final SortedKeyValueIterator<Key, Value> rowIterator)
    throws IOException {
  if (filter == null) {
    // should not happen but if the filter is not sent to this iterator, it
    // will accept everything
    return true;
  }
  while (rowIterator.hasTop()) {
    final Key key = rowIterator.getTopKey();
    final Value value = rowIterator.getTopValue();
    // decode the column qualifier once; it is both the skip test and the dataset field name
    final String cq =
        StringUtils.stringFromBinary(key.getColumnQualifierData().getBackingArray());
    if (!cq.equals(primaryIndexId)) {
      final IndexedPersistenceEncoding<ByteArray> persistenceEncoding =
          new IndexedPersistenceEncoding<>(
              null, // not needed
              null, // not needed
              null, // not needed
              null, // not needed
              0, // not needed
              new MultiFieldPersistentDataset<>(cq, new ByteArray(value.get())),
              null);
      if (filter.accept(null, persistenceEncoding)) {
        return true;
      }
    }
    rowIterator.next();
  }
  return false;
}

Source File: IdentityAggregator.java From datawave with Apache License 2.0

5 votes

/**
 * Advances the iterator past every key that shares the row and pointer of the current top key and returns the parent key built from that first key.
 * 
 * @param itr
 *            the iterator to advance; must have a top key on entry
 * @return the key built by TLD.buildParentKey(...) from the first key's row, pointer, field name/value, visibility and timestamp
 * @throws IOException
 *             if advancing the iterator fails
 */
@Override
public Key apply(SortedKeyValueIterator<Key,Value> itr) throws IOException {
    final Key first = itr.getTopKey();
    final Text row = first.getRow();
    final ByteSequence pointer = parsePointer(first.getColumnQualifierData());
    
    // step over all entries that still match the starting row and pointer
    while (itr.hasTop() && samePointer(row, pointer, itr.getTopKey())) {
        itr.next();
    }
    
    return TLD.buildParentKey(row, pointer, parseFieldNameValue(first.getColumnFamilyData(), first.getColumnQualifierData()),
                    first.getColumnVisibility(), first.getTimestamp());
}

Source File: SourceManagerTest.java From datawave with Apache License 2.0

5 votes

/**
 * Two deep copies from the same SourceManager are seeked to different ranges and read in an interleaved fashion; the first range is expected to yield
 * no keys while the second is expected to yield 26.
 */
@Test
public void dataIntegrity_emptyRangeTest() throws IOException {
    SourceManager manager = new SourceManager(dataIterator);
    manager.setInitialSize(1);
    
    // create both copies and give each an initial full-range seek
    SortedKeyValueIterator<Key,Value> emptySide = manager.deepCopy(null);
    emptySide.seek(new Range(), Collections.emptyList(), false);
    SortedKeyValueIterator<Key,Value> populatedSide = manager.deepCopy(null);
    populatedSide.seek(new Range(), Collections.emptyList(), false);
    
    // re-seek: the first copy gets the range expected to be empty
    emptySide.seek(new Range(new Key("20121126_9"), true, new Key("20121126_99"), false), Collections.emptyList(), false);
    populatedSide.seek(new Range(new Key("20121126_0"), true, new Key("20121126_4"), true), Collections.emptyList(), false);
    
    int emptySideCount = 0;
    int populatedSideCount = 0;
    
    // alternate between the two copies, taking at most one key from each per pass
    while (emptySide.hasTop() || populatedSide.hasTop()) {
        if (emptySide.hasTop()) {
            emptySide.getTopKey();
            emptySideCount++;
            emptySide.next();
        }
        
        if (populatedSide.hasTop()) {
            populatedSide.getTopKey();
            populatedSideCount++;
            populatedSide.next();
        }
    }
    
    assertTrue(populatedSideCount > emptySideCount);
    assertTrue(populatedSideCount == 26);
    assertTrue(emptySideCount == 0);
}

Source File: AggregationIteratorTest.java From timely with Apache License 2.0

5 votes

/**
 * Initializes {@code iter} over {@code testData} with aggregation options ("host" matching ".*",
 * the Avg class), seeks the full range, asserts the single top key equals the last key of the test
 * data and returns the decoded top value.
 *
 * @param iter     the iterator under test
 * @param testData the data the iterator is run over
 * @param period   not used by this method
 * @return the aggregations decoded from the iterator's top value
 * @throws Exception if initializing, seeking or decoding fails
 */
private Map<Set<Tag>, Aggregation> runQuery(SortedKeyValueIterator<Key, Value> iter, SortedMap<Key, Value> testData,
        long period) throws Exception {
    IteratorSetting setting = new IteratorSetting(100, AggregationIterator.class);
    AggregationIterator.setAggregationOptions(setting, Collections.singletonMap("host", ".*"), Avg.class.getName());

    iter.init(new SortedMapIterator(testData), setting.getOptions(), null);
    iter.seek(new Range(), Collections.emptyList(), true);

    assertTrue(iter.hasTop());
    Key top = iter.getTopKey();
    assertEquals(testData.lastKey(), top);

    return AggregationIterator.decodeValue(iter.getTopValue());
}

Source File: DownsampleIteratorTest.java From timely with Apache License 2.0

5 votes

/**
 * Initializes {@code iter} over {@code testData} with downsample options, seeks the full range and
 * merges every decoded top value into one map keyed by tag set. Asserts that the last top key seen
 * equals the last key of the test data.
 *
 * @param iter                the iterator under test
 * @param testData            the data the iterator is run over
 * @param period              period passed to the downsample options
 * @param maxDownsampleMemory memory limit passed to the downsample options
 * @return the merged downsamples keyed by tag set
 * @throws Exception if initializing, seeking or decoding fails
 */
private Map<Set<Tag>, Downsample> runQuery(SortedKeyValueIterator<Key, Value> iter, SortedMap<Key, Value> testData,
        long period, long maxDownsampleMemory) throws Exception {
    IteratorSetting setting = new IteratorSetting(100, DownsampleIterator.class);
    DownsampleIterator.setDownsampleOptions(setting, 0, 1000, period, maxDownsampleMemory, Avg.class.getName());

    iter.init(new SortedMapIterator(testData), setting.getOptions(), null);
    iter.seek(new Range(), Collections.emptyList(), true);
    assertTrue(iter.hasTop());

    Key lastSeen = null;
    Map<Set<Tag>, Downsample> merged = new HashMap<>();
    // NOTE(review): this loop never calls iter.next(); it only terminates if hasTop() becomes
    // false by some other means (presumably inside the iterator under test) - confirm.
    do {
        Map<Set<Tag>, Downsample> batch = DownsampleIterator.decodeValue(iter.getTopValue());
        List<Downsample> pair = new ArrayList<>();
        for (Entry<Set<Tag>, Downsample> entry : batch.entrySet()) {
            Set<Tag> tags = entry.getKey();
            Downsample existing = merged.get(tags);
            if (existing != null) {
                // combine the sample already collected for this tag set with the new one
                pair.clear();
                pair.add(existing);
                pair.add(entry.getValue());
                merged.put(tags, Downsample.combineDownsample(pair, null));
            } else {
                merged.put(tags, entry.getValue());
            }
        }
        lastSeen = iter.getTopKey();
        System.out.println(lastSeen.toString());
    } while (iter.hasTop());

    assertEquals(testData.lastKey(), lastSeen);
    return merged;
}

Source File: SourceManagerTest.java From datawave with Apache License 2.0

4 votes

/**
 * Two deep copies from the same SourceManager are seeked to different ranges and read in an interleaved fashion. The first copy is re-seeked to its
 * range once after it is exhausted, so it is expected to yield twice its range's key count (9 * 2) while the second yields 26.
 */
@Test
public void dataIntegrity_differentRangeReseekTest() throws IOException {
    SourceManager manager = new SourceManager(dataIterator);
    manager.setInitialSize(1);
    
    // create both copies and give each an initial full-range seek
    SortedKeyValueIterator<Key,Value> narrow = manager.deepCopy(null);
    narrow.seek(new Range(), Collections.emptyList(), false);
    SortedKeyValueIterator<Key,Value> wide = manager.deepCopy(null);
    wide.seek(new Range(), Collections.emptyList(), false);
    
    // position each copy on its own range
    narrow.seek(new Range(new Key("20121126_2"), true, new Key("20121126_3"), false), Collections.emptyList(), false);
    wide.seek(new Range(new Key("20121126_0"), true, new Key("20121126_4"), true), Collections.emptyList(), false);
    
    int narrowCount = 0;
    int wideCount = 0;
    boolean reseekPending = true;
    
    // alternate between the two copies, taking at most one key from each per pass
    while (narrow.hasTop() || wide.hasTop()) {
        if (narrow.hasTop()) {
            narrow.getTopKey();
            narrowCount++;
            narrow.next();
        }
        
        if (reseekPending && !narrow.hasTop()) {
            // test intermediate seek: restart the first copy on the same range, once
            narrow.seek(new Range(new Key("20121126_2"), true, new Key("20121126_3"), false), Collections.emptyList(), false);
            reseekPending = false;
        }
        
        if (wide.hasTop()) {
            wide.getTopKey();
            wideCount++;
            wide.next();
        }
    }
    
    assertTrue(wideCount > narrowCount);
    assertTrue(wideCount == 26);
    // since re-seek after the first one should be 2x expected
    assertTrue(narrowCount == 9 * 2);
}

Source File: BooleanLogicTreeNode.java From accumulo-recipes with Apache License 2.0

4 votes

/**
 * Clears the current top key and, unless this node is already marked done, advances the iterator
 * stored as its user object. The new top key (after buildKey) is recorded, or the node is marked
 * done when the iterator has nothing left.
 *
 * @throws IOException if advancing the iterator fails
 */
public void next() throws IOException {
    // discard whatever key the previous call left behind
    this.setTopKey(null);

    if (log.isDebugEnabled()) {
        TreeNode[] path = this.getPath();
        log.debug("BLTNODE.next() path-> " + this.buildTreePathString(path));
    }

    // a node already marked done has nothing more to offer
    if (this.isDone()) {
        if (log.isDebugEnabled()) {
            log.debug("I've been marked as done, returning");
        }
        return;
    }

    SortedKeyValueIterator<?, ?> iter = (SortedKeyValueIterator<?, ?>) this.getUserObject();
    iter.next();

    if (!iter.hasTop()) {
        // the iterator is exhausted, so this node is finished
        if (log.isDebugEnabled()) {
            log.debug("BLTNODE.next() -> Nothing found");
        }
        this.setTopKey(null);
        this.setDone(true);
        return;
    }

    // a top key exists: keep only the piece produced by buildKey
    Key found = buildKey((Key) iter.getTopKey());
    this.setTopKey(found);

    if (log.isDebugEnabled()) {
        log.debug("BLTNODE.next() -> found: " + this.getTopKey());
    }
}

Source File: MergingVisibilityCombiner.java From geowave with Apache License 2.0

4 votes

/**
 * Copies or merges the entries of {@code input} into {@code output}.
 *
 * <p>If {@code combiners} is null, does not contain the first key, or the first key is deleted,
 * every entry is appended unchanged. Otherwise the entries are merged: values are turned into
 * {@code Mergeable}s and merged together, the visibilities of the merged keys are combined, and a
 * single entry is appended each time the row changes and once more at the end of the input.
 *
 * @param input the entries to transform
 * @param output the buffer that receives the transformed entries
 * @throws IOException if advancing the input fails
 */
@Override
protected void transformRangeInternal(
    final SortedKeyValueIterator<Key, Value> input,
    final KVBuffer output) throws IOException {
  Mergeable currentMergeable = null;
  Key outputKey = null;
  // the first top key decides whether this range is combined at all
  workKey.set(input.getTopKey());
  // default to not combining, only combine when combiners does not
  // contain this column
  if ((combiners == null) || !combiners.contains(workKey) || workKey.isDeleted()) {
    // don't transform at all
    while (input.hasTop()) {
      output.append(input.getTopKey(), input.getTopValue());
      input.next();
    }
    return;
  }
  while (input.hasTop()) {
    final Value val = input.getTopValue();
    // the SortedKeyValueIterator uses the same instance of topKey to
    // hold keys (a wrapper)
    final Key currentKey = new Key(input.getTopKey());
    if (outputKey == null) {
      // very first entry: it becomes the output key as-is
      outputKey = currentKey;
    } else if ((currentMergeable != null)
        && !outputKey.getRowData().equals(currentKey.getRowData())) {
      // row changed: flush what has been merged so far and start over with this key.
      // NOTE(review): this flush serializes with URLClassloaderUtils.toBinary while the final
      // flush below uses getBinary - confirm the two are equivalent.
      output.append(outputKey, new Value(URLClassloaderUtils.toBinary(currentMergeable)));
      currentMergeable = null;
      outputKey = currentKey;
      // input.next() is deliberately skipped here: the same entry is handled again on the next
      // pass, where currentMergeable is null and the visibility-combining branch is taken
      continue;
    } else {
      // fold this key's visibility into the output key's visibility
      final Text combinedVisibility =
          new Text(
              combineVisibilities(
                  currentKey.getColumnVisibility().getBytes(),
                  outputKey.getColumnVisibility().getBytes()));
      outputKey = replaceColumnVisibility(outputKey, combinedVisibility);
    }
    final Mergeable mergeable = getMergeable(currentKey, val.get());
    // hopefully its never the case that null mergeables are stored,
    // but just in case, check
    if (mergeable != null) {
      if (currentMergeable == null) {
        currentMergeable = mergeable;
      } else {
        currentMergeable.merge(mergeable);
      }
    }
    input.next();
  }
  // flush whatever is still pending once the input is exhausted
  if (currentMergeable != null) {
    output.append(outputKey, new Value(getBinary(currentMergeable)));
  }
}

Source File: CardinalityAggregator.java From datawave with Apache License 2.0

4 votes

/**
 * Adds a Cardinality attribute to the document for the current top key and every following key that shares its row and pointer, then returns the
 * parent key built from the last key processed.
 * 
 * @param itr
 *            the iterator to read from and advance; must have a top key on entry
 * @param doc
 *            the document that receives one Cardinality per processed key
 * @param attrs
 *            not used by this method
 * @return the key built by TLD.buildParentKey(...) from the row, the pointer and the last key processed
 * @throws IOException
 *             if advancing the iterator fails
 */
@Override
public Key apply(SortedKeyValueIterator<Key,Value> itr, Document doc, AttributeFactory attrs) throws IOException {
    Key lastKey = itr.getTopKey();
    final Text row = lastKey.getRow();
    final ByteSequence pointer = parsePointer(lastKey.getColumnQualifierData());
    
    Key cursor = lastKey;
    while (cursor != null && samePointer(row, pointer, cursor)) {
        final DatawaveKey parsed = new DatawaveKey(cursor);
        final String field = parsed.getFieldName();
        final String value = parsed.getFieldValue();
        
        // try to rebuild the cardinality from the stored value; an empty value or a failed build leaves it null
        FieldValueCardinality fvC = null;
        final byte[] serialized = itr.getTopValue().get();
        try {
            if (serialized.length > 0) {
                fvC = new FieldValueCardinality(HyperLogLogPlus.Builder.build(serialized));
                if (log.isTraceEnabled()) {
                    log.trace("Set cardinality from FI value");
                }
            }
        } catch (Exception e) {
            if (log.isTraceEnabled()) {
                log.trace("Exception encountered " + e);
            }
        }
        
        // nothing usable was stored, so start a fresh cardinality
        if (fvC == null) {
            if (log.isTraceEnabled()) {
                log.trace("Building cardinality for " + parsed.getUid());
            }
            fvC = new FieldValueCardinality();
            if (setDocIds) {
                fvC.setDocId(parsed.getUid());
            }
        }
        
        fvC.setContent(value);
        
        // for cardinalities, only use the visibility metadata
        final Key metadata = new Key(EMPTY_TEXT, EMPTY_TEXT, EMPTY_TEXT, itr.getTopKey().getColumnVisibility(), -1);
        final Cardinality card = new Cardinality(fvC, metadata, doc.isToKeep());
        
        // only keep fields that are index only
        card.setToKeep(fieldsToKeep == null || fieldsToKeep.contains(JexlASTHelper.removeGroupingContext(field)));
        doc.put(field, card);
        
        lastKey = cursor;
        itr.next();
        cursor = itr.hasTop() ? itr.getTopKey() : null;
    }
    
    return TLD.buildParentKey(row, pointer, TLD.parseFieldAndValueFromFI(lastKey.getColumnFamilyData(), lastKey.getColumnQualifierData()),
                    lastKey.getColumnVisibility(), lastKey.getTimestamp());
}

Source File: KeyToDocumentData.java From datawave with Apache License 2.0

4 votes

/**
 * Given a Key pointing to the start of a document to aggregate, walks the source from its current position to exhaustion and collects the entries that
 * {@code equality} reports as part of that document. When a filter is present it decides which keys contribute a document key, which entries are kept
 * as-is, and may supply a transformed key and a seek range for entries it rejects.
 * 
 * @param documentStartKey
 *            A Key of the form "bucket type\x00uid: "; when null an empty list is returned
 * @param source
 *            the iterator to read from, already positioned by a seek(); it is advanced until it has no top key
 * @param equality
 *            decides whether a key is part of the document identified by {@code documentStartKey}
 * @param filter
 *            optional filter; when null every key that is part of the document is collected
 * @param docKeys
 *            receives the document key of every key kept by the filter (or of every document key when there is no filter)
 * @param keyRange
 *            the Range used to initialize source with seek(); its end key bounds any seek range requested from the filter
 * @return the attributes
 * @throws IOException
 *             if advancing or seeking the source fails
 */
private static List<Entry<Key,Value>> collectAttributesForDocumentKey(Key documentStartKey, SortedKeyValueIterator<Key,Value> source, Equality equality,
                EventDataQueryFilter filter, Set<Key> docKeys, Range keyRange) throws IOException {
    
    // setup the document key we are filtering for on the EventDataQueryFilter
    if (filter != null) {
        filter.startNewDocument(documentStartKey);
    }
    
    if (null == documentStartKey) {
        return Collections.emptyList();
    }
    
    final List<Entry<Key,Value>> documentAttributes = new ArrayList<>(256);
    
    // hold the current key through a strong reference so it cannot be cleared by the garbage collector while it is being examined
    Key docAttrKey = source.hasTop() ? source.getTopKey() : null;
    
    while (docAttrKey != null) {
        boolean seeked = false;
        if (equality.partOf(documentStartKey, docAttrKey)) {
            if (filter == null || filter.keep(docAttrKey)) {
                docKeys.add(getDocKey(docAttrKey));
            }
            
            if (filter == null || filter.apply(Maps.immutableEntry(docAttrKey, StringUtils.EMPTY))) {
                documentAttributes.add(Maps.immutableEntry(docAttrKey, source.getTopValue()));
            } else {
                // filter is non-null here and rejected the entry; it may still supply a replacement key
                Key limitKey = filter.transform(docAttrKey);
                if (limitKey != null) {
                    documentAttributes.add(Maps.immutableEntry(limitKey, EMPTY_VALUE));
                }
                // request a seek range from the filter
                Range seekRange = filter.getSeekRange(docAttrKey, keyRange.getEndKey(), keyRange.isEndKeyInclusive());
                if (seekRange != null) {
                    source.seek(seekRange, columnFamilies, inclusive);
                    seeked = true;
                }
            }
        }
        
        // only call next if this wasn't a fresh seek()
        if (!seeked) {
            source.next();
        }
        
        docAttrKey = source.hasTop() ? source.getTopKey() : null;
    }
    
    return documentAttributes;
}

Source File: ValueCombiner.java From datawave with Apache License 2.0

2 votes

/**
 * Creates an iterator over the Values whose Keys are versions of the source's current top key. The top key is copied at construction time and the
 * initial hasNext state is computed immediately.
 * 
 * @param source
 *            The {@code SortedKeyValueIterator<Key,Value>} from which to read data; must have a top key.
 */
public ValueCombiner(SortedKeyValueIterator<Key,Value> source) {
    this.source = source;
    // copy the key rather than keeping the instance returned by the source
    this.topKey = new Key(source.getTopKey());
    this.hasNext = _hasNext();
}

Source File: StatsLinksEdgeCombiner.java From datawave with Apache License 2.0

2 votes

/**
 * Creates an iterator over the Values whose Keys are versions of the source's current top key. The top key is copied at construction time and the
 * initial hasNext state is computed immediately.
 * 
 * @param source
 *            The {@code SortedKeyValueIterator<Key,Value>} from which to read data; must have a top key.
 */
public ValueIterator(final SortedKeyValueIterator<Key,Value> source) {
    this.source = source;
    // copy the key rather than keeping the instance returned by the source
    this.topKey = new Key(source.getTopKey());
    this.hasNext = _hasNext();
}

Java Code Examples for org.apache.accumulo.core.iterators.SortedKeyValueIterator#getTopKey()