com.carrotsearch.hppc.cursors.LongCursor Java Examples
The following examples show how to use
com.carrotsearch.hppc.cursors.LongCursor.
Each example lists its source file, the project it comes from, and that project's license.
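Before the project-specific examples, here is a minimal, self-contained sketch of the pattern they all share: HPPC primitive collections such as LongHashSet iterate over LongCursor objects, and the current element is read from the cursor's public value field (its slot in the container is available as index). The class name LongCursorBasics is made up for illustration. HPPC iterators typically reuse a single cursor instance, so copy value out rather than holding on to the cursor itself.

import com.carrotsearch.hppc.LongHashSet;
import com.carrotsearch.hppc.cursors.LongCursor;

public class LongCursorBasics {
  public static void main(String[] args) {
    LongHashSet set = new LongHashSet();
    set.add(1L);
    set.add(2L);
    set.add(42L);

    // HPPC long containers iterate over LongCursor objects instead of boxed Longs.
    // Read the primitive element from cursor.value.
    long sum = 0;
    for (LongCursor cursor : set) {
      sum += cursor.value;
    }
    System.out.println("sum = " + sum); // 45
  }
}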
Example #1
Source File: ExpandComponent.java From lucene-solr with Apache License 2.0 | 6 votes |
private Query getGroupQuery(String fname,
                            FieldType ft,
                            int size,
                            LongHashSet groupSet) {
  BytesRef[] bytesRefs = new BytesRef[size];
  int index = -1;
  BytesRefBuilder term = new BytesRefBuilder();
  Iterator<LongCursor> it = groupSet.iterator();
  while (it.hasNext()) {
    LongCursor cursor = it.next();
    String stringVal = numericToString(ft, cursor.value);
    ft.readableToIndexed(stringVal, term);
    bytesRefs[++index] = term.toBytesRef();
  }
  return new TermInSetQuery(fname, bytesRefs);
}
Example #2
Source File: LongTermsSet.java From siren-join with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Serialize the list of terms to the {@link StreamOutput}.
 * <br>
 * Given the low performance of {@link org.elasticsearch.common.io.stream.BytesStreamOutput} when writing a large
 * number of longs (5 to 10 times slower than writing directly to a byte[]), we use a small buffer of 8kb
 * to optimise the throughput. 8kb seems to be the optimal buffer size; larger buffer sizes did not improve
 * the throughput.
 *
 * @param out the output
 */
@Override
public void writeTo(StreamOutput out) throws IOException {
  // Encode flag
  out.writeBoolean(this.isPruned());

  // Encode size of list
  out.writeInt(set.size());

  // Encode longs
  BytesRef buffer = new BytesRef(new byte[1024 * 8]);
  Iterator<LongCursor> it = set.iterator();
  while (it.hasNext()) {
    Bytes.writeLong(buffer, it.next().value);
    if (buffer.offset == buffer.length) {
      out.write(buffer.bytes, 0, buffer.offset);
      buffer.offset = 0;
    }
  }

  // flush the remaining bytes from the buffer
  out.write(buffer.bytes, 0, buffer.offset);
}
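The point of the example above is the 8kb staging buffer: longs are accumulated in a byte[] and flushed to the stream in large chunks instead of one small write per value. Below is a minimal, standalone sketch of the same idea using only the JDK's ByteBuffer and OutputStream, without the project's Bytes helper or Elasticsearch's StreamOutput; the class and method names are made up for illustration.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;

import com.carrotsearch.hppc.LongHashSet;
import com.carrotsearch.hppc.cursors.LongCursor;

public class BufferedLongWriter {
  // Stage longs in a fixed 8 KB buffer and flush it whenever it fills up,
  // instead of issuing one small write per long.
  static void writeLongs(LongHashSet set, OutputStream out) throws IOException {
    ByteBuffer buffer = ByteBuffer.allocate(8 * 1024);
    for (LongCursor cursor : set) {
      buffer.putLong(cursor.value);
      if (!buffer.hasRemaining()) {       // buffer full: flush and reuse it
        out.write(buffer.array(), 0, buffer.position());
        buffer.clear();
      }
    }
    // Flush whatever is left in the buffer.
    out.write(buffer.array(), 0, buffer.position());
  }

  public static void main(String[] args) throws IOException {
    LongHashSet set = new LongHashSet();
    for (long i = 0; i < 10_000; i++) set.add(i);
    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    writeLongs(set, sink);
    System.out.println(sink.size() + " bytes written"); // 10,000 * 8 = 80,000
  }
}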
Example #3
Source File: ExpandComponent.java From lucene-solr with Apache License 2.0 | 5 votes |
public NumericGroupExpandCollector(String field, long nullValue, LongHashSet groupSet, IntHashSet collapsedSet,
                                   int limit, Sort sort) throws IOException {
  int numGroups = collapsedSet.size();
  this.nullValue = nullValue;
  groups = new LongObjectHashMap<>(numGroups);
  for (LongCursor cursor : groupSet) {
    groups.put(cursor.value, getCollector(limit, sort));
  }
  this.field = field;
  this.collapsedSet = collapsedSet;
}
Example #4
Source File: ExpandComponent.java From lucene-solr with Apache License 2.0 | 5 votes |
private Query getPointGroupQuery(SchemaField sf,
                                 int size,
                                 LongHashSet groupSet) {
  Iterator<LongCursor> it = groupSet.iterator();
  List<String> values = new ArrayList<>(size);
  FieldType ft = sf.getType();
  while (it.hasNext()) {
    LongCursor cursor = it.next();
    values.add(numericToString(ft, cursor.value));
  }
  return sf.getType().getSetQuery(null, sf, values);
}
Example #5
Source File: IntLongDynamicMap.java From lucene-solr with Apache License 2.0 | 5 votes |
public void forEachValue(LongConsumer consumer) {
  if (keyValues != null) {
    for (long val : keyValues) {
      if (val != emptyValue) consumer.accept(val);
    }
  } else {
    for (LongCursor ord : hashMap.values()) {
      consumer.accept(ord.value);
    }
  }
}
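The hash-map branch above walks hashMap.values() with a LongCursor so that no Long boxing happens on the way to the LongConsumer. A minimal sketch of the same callback pattern against a plain HPPC IntLongHashMap is shown below; the class ForEachValueSketch and its helper are hypothetical and only illustrate the iteration, not Solr's IntLongDynamicMap internals.

import java.util.concurrent.atomic.AtomicLong;
import java.util.function.LongConsumer;

import com.carrotsearch.hppc.IntLongHashMap;
import com.carrotsearch.hppc.cursors.LongCursor;

public class ForEachValueSketch {
  // Visit every value of an int->long HPPC map without boxing, mirroring the
  // hash-map branch of IntLongDynamicMap.forEachValue above.
  static void forEachValue(IntLongHashMap map, LongConsumer consumer) {
    for (LongCursor cursor : map.values()) {
      consumer.accept(cursor.value);
    }
  }

  public static void main(String[] args) {
    IntLongHashMap docToOrd = new IntLongHashMap();
    docToOrd.put(3, 10L);
    docToOrd.put(7, 20L);
    docToOrd.put(9, 30L);

    AtomicLong sum = new AtomicLong();
    forEachValue(docToOrd, sum::addAndGet);
    System.out.println("sum of values = " + sum.get()); // 60
  }
}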
Example #6
Source File: LongTermsSet.java From siren-join with GNU Affero General Public License v3.0 | 5 votes |
@Override
public BytesRef writeToBytes() {
  long start = System.nanoTime();
  int size = set.size();
  BytesRef bytes = new BytesRef(new byte[HEADER_SIZE + 8 * size]);

  // Encode encoding type
  Bytes.writeInt(bytes, this.getEncoding().ordinal());

  // Encode flag
  bytes.bytes[bytes.offset++] = (byte) (this.isPruned() ? 1 : 0);

  // Encode size of the set
  Bytes.writeInt(bytes, size);

  // Encode longs
  for (LongCursor i : set) {
    Bytes.writeLong(bytes, i.value);
  }

  logger.debug("Serialized {} terms - took {} ms", this.size(), (System.nanoTime() - start) / 1000000);

  bytes.length = bytes.offset;
  bytes.offset = 0;
  return bytes;
}
Example #7
Source File: TokenTreeBuilder.java From sasi with Apache License 2.0 | 5 votes |
public void add(SortedMap<Long, LongSet> data) {
  for (Map.Entry<Long, LongSet> newEntry : data.entrySet()) {
    LongSet found = tokens.get(newEntry.getKey());
    if (found == null)
      tokens.put(newEntry.getKey(), (found = new LongOpenHashSet(4)));

    for (LongCursor offset : newEntry.getValue())
      found.add(offset.value);
  }
}
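The builder above folds incoming offsets into an existing per-token set by iterating the incoming LongSet with a LongCursor and adding each primitive value. A small sketch of the same cursor-driven union is shown below; it uses the newer LongHashSet class name (the SASI example targets an older HPPC release where the set class is LongOpenHashSet), and the class LongSetUnion is made up for illustration.

import com.carrotsearch.hppc.LongHashSet;
import com.carrotsearch.hppc.cursors.LongCursor;

public class LongSetUnion {
  // Union of two HPPC long sets, copying elements cursor by cursor,
  // in the same way the builder above folds new offsets into an existing set.
  static LongHashSet union(LongHashSet a, LongHashSet b) {
    LongHashSet result = new LongHashSet(a.size() + b.size());
    for (LongCursor c : a) result.add(c.value);
    for (LongCursor c : b) result.add(c.value);
    return result;
  }

  public static void main(String[] args) {
    LongHashSet existing = new LongHashSet();
    existing.add(1L); existing.add(2L); existing.add(3L);
    LongHashSet incoming = new LongHashSet();
    incoming.add(3L); incoming.add(4L);

    System.out.println(union(existing, incoming)); // [1, 2, 3, 4] in some order
  }
}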
Example #8
Source File: OnDiskIndexTest.java From sasi with Apache License 2.0 | 5 votes |
private static Set<DecoratedKey> convert(TokenTreeBuilder offsets) {
  Set<DecoratedKey> result = new HashSet<>();
  Iterator<Pair<Long, LongSet>> offsetIter = offsets.iterator();
  while (offsetIter.hasNext()) {
    LongSet v = offsetIter.next().right;

    for (LongCursor offset : v)
      result.add(keyAt(offset.value));
  }
  return result;
}
Example #9
Source File: TokenTreeTest.java From sasi with Apache License 2.0 | 5 votes |
@Override
public Iterator<DecoratedKey> iterator() {
  List<DecoratedKey> keys = new ArrayList<>(offsets.size());
  for (LongCursor offset : offsets)
    keys.add(dk(offset.value));

  return keys.iterator();
}
Example #10
Source File: TokenTreeTest.java From sasi with Apache License 2.0 | 5 votes |
private static Set<DecoratedKey> convert(LongSet offsets) {
  Set<DecoratedKey> keys = new HashSet<>();
  for (LongCursor offset : offsets)
    keys.add(KEY_CONVERTER.apply(offset.value));

  return keys;
}
Example #11
Source File: SITransactor.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
private void resolveChildConflicts(Partition table, DataPut put, LongHashSet conflictingChildren) throws IOException {
  if (conflictingChildren != null && !conflictingChildren.isEmpty()) {
    DataDelete delete = opFactory.newDelete(put.key());
    Iterable<DataCell> cells = put.cells();
    for (LongCursor lc : conflictingChildren) {
      for (DataCell dc : cells) {
        delete.deleteColumn(dc.family(), dc.qualifier(), lc.value);
      }
      delete.deleteColumn(SIConstants.DEFAULT_FAMILY_BYTES, SIConstants.TOMBSTONE_COLUMN_BYTES, lc.value);
      delete.deleteColumn(SIConstants.DEFAULT_FAMILY_BYTES, SIConstants.COMMIT_TIMESTAMP_COLUMN_BYTES, lc.value);
    }
    delete.addAttribute(SIConstants.SUPPRESS_INDEXING_ATTRIBUTE_NAME, SIConstants.SUPPRESS_INDEXING_ATTRIBUTE_VALUE);
    table.delete(delete);
  }
}
Example #12
Source File: TermsByQueryActionTest.java From siren-join with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Tests the ordering by document score.
 */
@Test
public void testTermsByQueryWithLimitOrderByDocScore() throws Exception {
  // Enforce a single shard for the index, as with multiple shards it is difficult
  // to avoid having one shard with fewer than 5 even ids (i.e., to avoid that shard
  // returning odd ids).
  Map<String, Object> indexSettings = new HashMap<>();
  indexSettings.put("number_of_shards", 1);
  assertAcked(prepareCreate("test").setSettings(indexSettings));

  int numDocs = RandomizedTest.randomIntBetween(100, 2000);
  logger.info("--> indexing [" + numDocs + "] docs");
  for (int i = 0; i < numDocs / 2; i += 2) {
    client().prepareIndex("test", "type", "" + i)
            .setSource(jsonBuilder().startObject()
                    .field("int", i)
                    .field("text", "aaa")
                    .endObject())
            .execute().actionGet();
  }
  for (int i = 1; i < numDocs / 2; i += 2) {
    client().prepareIndex("test", "type", "" + i)
            .setSource(jsonBuilder().startObject()
                    .field("int", i)
                    .field("text", "aaa aaa")
                    .endObject())
            .execute().actionGet();
  }
  client().admin().indices().prepareRefresh("test").execute().actionGet();

  logger.info("--> lookup terms in field [int]");
  TermsByQueryResponse resp = new TermsByQueryRequestBuilder(client(), TermsByQueryAction.INSTANCE).setIndices("test")
          .setField("int")
          .setQuery(QueryBuilders.termQuery("text", "aaa"))
          .setOrderBy(TermsByQueryRequest.Ordering.DOC_SCORE)
          .setMaxTermsPerShard(5)
          .setTermsEncoding(TermsByQueryRequest.TermsEncoding.LONG)
          .execute()
          .actionGet();

  int expectedMaxResultSize = this.getNumShards("test").totalNumShards * 5;
  ElasticsearchAssertions.assertNoFailures(resp);
  assertThat(resp.getEncodedTermsSet(), notNullValue());
  assertThat(resp.getSize(), lessThanOrEqualTo(expectedMaxResultSize));

  TermsSet lTerms = NumericTermsSet.readFrom(resp.getEncodedTermsSet());
  assertThat(lTerms instanceof LongTermsSet, is(true));

  // If the ordering by document score worked, we should only have documents with text = aaa (even ids), and no
  // documents with text = aaa aaa (odd ids), as the first one will be ranked higher.
  Iterator<LongCursor> it = ((LongTermsSet) lTerms).getLongHashSet().iterator();
  while (it.hasNext()) {
    long value = it.next().value;
    assertThat(value % 2 == 0, is(true));
  }
}
Example #13
Source File: TokenTreeBuilder.java From sasi with Apache License 2.0 | 4 votes |
private void serializeOverflowCollisions(ByteBuffer buf) {
  if (overflowCollisions != null)
    for (LongCursor offset : overflowCollisions)
      buf.putLong(offset.value);
}
Example #14
Source File: TokenTreeTest.java From sasi with Apache License 2.0 | 4 votes |
@Test
public void buildWithMultipleMapsAndIterate() throws Exception {
  final SortedMap<Long, LongSet> merged = new TreeMap<>();
  final TokenTreeBuilder builder = new TokenTreeBuilder(simpleTokenMap).finish();
  builder.add(collidingTokensMap);

  merged.putAll(collidingTokensMap);
  for (Map.Entry<Long, LongSet> entry : simpleTokenMap.entrySet()) {
    if (merged.containsKey(entry.getKey())) {
      LongSet mergingOffsets = entry.getValue();
      LongSet existingOffsets = merged.get(entry.getKey());

      if (mergingOffsets.equals(existingOffsets))
        continue;

      Set<Long> mergeSet = new HashSet<>();
      for (LongCursor merging : mergingOffsets)
        mergeSet.add(merging.value);

      for (LongCursor existing : existingOffsets)
        mergeSet.add(existing.value);

      LongSet mergedResults = new LongOpenHashSet();
      for (Long result : mergeSet)
        mergedResults.add(result);

      merged.put(entry.getKey(), mergedResults);
    } else {
      merged.put(entry.getKey(), entry.getValue());
    }
  }

  final Iterator<Pair<Long, LongSet>> tokenIterator = builder.iterator();
  final Iterator<Map.Entry<Long, LongSet>> listIterator = merged.entrySet().iterator();
  while (tokenIterator.hasNext() && listIterator.hasNext()) {
    Pair<Long, LongSet> tokenNext = tokenIterator.next();
    Map.Entry<Long, LongSet> listNext = listIterator.next();

    Assert.assertEquals(listNext.getKey(), tokenNext.left);
    Assert.assertEquals(listNext.getValue(), tokenNext.right);
  }

  Assert.assertFalse("token iterator not finished", tokenIterator.hasNext());
  Assert.assertFalse("list iterator not finished", listIterator.hasNext());
}