org.apache.lucene.util.BytesRefIterator#next

Source File: FSTCompletionBuilder.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Builds the final automaton from a list of entries.
 */
private FST<Object> buildAutomaton(BytesRefSorter sorter) throws IOException {
  // Build the automaton.
  final Outputs<Object> outputs = NoOutputs.getSingleton();
  final Object empty = outputs.getNoOutput();
  final FSTCompiler<Object> fstCompiler = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs)
      .shareMaxTailLength(shareMaxTailLength).build();

  BytesRefBuilder scratch = new BytesRefBuilder();
  BytesRef entry;
  final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
  int count = 0;
  BytesRefIterator iter = sorter.iterator();
  while((entry = iter.next()) != null) {
    count++;
    if (scratch.get().compareTo(entry) != 0) {
      fstCompiler.add(Util.toIntsRef(entry, scratchIntsRef), empty);
      scratch.copyBytes(entry);
    }
  }
  
  return count == 0 ? null : fstCompiler.compile();
}

Source File: BytesRefSortersTest.java From lucene-solr with Apache License 2.0

6 votes

private void check(BytesRefSorter sorter) throws Exception {
  for (int i = 0; i < 100; i++) {
    byte [] current = new byte [random().nextInt(256)];
    random().nextBytes(current);
    sorter.add(new BytesRef(current));
  }

  // Create two iterators and check that they're aligned with each other.
  BytesRefIterator i1 = sorter.iterator();
  BytesRefIterator i2 = sorter.iterator();
  
  // Verify sorter contract.
  expectThrows(IllegalStateException.class, () -> {
    sorter.add(new BytesRef(new byte [1]));
  });

  while (true) {
    BytesRef spare1 = i1.next();
    BytesRef spare2 = i2.next();
    assertEquals(spare1, spare2);
    if (spare1 == null) {
      break;
    }
  }
}

Source File: DisjunctionMatchesIterator.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Create a {@link DisjunctionMatchesIterator} over a list of terms extracted from a {@link BytesRefIterator}
 *
 * Only terms that have at least one match in the given document will be included
 */
static MatchesIterator fromTermsEnum(LeafReaderContext context, int doc, Query query, String field, BytesRefIterator terms) throws IOException {
  Objects.requireNonNull(field);
  Terms t = context.reader().terms(field);
  if (t == null)
    return null;
  TermsEnum te = t.iterator();
  PostingsEnum reuse = null;
  for (BytesRef term = terms.next(); term != null; term = terms.next()) {
    if (te.seekExact(term)) {
      PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
      if (pe.advance(doc) == doc) {
        return new TermsEnumDisjunctionMatchesIterator(new TermMatchesIterator(query, pe), terms, te, doc, query);
      }
      else {
        reuse = pe;
      }
    }
  }
  return null;
}

Source File: Netty4Utils.java From crate with Apache License 2.0

6 votes

/**
 * Turns the given BytesReference into a ByteBuf. Note: the returned ByteBuf will reference the internal
 * pages of the BytesReference. Don't free the bytes of reference before the ByteBuf goes out of scope.
 */
public static ByteBuf toByteBuf(final BytesReference reference) {
    if (reference.length() == 0) {
        return Unpooled.EMPTY_BUFFER;
    }
    if (reference instanceof ByteBufBytesReference) {
        return ((ByteBufBytesReference) reference).toByteBuf();
    } else {
        final BytesRefIterator iterator = reference.iterator();
        // usually we have one, two, or three components from the header, the message, and a buffer
        final List<ByteBuf> buffers = new ArrayList<>(3);
        try {
            BytesRef slice;
            while ((slice = iterator.next()) != null) {
                buffers.add(Unpooled.wrappedBuffer(slice.bytes, slice.offset, slice.length));
            }
            final CompositeByteBuf composite = Unpooled.compositeBuffer(buffers.size());
            composite.addComponents(true, buffers);
            return composite;
        } catch (IOException ex) {
            throw new AssertionError("no IO happens here", ex);
        }
    }
}

Source File: BytesReference.java From crate with Apache License 2.0

6 votes

@Override
public int hashCode() {
    if (hash == null) {
        final BytesRefIterator iterator = iterator();
        BytesRef ref;
        int result = 1;
        try {
            while ((ref = iterator.next()) != null) {
                for (int i = 0; i < ref.length; i++) {
                    result = 31 * result + ref.bytes[ref.offset + i];
                }
            }
        } catch (IOException ex) {
            throw new AssertionError("wont happen", ex);
        }
        return hash = result;
    } else {
        return hash.intValue();
    }
}

Source File: BytesReferenceStreamInput.java From crate with Apache License 2.0

5 votes

BytesReferenceStreamInput(BytesRefIterator iterator, final int length) throws IOException {
    this.iterator = iterator;
    this.slice = iterator.next();
    this.length = length;
    this.offset = 0;
    this.sliceIndex = 0;
}

Source File: BytesReference.java From crate with Apache License 2.0

5 votes

/**
 * Writes the bytes directly to the output stream.
 */
public void writeTo(OutputStream os) throws IOException {
    final BytesRefIterator iterator = iterator();
    BytesRef ref;
    while ((ref = iterator.next()) != null) {
        os.write(ref.bytes, ref.offset, ref.length);
    }
}

Source File: CompositeBytesReference.java From crate with Apache License 2.0

5 votes

@Override
public BytesRef toBytesRef() {
    BytesRefBuilder builder = new BytesRefBuilder();
    builder.grow(length());
    BytesRef spare;
    BytesRefIterator iterator = iterator();
    try {
        while ((spare = iterator.next()) != null) {
            builder.append(spare);
        }
    } catch (IOException ex) {
        throw new AssertionError("won't happen", ex); // this is really an error since we don't do IO in our bytesreferences
    }
    return builder.toBytesRef();
}

Source File: SpellChecker.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Indexes the data from the given {@link Dictionary}.
 * @param dict Dictionary to index
 * @param config {@link IndexWriterConfig} to use
 * @param fullMerge whether or not the spellcheck index should be fully merged
 * @throws AlreadyClosedException if the Spellchecker is already closed
 * @throws IOException If there is a low-level I/O error.
 */
public final void indexDictionary(Dictionary dict, IndexWriterConfig config, boolean fullMerge) throws IOException {
  synchronized (modifyCurrentIndexLock) {
    ensureOpen();
    final Directory dir = this.spellIndex;
    final IndexWriter writer = new IndexWriter(dir, config);
    IndexSearcher indexSearcher = obtainSearcher();
    final List<TermsEnum> termsEnums = new ArrayList<>();

    final IndexReader reader = searcher.getIndexReader();
    if (reader.maxDoc() > 0) {
      for (final LeafReaderContext ctx : reader.leaves()) {
        Terms terms = ctx.reader().terms(F_WORD);
        if (terms != null)
          termsEnums.add(terms.iterator());
      }
    }
    
    boolean isEmpty = termsEnums.isEmpty();

    try { 
      BytesRefIterator iter = dict.getEntryIterator();
      BytesRef currentTerm;
      
      terms: while ((currentTerm = iter.next()) != null) {

        String word = currentTerm.utf8ToString();
        int len = word.length();
        if (len < 3) {
          continue; // too short we bail but "too long" is fine...
        }

        if (!isEmpty) {
          for (TermsEnum te : termsEnums) {
            if (te.seekExact(currentTerm)) {
              continue terms;
            }
          }
        }

        // ok index the word
        Document doc = createDocument(word, getMin(len), getMax(len));
        writer.addDocument(doc);
      }
    } finally {
      releaseSearcher(indexSearcher);
    }
    if (fullMerge) {
      writer.forceMerge(1);
    }
    // close writer
    writer.close();
    // TODO: this isn't that great, maybe in the future SpellChecker should take
    // IWC in its ctor / keep its writer open?
    
    // also re-open the spell index to see our own changes when the next suggestion
    // is fetched:
    swapSearcher(dir);
  }
}

Java Code Examples for org.apache.lucene.util.BytesRefIterator#next()