org.apache.lucene.search.suggest.InputIterator Java Examples

Source File: JaspellLookup.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Rebuilds the Jaspell ternary search trie from the supplied entries.
 *
 * <p>Any previously built trie is replaced and {@code count} is reset. Entries
 * whose term is empty are skipped and do not contribute to {@code count}.
 *
 * @param iterator source of terms and weights; must expose neither payloads nor contexts
 * @throws IllegalArgumentException if the iterator reports payloads or contexts
 * @throws IOException declared for the underlying iteration
 */
@Override
public void build(InputIterator iterator) throws IOException {
  if (iterator.hasPayloads()) {
    throw new IllegalArgumentException("this suggester doesn't support payloads");
  }
  if (iterator.hasContexts()) {
    throw new IllegalArgumentException("this suggester doesn't support contexts");
  }

  count = 0;
  trie = new JaspellTernarySearchTrie();
  trie.setMatchAlmostDiff(editDistance);

  // Reused scratch buffer for converting each UTF-8 term into UTF-16 chars.
  final CharsRefBuilder utf16Scratch = new CharsRefBuilder();
  for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
    // The weight is read for every entry, including the empty ones that are skipped below.
    final long weight = iterator.weight();
    if (term.length != 0) {
      utf16Scratch.copyUTF8Bytes(term);
      trie.put(utf16Scratch.toString(), weight);
      count++;
    }
  }
}

Source File: TSTLookup.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Rebuilds the ternary search tree from the supplied entries.
 *
 * <p>The entries are first re-sorted using {@code utf8SortedAsUTF16SortOrder},
 * then collected into parallel key/weight lists that are handed to
 * {@code autocomplete.balancedTree} to populate a fresh {@code root}.
 *
 * @param iterator source of terms and weights; must expose neither payloads nor contexts
 * @throws IllegalArgumentException if the iterator reports payloads or contexts
 * @throws IOException declared for sorting and the underlying iteration
 */
@Override
public void build(InputIterator iterator) throws IOException {
  if (iterator.hasPayloads()) {
    throw new IllegalArgumentException("this suggester doesn't support payloads");
  }
  if (iterator.hasContexts()) {
    throw new IllegalArgumentException("this suggester doesn't support contexts");
  }
  root = new TernaryTreeNode();

  // make sure it's sorted and the comparator uses UTF16 sort order
  iterator = new SortedInputIterator(tempDir, tempFileNamePrefix, iterator, utf8SortedAsUTF16SortOrder);
  count = 0;

  final ArrayList<String> keys = new ArrayList<>();
  final ArrayList<Number> weights = new ArrayList<>();
  // Reused scratch buffer for converting each UTF-8 term into UTF-16 chars.
  final CharsRefBuilder utf16Scratch = new CharsRefBuilder();
  for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
    utf16Scratch.copyUTF8Bytes(term);
    keys.add(utf16Scratch.toString());
    weights.add(Long.valueOf(iterator.weight()));
    count++;
  }

  final Object[] keyArray = keys.toArray();
  final Object[] weightArray = weights.toArray();
  autocomplete.balancedTree(keyArray, weightArray, 0, keys.size() - 1, root);
}

Source File: LuceneDictionary.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Returns an iterator over the terms of {@code field} in {@code reader}.
 *
 * @return a wrapper around the field's terms iterator, or {@link InputIterator#EMPTY}
 *     when {@code MultiTerms.getTerms} yields {@code null} for the field
 * @throws IOException declared for the terms lookup
 */
@Override
public final InputIterator getEntryIterator() throws IOException {
  final Terms terms = MultiTerms.getTerms(reader, field);
  if (terms == null) {
    return InputIterator.EMPTY;
  }
  return new InputIterator.InputIteratorWrapper(terms.iterator());
}

Source File: FreeTextSuggester.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Builds the suggester from the given entries by delegating to the two-argument
 * {@code build} overload, passing {@code IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB}
 * as the second argument.
 *
 * @param iterator source of the entries to build from
 * @throws IOException declared for the delegated build
 */
@Override
public void build(InputIterator iterator) throws IOException {
  build(iterator, IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
}

Source File: FSTCompletionLookup.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Builds the FST-based completion structures from the given entries.
 *
 * <p>Each entry is written to a temporary file as its encoded weight followed by
 * the term bytes. The file is then sorted offline, and the sorted entries are read
 * back and assigned a bucket derived from their position in the sorted file before
 * being added to an {@code FSTCompletionBuilder}. On completion both
 * {@code higherWeightsCompletion} and {@code normalCompletion} are replaced, and
 * {@code count} holds the number of entries added to the builder.
 *
 * @param iterator source of terms and weights; must expose neither payloads nor contexts
 * @throws IllegalArgumentException if the iterator reports payloads or contexts
 * @throws IOException declared for temp-file writing, sorting and reading
 */
@Override
public void build(InputIterator iterator) throws IOException {
  if (iterator.hasPayloads()) {
    throw new IllegalArgumentException("this suggester doesn't support payloads");
  }
  if (iterator.hasContexts()) {
    throw new IllegalArgumentException("this suggester doesn't support contexts");
  }

  OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix);
  ExternalRefSorter externalSorter = new ExternalRefSorter(sorter);
  IndexOutput tempInput = tempDir.createTempOutput(tempFileNamePrefix, "input", IOContext.DEFAULT);
  // Stays null until the sort has produced its output file; still passed to the cleanup in finally.
  String tempSortedFileName = null;

  OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
  OfflineSorter.ByteSequencesReader reader = null;

  // Push floats up front before sequences to sort them. For now, assume they are non-negative.
  // If negative floats are allowed some trickery needs to be done to find their byte order.
  count = 0;
  try {
    byte [] buffer = new byte [0];
    ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
    BytesRef spare;
    // Number of entries written to the unsorted temp file; used below as the bucket divisor.
    int inputLineCount = 0;
    while ((spare = iterator.next()) != null) {
      // Make room for the encoded weight (the +4) plus the term bytes.
      if (spare.length + 4 >= buffer.length) {
        buffer = ArrayUtil.grow(buffer, spare.length + 4);
      }

      // Re-point the output at the (possibly reallocated) buffer, starting from the beginning.
      output.reset(buffer);
      output.writeInt(encodeWeight(iterator.weight()));
      output.writeBytes(spare.bytes, spare.offset, spare.length);
      writer.write(buffer, 0, output.getPosition());
      inputLineCount++;
    }
    CodecUtil.writeFooter(tempInput);
    writer.close();

    // We don't know the distribution of scores and we need to bucket them, so we'll sort
    // and divide into equal buckets.
    tempSortedFileName = sorter.sort(tempInput.getName());
    // The unsorted input is no longer needed once the sorted file exists.
    tempDir.deleteFile(tempInput.getName());

    FSTCompletionBuilder builder = new FSTCompletionBuilder(
        buckets, externalSorter, sharedTailLength);

    reader = new OfflineSorter.ByteSequencesReader(tempDir.openChecksumInput(tempSortedFileName, IOContext.READONCE), tempSortedFileName);
    // Zero-based index of the current entry within the sorted file.
    long line = 0;
    int previousBucket = 0;
    int previousScore = 0;
    ByteArrayDataInput input = new ByteArrayDataInput();
    // Reused view over the term bytes of the current entry (weight prefix excluded).
    BytesRef tmp2 = new BytesRef();
    while (true) {
      BytesRef scratch = reader.next();
      if (scratch == null) {
        break;
      }
      input.reset(scratch.bytes, scratch.offset, scratch.length);
      int currentScore = input.readInt();

      int bucket;
      if (line > 0 && currentScore == previousScore) {
        // Entries with the same score as the previous entry share its bucket.
        bucket = previousBucket;
      } else {
        // Otherwise the bucket is proportional to the entry's position in the sorted file.
        bucket = (int) (line * buckets / inputLineCount);
      }
      previousScore = currentScore;
      previousBucket = bucket;

      // Only append the input, discard the weight.
      tmp2.bytes = scratch.bytes;
      tmp2.offset = scratch.offset + input.getPosition();
      tmp2.length = scratch.length - input.getPosition();
      builder.add(tmp2, bucket);

      line++;
      count++;
    }

    // The two FSTCompletions share the same automaton.
    this.higherWeightsCompletion = builder.build();
    this.normalCompletion = new FSTCompletion(
        higherWeightsCompletion.getFST(), false, exactMatchFirst);
    
  } finally {
    // Runs on both success and failure: close the reader, writer and external sorter,
    // then remove both temp files (helper names indicate errors here are suppressed).
    IOUtils.closeWhileHandlingException(reader, writer, externalSorter);
    IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName(), tempSortedFileName);
  }
}

Source File: WFSTCompletionLookup.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Creates the iterator by passing all arguments through to the superclass
 * constructor.
 *
 * <p>When assertions are enabled, this also checks that {@code source} does not
 * carry payloads.
 *
 * @param tempDir directory forwarded to the superclass
 * @param tempFileNamePrefix temp-file name prefix forwarded to the superclass
 * @param source the entries to iterate over; expected to have no payloads
 * @throws IOException declared for the superclass constructor
 */
WFSTInputIterator(Directory tempDir, String tempFileNamePrefix, InputIterator source) throws IOException {
  super(tempDir, tempFileNamePrefix, source);
  assert !source.hasPayloads();
}

Source File: PlainTextDictionary.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Returns an iterator over this dictionary's entries, backed by a new
 * {@code FileIterator} wrapped in an {@code InputIterator.InputIteratorWrapper}.
 *
 * @return the wrapped entry iterator
 * @throws IOException declared by the overridden method
 */
@Override
public InputIterator getEntryIterator() throws IOException {
  final FileIterator lines = new FileIterator();
  return new InputIterator.InputIteratorWrapper(lines);
}

Source File: HighFrequencyDictionary.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Returns a new {@code HighFrequencyIterator} over this dictionary's entries.
 *
 * @return a fresh iterator instance on every call
 * @throws IOException declared by the overridden method
 */
@Override
public final InputIterator getEntryIterator() throws IOException {
  final InputIterator entries = new HighFrequencyIterator();
  return entries;
}

Source File: TestFreeTextSuggester.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Manual (ignored) smoke test that builds a {@code FreeTextSuggester} from the
 * {@code body} field of a line-docs file and, when {@code VERBOSE} is set, prints
 * the suggester's RAM usage and the results of a sample lookup.
 *
 * <p>The first document is skipped as a header, and at most 10000 bodies are fed
 * to the suggester, each with weight 1 and without payloads or contexts.
 *
 * <p>The analyzer and the line-docs reader are closed in {@code finally} blocks so
 * that they are released even when building or looking up throws.
 *
 * @throws Exception if reading the documents, building or lookup fails
 */
@Ignore
public void testWiki() throws Exception {
  final LineFileDocs lfd = new LineFileDocs(null, "/lucenedata/enwiki/enwiki-20120502-lines-1k.txt");
  try {
    // Skip header:
    lfd.nextDoc();
    Analyzer analyzer = new MockAnalyzer(random());
    try {
      FreeTextSuggester sug = new FreeTextSuggester(analyzer);
      sug.build(new InputIterator() {

          // Number of documents handed out so far; caps the input at 10000 entries.
          private int count;

          @Override
          public long weight() {
            return 1;
          }

          @Override
          public BytesRef next() {
            Document doc;
            try {
              doc = lfd.nextDoc();
            } catch (IOException ioe) {
              // next() declares no checked exception here, so rethrow unchecked with the cause.
              throw new RuntimeException(ioe);
            }
            if (doc == null) {
              return null;
            }
            if (count++ == 10000) {
              return null;
            }
            return new BytesRef(doc.get("body"));
          }

          @Override
          public BytesRef payload() {
            return null;
          }

          @Override
          public boolean hasPayloads() {
            return false;
          }

          @Override
          public Set<BytesRef> contexts() {
            return null;
          }

          @Override
          public boolean hasContexts() {
            return false;
          }
        });
      if (VERBOSE) {
        System.out.println(sug.ramBytesUsed() + " bytes");

        List<LookupResult> results = sug.lookup("general r", 10);
        System.out.println("results:");
        for (LookupResult result : results) {
          System.out.println("  " + result);
        }
      }
    } finally {
      // Same close order as before (analyzer first), but now also on failure.
      analyzer.close();
    }
  } finally {
    lfd.close();
  }
}

Source File: RandomTestDictionaryFactory.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Returns an iterator over this dictionary's entries, backed by a new
 * {@code RandomByteRefIterator} wrapped in an {@code InputIterator.InputIteratorWrapper}.
 *
 * @return the wrapped entry iterator
 * @throws IOException declared by the overridden method
 */
@Override
public InputIterator getEntryIterator() throws IOException {
  final RandomByteRefIterator randomTerms = new RandomByteRefIterator();
  return new InputIterator.InputIteratorWrapper(randomTerms);
}

Source File: Dictionary.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Returns an iterator over all the entries of this dictionary.
 *
 * @return an {@link InputIterator} over the entries
 * @throws IOException if an I/O error occurs while creating the iterator
 */
InputIterator getEntryIterator() throws IOException;

org.apache.lucene.search.suggest.InputIterator Java Examples