org.apache.lucene.search.suggest.InputIterator Java Examples
The following examples show how to use
org.apache.lucene.search.suggest.InputIterator.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: JaspellLookup.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Rebuilds the Jaspell ternary search trie from the supplied entries.
 *
 * <p>Resets {@code count} to zero, creates a fresh {@link JaspellTernarySearchTrie} configured
 * with {@code editDistance}, and inserts every non-empty key together with its weight. Keys of
 * length zero are skipped and are not counted.
 *
 * @param iterator source of keys and weights; must report neither payloads nor contexts
 * @throws IllegalArgumentException if {@code iterator} reports payloads or contexts
 * @throws IOException declared by the overridden signature
 */
@Override
public void build(InputIterator iterator) throws IOException {
  if (iterator.hasPayloads()) {
    throw new IllegalArgumentException("this suggester doesn't support payloads");
  }
  if (iterator.hasContexts()) {
    throw new IllegalArgumentException("this suggester doesn't support contexts");
  }

  count = 0;
  trie = new JaspellTernarySearchTrie();
  trie.setMatchAlmostDiff(editDistance);

  final CharsRefBuilder keyChars = new CharsRefBuilder();
  for (BytesRef key = iterator.next(); key != null; key = iterator.next()) {
    // The weight is fetched for every entry, including the empty keys that are skipped below.
    final long weight = iterator.weight();
    if (key.length != 0) {
      keyChars.copyUTF8Bytes(key);
      trie.put(keyChars.toString(), weight);
      count++;
    }
  }
}
Example #2
Source File: TSTLookup.java From lucene-solr with Apache License 2.0 | 6 votes |
@Override public void build(InputIterator iterator) throws IOException { if (iterator.hasPayloads()) { throw new IllegalArgumentException("this suggester doesn't support payloads"); } if (iterator.hasContexts()) { throw new IllegalArgumentException("this suggester doesn't support contexts"); } root = new TernaryTreeNode(); // make sure it's sorted and the comparator uses UTF16 sort order iterator = new SortedInputIterator(tempDir, tempFileNamePrefix, iterator, utf8SortedAsUTF16SortOrder); count = 0; ArrayList<String> tokens = new ArrayList<>(); ArrayList<Number> vals = new ArrayList<>(); BytesRef spare; CharsRefBuilder charsSpare = new CharsRefBuilder(); while ((spare = iterator.next()) != null) { charsSpare.copyUTF8Bytes(spare); tokens.add(charsSpare.toString()); vals.add(Long.valueOf(iterator.weight())); count++; } autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root); }
Example #3
Source File: LuceneDictionary.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Returns an iterator over the terms of {@code field} in {@code reader}.
 *
 * @return a wrapper around the field's terms iterator, or {@link InputIterator#EMPTY} when
 *     {@code MultiTerms.getTerms} yields {@code null} for the field
 * @throws IOException declared by the overridden signature
 */
@Override
public final InputIterator getEntryIterator() throws IOException {
  final Terms fieldTerms = MultiTerms.getTerms(reader, field);
  if (fieldTerms == null) {
    return InputIterator.EMPTY;
  }
  return new InputIterator.InputIteratorWrapper(fieldTerms.iterator());
}
Example #4
Source File: FreeTextSuggester.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Builds this suggester from {@code iterator} by delegating to the two-argument {@code build}
 * overload, passing {@code IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB} as the second argument.
 *
 * @param iterator the entries to build from; forwarded unchanged
 * @throws IOException declared by the signature; propagated from the delegate
 */
@Override public void build(InputIterator iterator) throws IOException { build(iterator, IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB); }
Example #5
Source File: FSTCompletionLookup.java From lucene-solr with Apache License 2.0 | 4 votes |
@Override public void build(InputIterator iterator) throws IOException { if (iterator.hasPayloads()) { throw new IllegalArgumentException("this suggester doesn't support payloads"); } if (iterator.hasContexts()) { throw new IllegalArgumentException("this suggester doesn't support contexts"); } OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix); ExternalRefSorter externalSorter = new ExternalRefSorter(sorter); IndexOutput tempInput = tempDir.createTempOutput(tempFileNamePrefix, "input", IOContext.DEFAULT); String tempSortedFileName = null; OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput); OfflineSorter.ByteSequencesReader reader = null; // Push floats up front before sequences to sort them. For now, assume they are non-negative. // If negative floats are allowed some trickery needs to be done to find their byte order. count = 0; try { byte [] buffer = new byte [0]; ByteArrayDataOutput output = new ByteArrayDataOutput(buffer); BytesRef spare; int inputLineCount = 0; while ((spare = iterator.next()) != null) { if (spare.length + 4 >= buffer.length) { buffer = ArrayUtil.grow(buffer, spare.length + 4); } output.reset(buffer); output.writeInt(encodeWeight(iterator.weight())); output.writeBytes(spare.bytes, spare.offset, spare.length); writer.write(buffer, 0, output.getPosition()); inputLineCount++; } CodecUtil.writeFooter(tempInput); writer.close(); // We don't know the distribution of scores and we need to bucket them, so we'll sort // and divide into equal buckets. 
tempSortedFileName = sorter.sort(tempInput.getName()); tempDir.deleteFile(tempInput.getName()); FSTCompletionBuilder builder = new FSTCompletionBuilder( buckets, externalSorter, sharedTailLength); reader = new OfflineSorter.ByteSequencesReader(tempDir.openChecksumInput(tempSortedFileName, IOContext.READONCE), tempSortedFileName); long line = 0; int previousBucket = 0; int previousScore = 0; ByteArrayDataInput input = new ByteArrayDataInput(); BytesRef tmp2 = new BytesRef(); while (true) { BytesRef scratch = reader.next(); if (scratch == null) { break; } input.reset(scratch.bytes, scratch.offset, scratch.length); int currentScore = input.readInt(); int bucket; if (line > 0 && currentScore == previousScore) { bucket = previousBucket; } else { bucket = (int) (line * buckets / inputLineCount); } previousScore = currentScore; previousBucket = bucket; // Only append the input, discard the weight. tmp2.bytes = scratch.bytes; tmp2.offset = scratch.offset + input.getPosition(); tmp2.length = scratch.length - input.getPosition(); builder.add(tmp2, bucket); line++; count++; } // The two FSTCompletions share the same automaton. this.higherWeightsCompletion = builder.build(); this.normalCompletion = new FSTCompletion( higherWeightsCompletion.getFST(), false, exactMatchFirst); } finally { IOUtils.closeWhileHandlingException(reader, writer, externalSorter); IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName(), tempSortedFileName); } }
Example #6
Source File: WFSTCompletionLookup.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Creates the iterator by forwarding all three arguments to the superclass constructor.
 *
 * <p>When assertions are enabled, verifies that {@code source} does not report payloads.
 *
 * @param tempDir passed through to the superclass
 * @param tempFileNamePrefix passed through to the superclass
 * @param source the wrapped input; asserted to have no payloads
 * @throws IOException declared by the signature; propagated from the superclass constructor
 */
WFSTInputIterator(Directory tempDir, String tempFileNamePrefix, InputIterator source)
    throws IOException {
  super(tempDir, tempFileNamePrefix, source);
  assert !source.hasPayloads();
}
Example #7
Source File: PlainTextDictionary.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Returns an {@link InputIterator} view over a newly created {@code FileIterator}.
 *
 * @return an {@code InputIterator.InputIteratorWrapper} around a fresh {@code FileIterator}
 * @throws IOException declared by the overridden signature
 */
@Override
public InputIterator getEntryIterator() throws IOException {
  final FileIterator source = new FileIterator();
  return new InputIterator.InputIteratorWrapper(source);
}
Example #8
Source File: HighFrequencyDictionary.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Returns a newly created {@code HighFrequencyIterator} over this dictionary's entries.
 *
 * @return a fresh {@code HighFrequencyIterator}
 * @throws IOException declared by the overridden signature
 */
@Override
public final InputIterator getEntryIterator() throws IOException {
  final InputIterator entries = new HighFrequencyIterator();
  return entries;
}
Example #9
Source File: TestFreeTextSuggester.java From lucene-solr with Apache License 2.0 | 4 votes |
@Ignore public void testWiki() throws Exception { final LineFileDocs lfd = new LineFileDocs(null, "/lucenedata/enwiki/enwiki-20120502-lines-1k.txt"); // Skip header: lfd.nextDoc(); Analyzer analyzer = new MockAnalyzer(random()); FreeTextSuggester sug = new FreeTextSuggester(analyzer); sug.build(new InputIterator() { private int count; @Override public long weight() { return 1; } @Override public BytesRef next() { Document doc; try { doc = lfd.nextDoc(); } catch (IOException ioe) { throw new RuntimeException(ioe); } if (doc == null) { return null; } if (count++ == 10000) { return null; } return new BytesRef(doc.get("body")); } @Override public BytesRef payload() { return null; } @Override public boolean hasPayloads() { return false; } @Override public Set<BytesRef> contexts() { return null; } @Override public boolean hasContexts() { return false; } }); if (VERBOSE) { System.out.println(sug.ramBytesUsed() + " bytes"); List<LookupResult> results = sug.lookup("general r", 10); System.out.println("results:"); for(LookupResult result : results) { System.out.println(" " + result); } } analyzer.close(); lfd.close(); }
Example #10
Source File: RandomTestDictionaryFactory.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Returns an {@link InputIterator} view over a newly created {@code RandomByteRefIterator}.
 *
 * @return an {@code InputIterator.InputIteratorWrapper} around a fresh
 *     {@code RandomByteRefIterator}
 * @throws IOException declared by the overridden signature
 */
@Override
public InputIterator getEntryIterator() throws IOException {
  final RandomByteRefIterator source = new RandomByteRefIterator();
  return new InputIterator.InputIteratorWrapper(source);
}
Example #11
Source File: Dictionary.java From lucene-solr with Apache License 2.0 | 2 votes |
/**
 * Returns an iterator over all the entries.
 *
 * @return an {@link InputIterator} over the entries
 * @throws IOException declared by the signature; the failure conditions depend on the
 *     implementation
 */
InputIterator getEntryIterator() throws IOException;