org.apache.lucene.search.spell.Dictionary Java Examples
The following examples show how to use
org.apache.lucene.search.spell.Dictionary.
Each example is taken from an open-source project; the source file and license are noted above the code.
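Before the project examples, here is a minimal, self-contained sketch of the basic Dictionary pattern: wrap an IndexReader in a LuceneDictionary and walk the terms it exposes through its InputIterator. This sketch is not taken from any of the projects below; the index path and the "title" field name are placeholder assumptions, and it targets a recent Lucene version where Dictionary exposes getEntryIterator() (older versions, as in the webdsl example further down, use getWordsIterator() instead).

import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class DictionaryTour {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(Paths.get("/path/to/index")); // placeholder index location
    try (IndexReader reader = DirectoryReader.open(dir)) {
      // LuceneDictionary enumerates the terms of a single field; it carries no payloads.
      Dictionary dictionary = new LuceneDictionary(reader, "title"); // "title" is an assumed field name
      InputIterator it = dictionary.getEntryIterator();
      BytesRef term;
      while ((term = it.next()) != null) {
        System.out.println(term.utf8ToString() + " (weight=" + it.weight() + ")");
      }
    }
    dir.close();
  }
}

The DocumentDictionary, DocumentValueSourceDictionary, FileDictionary, and HighFrequencyDictionary classes used in the examples below follow the same contract but add weights, payloads, contexts, or frequency thresholds.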
Example #1
Source File: DocumentDictionaryTest.java From lucene-solr with Apache License 2.0
@Test
public void testEmptyReader() throws IOException {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  // Make sure the index is created?
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  writer.commit();
  writer.close();
  IndexReader ir = DirectoryReader.open(dir);
  Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
  InputIterator inputIterator = dictionary.getEntryIterator();

  assertNull(inputIterator.next());
  assertEquals(inputIterator.weight(), 0);
  assertNull(inputIterator.payload());

  IOUtils.close(ir, analyzer, dir);
}
Example #2
Source File: DocumentValueSourceDictionaryTest.java From lucene-solr with Apache License 2.0
@Test
public void testValueSourceEmptyReader() throws IOException {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  // Make sure the index is created?
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  writer.commit();
  writer.close();
  IndexReader ir = DirectoryReader.open(dir);
  Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, LongValuesSource.constant(10), PAYLOAD_FIELD_NAME);
  InputIterator inputIterator = dictionary.getEntryIterator();

  assertNull(inputIterator.next());
  assertEquals(inputIterator.weight(), 0);
  assertNull(inputIterator.payload());

  IOUtils.close(ir, analyzer, dir);
}
Example #3
Source File: DocumentValueSourceDictionaryTest.java From lucene-solr with Apache License 2.0
@Test
public void testLongValuesSourceEmptyReader() throws IOException {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  // Make sure the index is created?
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  writer.commit();
  writer.close();
  IndexReader ir = DirectoryReader.open(dir);
  Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, LongValuesSource.constant(10), PAYLOAD_FIELD_NAME);
  InputIterator inputIterator = dictionary.getEntryIterator();

  assertNull(inputIterator.next());
  assertEquals(inputIterator.weight(), 0);
  assertNull(inputIterator.payload());

  IOUtils.close(ir, analyzer, dir);
}
Example #4
Source File: FileDictionaryFactory.java From lucene-solr with Apache License 2.0
@Override
public Dictionary create(SolrCore core, SolrIndexSearcher searcher) {
  if (params == null) {
    // should not happen; implies setParams was not called
    throw new IllegalStateException("Value of params not set");
  }

  String sourceLocation = (String) params.get(Suggester.LOCATION);

  if (sourceLocation == null) {
    throw new IllegalArgumentException(Suggester.LOCATION + " parameter is mandatory for using FileDictionary");
  }

  String fieldDelimiter = (params.get(FIELD_DELIMITER) != null)
      ? (String) params.get(FIELD_DELIMITER)
      : FileDictionary.DEFAULT_FIELD_DELIMITER;

  try {
    return new FileDictionary(new InputStreamReader(
        core.getResourceLoader().openResource(sourceLocation), StandardCharsets.UTF_8), fieldDelimiter);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
Example #5
Source File: DocumentDictionaryFactory.java From lucene-solr with Apache License 2.0
@Override
public Dictionary create(SolrCore core, SolrIndexSearcher searcher) {
  if (params == null) {
    // should not happen; implies setParams was not called
    throw new IllegalStateException("Value of params not set");
  }

  String field = (String) params.get(FIELD);
  String weightField = (String) params.get(WEIGHT_FIELD);
  String payloadField = (String) params.get(PAYLOAD_FIELD);
  String contextField = (String) params.get(CONTEXT_FIELD);

  if (field == null) {
    throw new IllegalArgumentException(FIELD + " is a mandatory parameter");
  }

  return new DocumentDictionary(searcher.getIndexReader(), field, weightField, payloadField, contextField);
}
Example #6
Source File: HighFrequencyDictionaryFactory.java From lucene-solr with Apache License 2.0
@Override
public Dictionary create(SolrCore core, SolrIndexSearcher searcher) {
  if (params == null) {
    // should not happen; implies setParams was not called
    throw new IllegalStateException("Value of params not set");
  }

  String field = (String) params.get(SolrSpellChecker.FIELD);

  if (field == null) {
    throw new IllegalArgumentException(SolrSpellChecker.FIELD + " is a mandatory parameter");
  }

  float threshold = params.get(THRESHOLD_TOKEN_FREQUENCY) == null
      ? 0.0f
      : (Float) params.get(THRESHOLD_TOKEN_FREQUENCY);

  return new HighFrequencyDictionary(searcher.getIndexReader(), field, threshold);
}
Example #7
Source File: AutoCompleter.java From webdsl with Apache License 2.0
/**
 * Indexes the data from the given reader.
 * @param reader Source index reader, from which autocomplete words are obtained for the defined field
 * @param field the field of the source index reader to index for autocompletion
 * @param mergeFactor mergeFactor to use when indexing
 * @param ramMB the max amount of memory in MB to use
 * @param optimize whether or not the autocomplete index should be optimized
 * @throws AlreadyClosedException if the Autocompleter is already closed
 * @throws IOException
 */
public final void indexDictionary(IndexReader reader, String field, int mergeFactor, int ramMB, boolean optimize) throws IOException {
  synchronized (modifyCurrentIndexLock) {
    ensureOpen();
    final Directory dir = this.autoCompleteIndex;
    final Dictionary dict = new LuceneDictionary(reader, field);
    final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT,
        new WhitespaceAnalyzer(Version.LUCENE_CURRENT)).setRAMBufferSizeMB(ramMB));
    IndexSearcher indexSearcher = obtainSearcher();
    final List<IndexReader> readers = new ArrayList<IndexReader>();

    if (searcher.maxDoc() > 0) {
      ReaderUtil.gatherSubReaders(readers, searcher.getIndexReader());
    }

    // clear the index
    writer.deleteAll();

    try {
      Iterator<String> iter = dict.getWordsIterator();
      while (iter.hasNext()) {
        String word = iter.next();
        // ok index the word
        Document doc = createDocument(word, reader.docFreq(new Term(field, word)));
        writer.addDocument(doc);
      }
    } finally {
      releaseSearcher(indexSearcher);
    }

    // close writer
    if (optimize) writer.optimize();
    writer.close();

    // also re-open the autocomplete index to see our own changes when the next suggestion
    // is fetched:
    swapSearcher(dir);
  }
}
Example #8
Source File: DocumentDictionaryTest.java From lucene-solr with Apache License 2.0
@Test
public void testWithOptionalPayload() throws IOException {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);

  // Create a document that is missing the payload field
  Document doc = new Document();
  Field field = new TextField(FIELD_NAME, "some field", Field.Store.YES);
  doc.add(field);
  // do not store the payload or the contexts
  Field weight = new NumericDocValuesField(WEIGHT_FIELD_NAME, 100);
  doc.add(weight);
  writer.addDocument(doc);
  writer.commit();
  writer.close();
  IndexReader ir = DirectoryReader.open(dir);

  // Even though the payload field is missing, the dictionary iterator should not skip the document
  // because the payload field is optional.
  Dictionary dictionaryOptionalPayload = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
  InputIterator inputIterator = dictionaryOptionalPayload.getEntryIterator();
  BytesRef f = inputIterator.next();
  assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
  IndexableField weightField = doc.getField(WEIGHT_FIELD_NAME);
  assertEquals(inputIterator.weight(), weightField.numericValue().longValue());
  IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
  assertNull(payloadField);
  assertTrue(inputIterator.payload().length == 0);
  IOUtils.close(ir, analyzer, dir);
}
Example #9
Source File: DocumentDictionaryTest.java From lucene-solr with Apache License 2.0
@Test
public void testMultiValuedField() throws IOException {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwc = newIndexWriterConfig(random(), analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);

  List<Suggestion> suggestions = indexMultiValuedDocuments(atLeast(1000), writer);
  writer.commit();
  writer.close();

  IndexReader ir = DirectoryReader.open(dir);
  Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME, CONTEXT_FIELD_NAME);
  InputIterator inputIterator = dictionary.getEntryIterator();
  BytesRef f;
  Iterator<Suggestion> suggestionsIter = suggestions.iterator();
  while ((f = inputIterator.next()) != null) {
    Suggestion nextSuggestion = suggestionsIter.next();
    assertTrue(f.equals(nextSuggestion.term));
    long weight = nextSuggestion.weight;
    assertEquals(inputIterator.weight(), (weight != -1) ? weight : 0);
    assertEquals(inputIterator.payload(), nextSuggestion.payload);
    assertTrue(inputIterator.contexts().equals(nextSuggestion.contexts));
  }
  assertFalse(suggestionsIter.hasNext());
  IOUtils.close(ir, analyzer, dir);
}
Example #10
Source File: TestHighFrequencyDictionary.java From lucene-solr with Apache License 2.0
public void testEmpty() throws Exception {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  writer.commit();
  writer.close();
  IndexReader ir = DirectoryReader.open(dir);
  Dictionary dictionary = new HighFrequencyDictionary(ir, "bogus", 0.1f);
  BytesRefIterator tf = dictionary.getEntryIterator();
  assertNull(tf.next());
  dir.close();
}
Example #11
Source File: LuceneWordSearch.java From preDict with GNU Lesser General Public License v3.0
@Override
public void indexingDone() {
  try {
    spellChecker = new DirectSpellChecker();
    spellChecker.setMaxEdits(2);
    spellChecker.setAccuracy(0.1f);
    spellChecker.setMinPrefix(0);
    reader = DirectoryReader.open(writer);
    fuzzySuggester = new FuzzySuggester(directory, "", writer.getAnalyzer());
    Dictionary dict = new DocumentValueSourceDictionary(reader, WORD_FIELD, new LongValuesSource() {
      @Override
      public boolean needsScores() {
        return false;
      }

      @Override
      public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
        return null;
      }
    });
    fuzzySuggester.build(dict);
    writer.close();
    searcher = new IndexSearcher(DirectoryReader.open(directory));
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
Example #12
Source File: DocumentExpressionDictionaryFactory.java From lucene-solr with Apache License 2.0
@Override
public Dictionary create(SolrCore core, SolrIndexSearcher searcher) {
  if (params == null) {
    // should not happen; implies setParams was not called
    throw new IllegalStateException("Value of params not set");
  }

  String field = (String) params.get(FIELD);
  String payloadField = (String) params.get(PAYLOAD_FIELD);
  String weightExpression = (String) params.get(WEIGHT_EXPRESSION);
  Set<SortField> sortFields = new HashSet<>();

  if (field == null) {
    throw new IllegalArgumentException(FIELD + " is a mandatory parameter");
  }

  if (weightExpression == null) {
    throw new IllegalArgumentException(WEIGHT_EXPRESSION + " is a mandatory parameter");
  }

  for (int i = 0; i < params.size(); i++) {
    if (params.getName(i).equals(SORT_FIELD)) {
      String sortFieldName = (String) params.getVal(i);
      sortFields.add(getSortField(core, sortFieldName));
    }
  }

  return new DocumentValueSourceDictionary(searcher.getIndexReader(), field,
      fromExpression(weightExpression, sortFields), payloadField);
}
Example #13
Source File: TreatmentCurator.java From hmftools with GNU General Public License v3.0
@NotNull
private static SpellChecker createIndexSpellchecker(@NotNull Directory index) throws IOException {
  Directory spellCheckerDirectory = new RAMDirectory();
  IndexReader indexReader = DirectoryReader.open(index);
  Analyzer analyzer = new SimpleAnalyzer();
  IndexWriterConfig config = new IndexWriterConfig(analyzer);
  Dictionary dictionary = new HighFrequencyDictionary(indexReader, DRUG_TERMS_FIELD, 0.0f);
  SpellChecker spellChecker = new SpellChecker(spellCheckerDirectory);

  spellChecker.indexDictionary(dictionary, config, false);
  spellChecker.setAccuracy(SPELLCHECK_ACCURACY);
  return spellChecker;
}
Example #14
Source File: SearchSpellChecker.java From olat with Apache License 2.0
/**
 * Creates a new spell-check index based on search-index
 */
public void createSpellIndex() {
  if (isSpellCheckEnabled) {
    IndexReader indexReader = null;
    try {
      log.info("Start generating Spell-Index...");
      long startSpellIndexTime = 0;
      if (log.isDebugEnabled()) {
        startSpellIndexTime = System.currentTimeMillis();
      }
      final Directory indexDir = FSDirectory.open(new File(indexPath));
      indexReader = IndexReader.open(indexDir);

      // 1. Create content spellIndex
      final File spellDictionaryFile = new File(spellDictionaryPath);
      final Directory contentSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + CONTENT_PATH));// true
      final SpellChecker contentSpellChecker = new SpellChecker(contentSpellIndexDirectory);
      final Dictionary contentDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.CONTENT_FIELD_NAME);
      contentSpellChecker.indexDictionary(contentDictionary);

      // 2. Create title spellIndex
      final Directory titleSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + TITLE_PATH));// true
      final SpellChecker titleSpellChecker = new SpellChecker(titleSpellIndexDirectory);
      final Dictionary titleDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.TITLE_FIELD_NAME);
      titleSpellChecker.indexDictionary(titleDictionary);

      // 3. Create description spellIndex
      final Directory descriptionSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + DESCRIPTION_PATH));// true
      final SpellChecker descriptionSpellChecker = new SpellChecker(descriptionSpellIndexDirectory);
      final Dictionary descriptionDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
      descriptionSpellChecker.indexDictionary(descriptionDictionary);

      // 4. Create author spellIndex
      final Directory authorSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + AUTHOR_PATH));// true
      final SpellChecker authorSpellChecker = new SpellChecker(authorSpellIndexDirectory);
      final Dictionary authorDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.AUTHOR_FIELD_NAME);
      authorSpellChecker.indexDictionary(authorDictionary);

      // Merge all part spell indexes (content,title etc.) to one common spell index
      final Directory spellIndexDirectory = FSDirectory.open(spellDictionaryFile);// true
      final IndexWriter merger = new IndexWriter(spellIndexDirectory, new StandardAnalyzer(Version.LUCENE_CURRENT), true,
          IndexWriter.MaxFieldLength.UNLIMITED);
      final Directory[] directories = { contentSpellIndexDirectory, titleSpellIndexDirectory, descriptionSpellIndexDirectory,
          authorSpellIndexDirectory };
      merger.addIndexesNoOptimize(directories);
      merger.optimize();
      merger.close();

      spellChecker = new SpellChecker(spellIndexDirectory);
      spellChecker.setAccuracy(0.7f);
      if (log.isDebugEnabled()) {
        log.debug("SpellIndex created in " + (System.currentTimeMillis() - startSpellIndexTime) + "ms");
      }
      log.info("New generated Spell-Index ready to use.");
    } catch (final IOException ioEx) {
      log.warn("Can not create SpellIndex", ioEx);
    } finally {
      if (indexReader != null) {
        try {
          indexReader.close();
        } catch (final IOException e) {
          log.warn("Can not close indexReader properly", e);
        }
      }
    }
  }
}
Example #15
Source File: IndexHelper.java From document-management-system with GNU General Public License v2.0
public void updateSpellCheckerIndex(NodeDocumentVersion nDocVer) {
  log.info("Observed Wine added/updated event for {1} from Thread {0}", Thread.currentThread().getName(), String.valueOf(nDocVer));
  String text = (nDocVer != null) ? nDocVer.getText() : null;
  if (text != null) {
    Dictionary dictionary = null;
    try {
      FullTextEntityManager ftEm = (FullTextEntityManager) entityManager;
      SearchFactory searchFactory = ftEm.getSearchFactory();
      dictionary = new SetDictionary(text, searchFactory.getAnalyzer("wine_en"));
    } catch (IOException ioExc) {
      log.error("Failed to analyze dictionary text {0} from Wine {1} to update spell checker due to: {2}"
          + text + nDocVer.getUuid() + ioExc.toString());
    }
    if (dictionary != null) {
      Directory dir = null;
      // only allow one thread to update the index at a time ...
      // the Dictionary is pre-computed, so it should happen quickly ...
      // this synchronized approach only works because this component
      // is application-scoped
      synchronized (this) {
        try {
          dir = FSDirectory.open(new File("lucene_index/spellcheck"));
          SpellChecker spell = new SpellChecker(dir);
          spell.indexDictionary(dictionary);
          spell.close();
          log.info("Successfully updated the spell checker index after Document added/updated.");
        } catch (Exception exc) {
          log.error("Failed to update the spell checker index!", exc);
        } finally {
          if (dir != null) {
            try {
              dir.close();
            } catch (Exception zzz) {
            }
          }
        }
      }
    }
  }
}
Example #16
Source File: SearchSpellChecker.java From olat with Apache License 2.0
/**
 * Creates a new spell-check index based on search-index
 */
public static void createSpellIndex(final SearchModule searchModule) {
  final String tempSearchIndexPath = searchModule.getTempSearchIndexPath();
  final String tempSpellCheckIndexPath = searchModule.getTempSpellCheckerIndexPath();

  IndexReader indexReader = null;
  try {
    log.info("Start generating spell check index ...");
    long startSpellIndexTime = 0;
    if (log.isDebugEnabled()) {
      startSpellIndexTime = System.currentTimeMillis();
    }
    final Directory indexDir = FSDirectory.open(new File(tempSearchIndexPath, "main"));
    indexReader = IndexReader.open(indexDir);

    // 1. Create content spellIndex
    log.info("Generating 'content' spell check index ...");
    final File contentSpellIndexPath = new File(tempSpellCheckIndexPath + CONTENT_PATH);
    FileUtils.deleteDirsAndFiles(contentSpellIndexPath, true, true);
    final Directory contentSpellIndexDirectory = FSDirectory.open(contentSpellIndexPath);
    final SpellChecker contentSpellChecker = new SpellChecker(contentSpellIndexDirectory);
    final Dictionary contentDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.CONTENT_FIELD_NAME);
    contentSpellChecker.indexDictionary(contentDictionary);

    // 2. Create title spellIndex
    log.info("Generating 'title' spell check index ...");
    final File titleSpellIndexPath = new File(tempSpellCheckIndexPath + TITLE_PATH);
    FileUtils.deleteDirsAndFiles(titleSpellIndexPath, true, true);
    final Directory titleSpellIndexDirectory = FSDirectory.open(titleSpellIndexPath);
    final SpellChecker titleSpellChecker = new SpellChecker(titleSpellIndexDirectory);
    final Dictionary titleDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.TITLE_FIELD_NAME);
    titleSpellChecker.indexDictionary(titleDictionary);

    // 3. Create description spellIndex
    log.info("Generating 'description' spell check index ...");
    final File descriptionSpellIndexPath = new File(tempSpellCheckIndexPath + DESCRIPTION_PATH);
    FileUtils.deleteDirsAndFiles(descriptionSpellIndexPath, true, true);
    final Directory descriptionSpellIndexDirectory = FSDirectory.open(descriptionSpellIndexPath);
    final SpellChecker descriptionSpellChecker = new SpellChecker(descriptionSpellIndexDirectory);
    final Dictionary descriptionDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
    descriptionSpellChecker.indexDictionary(descriptionDictionary);

    // 4. Create author spellIndex
    log.info("Generating 'author' spell check index ...");
    final File authorSpellIndexPath = new File(tempSpellCheckIndexPath + AUTHOR_PATH);
    FileUtils.deleteDirsAndFiles(authorSpellIndexPath, true, true);
    final Directory authorSpellIndexDirectory = FSDirectory.open(authorSpellIndexPath);
    final SpellChecker authorSpellChecker = new SpellChecker(authorSpellIndexDirectory);
    final Dictionary authorDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.AUTHOR_FIELD_NAME);
    authorSpellChecker.indexDictionary(authorDictionary);

    log.info("Merging spell check indices ...");
    // Merge all part spell indexes (content,title etc.) to one common spell index
    final File tempSpellCheckIndexDir = new File(tempSpellCheckIndexPath);
    FileUtils.deleteDirsAndFiles(tempSpellCheckIndexDir, true, true);
    final Directory tempSpellIndexDirectory = FSDirectory.open(tempSpellCheckIndexDir);
    final IndexWriter merger = new IndexWriter(tempSpellIndexDirectory, new StandardAnalyzer(Version.LUCENE_30), true,
        IndexWriter.MaxFieldLength.UNLIMITED);
    final Directory[] directories = { contentSpellIndexDirectory, titleSpellIndexDirectory, descriptionSpellIndexDirectory,
        authorSpellIndexDirectory };
    merger.addIndexesNoOptimize(directories);
    log.info("Optimizing spell check index ...");
    merger.optimize();
    merger.close();

    tempSpellIndexDirectory.close();
    contentSpellChecker.close();
    contentSpellIndexDirectory.close();
    titleSpellChecker.close();
    titleSpellIndexDirectory.close();
    descriptionSpellChecker.close();
    descriptionSpellIndexDirectory.close();
    authorSpellChecker.close();
    authorSpellIndexDirectory.close();

    FileUtils.deleteDirsAndFiles(contentSpellIndexPath, true, true);
    FileUtils.deleteDirsAndFiles(titleSpellIndexPath, true, true);
    FileUtils.deleteDirsAndFiles(descriptionSpellIndexPath, true, true);
    FileUtils.deleteDirsAndFiles(authorSpellIndexPath, true, true);

    if (log.isDebugEnabled()) {
      log.debug("Spell check index created in " + (System.currentTimeMillis() - startSpellIndexTime) + " ms.");
    }
  } catch (final IOException ioEx) {
    log.warn("Can not create spell check index.", ioEx);
  } finally {
    if (indexReader != null) {
      try {
        indexReader.close();
      } catch (final IOException e) {
        log.warn("Can not close indexReader properly", e);
      }
    }
  }
}
Example #17
Source File: DictionaryFactory.java From lucene-solr with Apache License 2.0
/**
 * Create a Dictionary using options in <code>core</code> and optionally
 * uses <code>searcher</code>, in case of index based dictionaries
 */
public abstract Dictionary create(SolrCore core, SolrIndexSearcher searcher);
Example #18
Source File: Lookup.java From lucene-solr with Apache License 2.0
/** Build lookup from a dictionary. Some implementations may require sorted
 *  or unsorted keys from the dictionary's iterator - use
 *  {@link SortedInputIterator} or
 *  {@link UnsortedInputIterator} in such case.
 */
public void build(Dictionary dict) throws IOException {
  build(dict.getEntryIterator());
}
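For context, a minimal sketch of the calling side of build(): feeding a Dictionary into a suggester. It is not taken from the projects above; the suggestions.txt path and the temp directory are placeholder assumptions, and the FuzzySuggester constructor matches the (Directory, String, Analyzer) form used in Example #11.

import java.io.FileInputStream;
import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.suggest.FileDictionary;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.analyzing.FuzzySuggester;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class LookupBuildSketch {
  public static void main(String[] args) throws Exception {
    // Each line of suggestions.txt is "term<TAB>weight"; tab is FileDictionary's default delimiter.
    Dictionary dict = new FileDictionary(new FileInputStream("suggestions.txt")); // placeholder file
    Directory tempDir = FSDirectory.open(Paths.get("suggester-tmp"));             // scratch space for the suggester
    Lookup suggester = new FuzzySuggester(tempDir, "suggest", new StandardAnalyzer());
    suggester.build(dict); // delegates to build(dict.getEntryIterator()), as shown above
    for (Lookup.LookupResult result : suggester.lookup("lucen", false, 5)) {
      System.out.println(result.key + " -> " + result.value);
    }
    tempDir.close();
  }
}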