org.apache.lucene.search.spell.LuceneDictionary Java Examples

Source File: VocabularyNeo4jImpl.java From SciGraph with Apache License 2.0

6 votes

/**
 * Creates the vocabulary and, when a graph location is supplied, (re)builds the spell-checker
 * index from the Neo4j node auto index.
 *
 * <p>The spell-checker index is written to {@code index/lucene/spellchecker} below
 * {@code neo4jLocation}; its terms come from the {@code NodeProperties.LABEL +
 * LuceneUtils.EXACT_SUFFIX} field of {@code index/lucene/node/node_auto_index}. When
 * {@code neo4jLocation} is {@code null} no spell checker is created.
 *
 * @param graph the graph database this vocabulary reads from
 * @param neo4jLocation file-system location of the Neo4j graph, or {@code null} to disable
 *     spell checking
 * @param curieUtil stored for later use by this instance
 * @param transformer stored for later use by this instance
 * @throws IOException if either Lucene directory cannot be opened or indexing fails
 */
@Inject
public VocabularyNeo4jImpl(GraphDatabaseService graph,
    @Nullable @IndicatesNeo4jGraphLocation String neo4jLocation, CurieUtil curieUtil,
    NodeTransformer transformer) throws IOException {
  this.graph = graph;
  this.curieUtil = curieUtil;
  this.transformer = transformer;
  if (null != neo4jLocation) {
    File graphLocation = new File(neo4jLocation);
    // The source index directory is only needed while the dictionary is being read, so it is
    // scoped to this block and closed afterwards (the original never closed it).
    try (Directory indexDirectory =
        FSDirectory.open(new File(graphLocation, "index/lucene/node/node_auto_index").toPath())) {
      // The spell directory is handed to the SpellChecker, which keeps using it after this
      // constructor returns; it must therefore stay open.
      Directory spellDirectory =
          FSDirectory.open(new File(graphLocation, "index/lucene/spellchecker").toPath());
      spellChecker = new SpellChecker(spellDirectory);
      try (IndexReader reader = DirectoryReader.open(indexDirectory)) {
        IndexWriterConfig config = new IndexWriterConfig(new KeywordAnalyzer());
        // Third argument: request a full merge of the spell index once all terms are added.
        spellChecker.indexDictionary(new LuceneDictionary(reader, NodeProperties.LABEL
            + LuceneUtils.EXACT_SUFFIX), config, true);
      }
    }
  } else {
    spellChecker = null;
  }
}

Source File: IndexHelper.java From document-management-system with GNU General Public License v2.0

5 votes

/**
 * Rebuilds the spell-checker index in {@code lucene_index/spellcheck} from the
 * {@code NodeDocument.TEXT_FIELD} terms of the first directory provider registered for
 * {@code NodeDocumentVersion}.
 *
 * Any failure is logged and not propagated. The spell checker, its directory and the source
 * reader are released on every path.
 *
 * @param searchFactory supplies the reader provider and the directory providers of the source index
 */
protected void buildSpellCheckerIndex(SearchFactory searchFactory) {
	IndexReader reader = null;
	Directory dir = null;
	SpellChecker spell = null;
	long startMillis = System.currentTimeMillis();
	File spellCheckIndexDir = new File("lucene_index/spellcheck");
	// NOTE(review): the {0}/{1} placeholders are MessageFormat-style; confirm the logger in use expands them.
	log.info("Building SpellChecker index in {0}", spellCheckIndexDir.getAbsolutePath());
	ReaderProvider readerProvider = searchFactory.getReaderProvider();

	try {
		reader = readerProvider.openReader(searchFactory.getDirectoryProviders(NodeDocumentVersion.class)[0]);
		dir = FSDirectory.open(spellCheckIndexDir);
		spell = new SpellChecker(dir);
		spell.clearIndex();
		spell.indexDictionary(new LuceneDictionary(reader, NodeDocument.TEXT_FIELD));

		// Close on the success path first so the reported duration includes it; the references
		// are nulled so the finally block does not close them a second time.
		spell.close();
		spell = null;
		dir.close();
		dir = null;

		long elapsedMillis = System.currentTimeMillis() - startMillis;
		log.info("Took {1} (ms) to build SpellChecker index in {0}",
				spellCheckIndexDir.getAbsolutePath(), String.valueOf(elapsedMillis));
	} catch (Exception exc) {
		log.error("Failed to build spell checker index!", exc);
	} finally {
		// Only reached with non-null references when the try block failed part-way through.
		if (spell != null) {
			try {
				spell.close();
			} catch (Exception closeExc) {
				log.error("Failed to close spell checker!", closeExc);
			}
		}
		if (dir != null) {
			try {
				dir.close();
			} catch (Exception closeExc) {
				log.error("Failed to close spell checker index directory!", closeExc);
			}
		}
		if (reader != null) {
			readerProvider.closeReader(reader);
		}
	}
}

Source File: AutoCompleter.java From webdsl with Apache License 2.0

5 votes

/**
 * Rebuilds the autocomplete index from the terms of one field of a source index.
 *
 * <p>The existing autocomplete index is cleared and one document is added per term, built by
 * {@code createDocument} from the term and its document frequency in {@code reader}. If
 * anything fails, the writer is rolled back so the index write lock is released and the
 * uncommitted changes are discarded.
 *
 * @param reader source index reader, from which autocomplete words are obtained for the defined field
 * @param field the field of the source index reader to index for autocompletion
 * @param mergeFactor accepted for API compatibility; not used by this implementation
 * @param ramMB the max amount of memory in MB to use as the writer's RAM buffer
 * @param optimize whether or not the autocomplete index should be optimized
 * @throws AlreadyClosedException if the Autocompleter is already closed
 * @throws IOException if reading the source index or writing the autocomplete index fails
 */
 public final void indexDictionary(IndexReader reader, String field, int mergeFactor, int ramMB, boolean optimize) throws IOException {
   synchronized (modifyCurrentIndexLock) {
     ensureOpen();
     final Directory dir = this.autoCompleteIndex;
     final Dictionary dict = new LuceneDictionary(reader, field);
     final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT)).setRAMBufferSizeMB(ramMB));
     boolean committed = false;
     try {
       // NOTE(review): the searcher is obtained and released but not queried here; presumably
       // this keeps the current searcher referenced while the index is rewritten - confirm.
       final IndexSearcher indexSearcher = obtainSearcher();
       try {
         // clear the index
         writer.deleteAll();

         final Iterator<String> words = dict.getWordsIterator();
         while (words.hasNext()) {
           final String word = words.next();
           // index the word together with its document frequency in the source index
           final Document doc = createDocument(word, reader.docFreq(new Term(field, word)));
           writer.addDocument(doc);
         }
       } finally {
         releaseSearcher(indexSearcher);
       }

       if (optimize) {
         writer.optimize();
       }
       // closing commits the rebuilt index
       writer.close();
       committed = true;
     } finally {
       if (!committed) {
         try {
           // Releases the write lock and drops the partial rebuild.
           writer.rollback();
         } catch (IOException ignored) {
           // The exception that caused the rollback is already propagating and is the more
           // useful one for the caller; a secondary rollback failure must not mask it.
         }
       }
     }
     // also re-open the autocomplete index to see our own changes when the next suggestion
     // is fetched:
     swapSearcher(dir);
   }
 }

Source File: SearchSpellChecker.java From olat with Apache License 2.0

4 votes

/**
 * Creates a new spell-check index based on search-index.
 *
 * <p>Does nothing unless spell checking is enabled. Otherwise one part spell index is built per
 * document field (content, title, description, author) below {@code spellDictionaryPath}, the
 * parts are merged into a single index at {@code spellDictionaryPath}, and {@code spellChecker}
 * is replaced by a checker on the merged index with accuracy 0.7.
 *
 * <p>An {@link IOException} is logged as a warning and not propagated. The part spell checkers,
 * their directories and the source index reader/directory are closed on every path; the merged
 * directory stays open because the new {@code spellChecker} uses it.
 */
public void createSpellIndex() {
    if (!isSpellCheckEnabled) {
        return;
    }

    // Parallel arrays: part index i is stored at partIndexPaths[i] and built from partFieldNames[i].
    final String[] partIndexPaths = { spellDictionaryPath + CONTENT_PATH, spellDictionaryPath + TITLE_PATH, spellDictionaryPath + DESCRIPTION_PATH,
            spellDictionaryPath + AUTHOR_PATH };
    final String[] partFieldNames = { AbstractOlatDocument.CONTENT_FIELD_NAME, AbstractOlatDocument.TITLE_FIELD_NAME,
            AbstractOlatDocument.DESCRIPTION_FIELD_NAME, AbstractOlatDocument.AUTHOR_FIELD_NAME };
    final Directory[] partDirectories = new Directory[partIndexPaths.length];
    final SpellChecker[] partSpellCheckers = new SpellChecker[partIndexPaths.length];

    Directory indexDir = null;
    IndexReader indexReader = null;
    Directory spellIndexDirectory = null;
    // true once spellIndexDirectory has been handed to the new spellChecker and must stay open
    boolean spellIndexPublished = false;
    try {
        log.info("Start generating Spell-Index...");
        long startSpellIndexTime = 0;
        if (log.isDebugEnabled()) {
            startSpellIndexTime = System.currentTimeMillis();
        }
        indexDir = FSDirectory.open(new File(indexPath));
        indexReader = IndexReader.open(indexDir);

        // Create the content, title, description and author part spell indexes
        for (int i = 0; i < partIndexPaths.length; i++) {
            partDirectories[i] = FSDirectory.open(new File(partIndexPaths[i]));
            partSpellCheckers[i] = new SpellChecker(partDirectories[i]);
            partSpellCheckers[i].indexDictionary(new LuceneDictionary(indexReader, partFieldNames[i]));
        }

        // Merge all part spell indexes (content,title etc.) to one common spell index
        spellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath));
        final IndexWriter merger = new IndexWriter(spellIndexDirectory, new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
        try {
            merger.addIndexesNoOptimize(partDirectories);
            merger.optimize();
        } finally {
            // always release the write lock on the merged index
            merger.close();
        }

        spellChecker = new SpellChecker(spellIndexDirectory);
        spellIndexPublished = true;
        spellChecker.setAccuracy(0.7f);
        if (log.isDebugEnabled()) {
            log.debug("SpellIndex created in " + (System.currentTimeMillis() - startSpellIndexTime) + "ms");
        }
        log.info("New generated Spell-Index ready to use.");
    } catch (final IOException ioEx) {
        log.warn("Can not create SpellIndex", ioEx);
    } finally {
        // The part indexes are only needed for the merge; release them whether or not it succeeded.
        for (int i = 0; i < partIndexPaths.length; i++) {
            if (partSpellCheckers[i] != null) {
                try {
                    partSpellCheckers[i].close();
                } catch (final IOException e) {
                    log.warn("Can not close part spell checker properly", e);
                }
            }
            if (partDirectories[i] != null) {
                try {
                    partDirectories[i].close();
                } catch (final IOException e) {
                    log.warn("Can not close part spell index directory properly", e);
                }
            }
        }
        if (spellIndexDirectory != null && !spellIndexPublished) {
            try {
                spellIndexDirectory.close();
            } catch (final IOException e) {
                log.warn("Can not close spell index directory properly", e);
            }
        }
        if (indexReader != null) {
            try {
                indexReader.close();
            } catch (final IOException e) {
                log.warn("Can not close indexReader properly", e);
            }
        }
        if (indexDir != null) {
            try {
                indexDir.close();
            } catch (final IOException e) {
                log.warn("Can not close search index directory properly", e);
            }
        }
    }
}

Source File: SearchSpellChecker.java From olat with Apache License 2.0

4 votes

/**
 * Creates a new spell-check index based on search-index.
 * <p>
 * Reads the search index in the {@code "main"} sub-directory of the module's temporary search index path, builds one part spell index per document field
 * (content, title, description, author) below the temporary spell-checker path, merges the parts into one index at the temporary spell-checker path and
 * finally removes the part indices again.
 * <p>
 * An {@link IOException} is logged as a warning and not propagated; the index reader is closed on every path.
 *
 * @param searchModule
 *            supplies the temporary search index path and the temporary spell-checker index path
 */
public static void createSpellIndex(final SearchModule searchModule) {
    final String tempSearchIndexPath = searchModule.getTempSearchIndexPath();
    final String tempSpellCheckIndexPath = searchModule.getTempSpellCheckerIndexPath();

    IndexReader indexReader = null;
    try {
        log.info("Start generating spell check index ...");

        // The start time is only taken when debug logging is on; it is only used by the debug message at the end.
        long startSpellIndexTime = 0;
        if (log.isDebugEnabled()) {
            startSpellIndexTime = System.currentTimeMillis();
        }
        // NOTE(review): indexDir is not closed explicitly anywhere in this method; only the reader opened on it is.
        final Directory indexDir = FSDirectory.open(new File(tempSearchIndexPath, "main"));
        indexReader = IndexReader.open(indexDir);

        // 1. Create content spellIndex (any previous files at that path are deleted first)
        log.info("Generating 'content' spell check index ...");
        final File contentSpellIndexPath = new File(tempSpellCheckIndexPath + CONTENT_PATH);
        FileUtils.deleteDirsAndFiles(contentSpellIndexPath, true, true);
        final Directory contentSpellIndexDirectory = FSDirectory.open(contentSpellIndexPath);
        final SpellChecker contentSpellChecker = new SpellChecker(contentSpellIndexDirectory);
        final Dictionary contentDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.CONTENT_FIELD_NAME);
        contentSpellChecker.indexDictionary(contentDictionary);

        // 2. Create title spellIndex
        log.info("Generating 'title' spell check index ...");
        final File titleSpellIndexPath = new File(tempSpellCheckIndexPath + TITLE_PATH);
        FileUtils.deleteDirsAndFiles(titleSpellIndexPath, true, true);
        final Directory titleSpellIndexDirectory = FSDirectory.open(titleSpellIndexPath);
        final SpellChecker titleSpellChecker = new SpellChecker(titleSpellIndexDirectory);
        final Dictionary titleDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.TITLE_FIELD_NAME);
        titleSpellChecker.indexDictionary(titleDictionary);

        // 3. Create description spellIndex
        log.info("Generating 'description' spell check index ...");
        final File descriptionSpellIndexPath = new File(tempSpellCheckIndexPath + DESCRIPTION_PATH);
        FileUtils.deleteDirsAndFiles(descriptionSpellIndexPath, true, true);
        final Directory descriptionSpellIndexDirectory = FSDirectory.open(descriptionSpellIndexPath);
        final SpellChecker descriptionSpellChecker = new SpellChecker(descriptionSpellIndexDirectory);
        final Dictionary descriptionDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
        descriptionSpellChecker.indexDictionary(descriptionDictionary);

        // 4. Create author spellIndex
        log.info("Generating 'author' spell check index ...");
        final File authorSpellIndexPath = new File(tempSpellCheckIndexPath + AUTHOR_PATH);
        FileUtils.deleteDirsAndFiles(authorSpellIndexPath, true, true);
        final Directory authorSpellIndexDirectory = FSDirectory.open(authorSpellIndexPath);
        final SpellChecker authorSpellChecker = new SpellChecker(authorSpellIndexDirectory);
        final Dictionary authorDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.AUTHOR_FIELD_NAME);
        authorSpellChecker.indexDictionary(authorDictionary);

        log.info("Merging spell check indices ...");
        // Merge all part spell indexes (content,title etc.) to one common spell index
        final File tempSpellCheckIndexDir = new File(tempSpellCheckIndexPath);
        // NOTE(review): this clears the merge target after the part indices were written. If the part paths
        // (tempSpellCheckIndexPath + *_PATH) are sub-directories of the target, they could be removed here as well -
        // confirm the *_PATH values and the meaning of the two boolean flags of deleteDirsAndFiles.
        FileUtils.deleteDirsAndFiles(tempSpellCheckIndexDir, true, true);
        final Directory tempSpellIndexDirectory = FSDirectory.open(tempSpellCheckIndexDir);
        final IndexWriter merger = new IndexWriter(tempSpellIndexDirectory, new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED);
        final Directory[] directories = { contentSpellIndexDirectory, titleSpellIndexDirectory, descriptionSpellIndexDirectory, authorSpellIndexDirectory };
        merger.addIndexesNoOptimize(directories);

        log.info("Optimizing spell check index ...");
        merger.optimize();
        merger.close();

        // NOTE(review): everything below is closed on the success path only; if an IOException is thrown earlier,
        // the merger, the part spell checkers and their directories are left open.
        tempSpellIndexDirectory.close();

        contentSpellChecker.close();
        contentSpellIndexDirectory.close();

        titleSpellChecker.close();
        titleSpellIndexDirectory.close();

        descriptionSpellChecker.close();
        descriptionSpellIndexDirectory.close();

        authorSpellChecker.close();
        authorSpellIndexDirectory.close();

        // The part indices were only needed as merge input; remove them now that the merged index is written.
        FileUtils.deleteDirsAndFiles(contentSpellIndexPath, true, true);
        FileUtils.deleteDirsAndFiles(titleSpellIndexPath, true, true);
        FileUtils.deleteDirsAndFiles(descriptionSpellIndexPath, true, true);
        FileUtils.deleteDirsAndFiles(authorSpellIndexPath, true, true);

        if (log.isDebugEnabled()) {
            log.debug("Spell check index created in " + (System.currentTimeMillis() - startSpellIndexTime) + " ms.");
        }
    } catch (final IOException ioEx) {
        // Best effort: a failed rebuild is reported but does not propagate to the caller.
        log.warn("Can not create spell check index.", ioEx);
    } finally {
        if (indexReader != null) {
            try {
                indexReader.close();
            } catch (final IOException e) {
                log.warn("Can not close indexReader properly", e);
            }
        }
    }
}

org.apache.lucene.search.spell.LuceneDictionary Java Examples