Java Code Examples for org.apache.lucene.index.IndexWriter#optimize()
The following examples show how to use org.apache.lucene.index.IndexWriter#optimize(). They are drawn from open-source projects; the source file, originating project, and license are noted above each example.
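Before turning to the project-specific examples, here is a minimal, self-contained sketch of the typical pattern: open a writer, add documents, call optimize() to merge all index segments into one (which speeds up subsequent searches), then close the writer. This is only a sketch against the Lucene 3.x API used throughout the examples below; the index path, field name, and document content are illustrative placeholders. Note that optimize() was deprecated in Lucene 3.5 and replaced by forceMerge in later versions.

import java.io.File;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class OptimizeSketch {
    public static void main(String[] args) throws Exception {
        // Illustrative index location; any Directory implementation works.
        Directory dir = FSDirectory.open(new File("/tmp/example-index"));
        IndexWriter writer = new IndexWriter(dir,
                new StandardAnalyzer(Version.LUCENE_30), true,
                IndexWriter.MaxFieldLength.LIMITED);
        try {
            Document doc = new Document();
            doc.add(new Field("title", "hello world", Field.Store.YES, Field.Index.ANALYZED));
            writer.addDocument(doc);
            // Merge all segments into a single segment.
            writer.optimize();
        } finally {
            writer.close();
        }
    }
}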
Example 1
Source File: LuceneContentSvcImpl.java From Lottery with GNU General Public License v2.0
@Transactional(readOnly = true)
public Integer createIndex(Integer siteId, Integer channelId, Date startDate,
        Date endDate, Integer startId, Integer max, Directory dir)
        throws IOException, ParseException {
    boolean exist = IndexReader.indexExists(dir);
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(
            Version.LUCENE_30), !exist, IndexWriter.MaxFieldLength.LIMITED);
    try {
        if (exist) {
            LuceneContent.delete(siteId, channelId, startDate, endDate, writer);
        }
        Integer lastId = luceneContentDao.index(writer, siteId, channelId,
                startDate, endDate, startId, max);
        writer.optimize();
        return lastId;
    } finally {
        writer.close();
    }
}
Example 2
Source File: CrawlerTest.java From JPPF with Apache License 2.0
/**
 * Test of indexing with Lucene.
 * @throws Exception if an error is thrown while executing.
 */
public static void luceneIndex() throws Exception {
    // setting default parameters
    final int depth = 3;
    // create Lucene index writer
    final IndexWriter writer = new IndexWriter(index, new StandardAnalyzer(), true);
    writer.setUseCompoundFile(true);
    writer.setMaxFieldLength(1000000);
    // common crawler settings
    final Crawler crawler = new Crawler();
    crawler.setLinkFilter(new ServerFilter(server));
    crawler.setModel(new MaxDepthModel(depth));
    crawler.addParserListener(new IParserEventListener() {
        @Override
        public void parse(final ParserEvent event) {
            print("Parsing link: " + event.getLink());
        }
    });
    // create Lucene parsing listener and add it
    final LuceneParserEventListener listener = new LuceneParserEventListener(writer);
    crawler.addParserListener(listener);
    // start crawler
    crawler.start(server, startPage);
    // Optimizing Lucene index
    writer.optimize();
    writer.close();
}
Example 3
Source File: AutoCompleter.java From webdsl with Apache License 2.0
/**
 * Indexes the data from the given reader.
 * @param reader Source index reader, from which autocomplete words are obtained for the defined field
 * @param field the field of the source index reader to index for autocompletion
 * @param mergeFactor mergeFactor to use when indexing
 * @param ramMB the max amount of memory in MB to use
 * @param optimize whether or not the autocomplete index should be optimized
 * @throws AlreadyClosedException if the Autocompleter is already closed
 * @throws IOException
 */
public final void indexDictionary(IndexReader reader, String field, int mergeFactor, int ramMB, boolean optimize) throws IOException {
    synchronized (modifyCurrentIndexLock) {
        ensureOpen();
        final Directory dir = this.autoCompleteIndex;
        final Dictionary dict = new LuceneDictionary(reader, field);
        final IndexWriter writer = new IndexWriter(dir,
                new IndexWriterConfig(Version.LUCENE_CURRENT,
                        new WhitespaceAnalyzer(Version.LUCENE_CURRENT)).setRAMBufferSizeMB(ramMB));
        IndexSearcher indexSearcher = obtainSearcher();
        final List<IndexReader> readers = new ArrayList<IndexReader>();
        if (searcher.maxDoc() > 0) {
            ReaderUtil.gatherSubReaders(readers, searcher.getIndexReader());
        }
        // clear the index
        writer.deleteAll();
        try {
            Iterator<String> iter = dict.getWordsIterator();
            while (iter.hasNext()) {
                String word = iter.next();
                // ok index the word
                Document doc = createDocument(word, reader.docFreq(new Term(field, word)));
                writer.addDocument(doc);
            }
        } finally {
            releaseSearcher(indexSearcher);
        }
        // close writer
        if (optimize) writer.optimize();
        writer.close();
        // also re-open the autocomplete index to see our own changes when the next suggestion
        // is fetched:
        swapSearcher(dir);
    }
}
Example 4
Source File: CourseServiceImpl.java From TinyMooc with Apache License 2.0
public boolean createCourseIndex() {
    List<Course> list = this.getCourses();
    try {
        Directory directory = FSDirectory.getDirectory(INDEXPATH);
        IndexWriter indexWriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
        for (Course course : list) {
            Document doc = new Document();
            String courseTitle = course.getCourseTitle() == null ? "" : course.getCourseTitle().trim();
            String courseIntro = course.getCourseIntro() == null ? "" : course.getCourseIntro();
            String courseId = course.getCourseId() == null ? "" : course.getCourseId();
            String type = course.getType() == null ? "" : course.getType();
            String courseState = course.getCourseState() == null ? "" : course.getCourseState();
            doc.add(new Field("courseIntro", courseIntro, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.add(new Field("courseTitle", courseTitle, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.add(new Field("courseId", courseId, Field.Store.COMPRESS, Field.Index.NO));
            doc.add(new Field("type", type, Field.Store.COMPRESS, Field.Index.NO));
            doc.add(new Field("courseState", courseState, Field.Store.COMPRESS, Field.Index.NO));
            indexWriter.addDocument(doc);
        }
        indexWriter.optimize();
        indexWriter.close();
        return true;
    } catch (Exception e) {
        logger.error("createCourseIndex error.");
        return false;
    }
}
Example 5
Source File: Main.java From aedict with GNU General Public License v3.0
private void indexWithLucene() throws IOException {
    System.out.println("Deleting old Lucene index");
    FileUtils.deleteDirectory(new File(LUCENE_INDEX));
    System.out.println("Indexing with Lucene");
    final BufferedReader dictionary = config.newReader();
    try {
        final Directory directory = FSDirectory.open(new File(LUCENE_INDEX));
        try {
            final IndexWriter luceneWriter = new IndexWriter(directory,
                    new StandardAnalyzer(LuceneSearch.LUCENE_VERSION), true,
                    IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                final IDictParser parser = config.fileType.newParser(config);
                indexWithLucene(dictionary, luceneWriter, parser);
                System.out.println("Optimizing Lucene index");
                luceneWriter.optimize();
            } finally {
                luceneWriter.close();
            }
        } finally {
            closeQuietly(directory);
        }
    } finally {
        IOUtils.closeQuietly(dictionary);
    }
    System.out.println("Finished Lucene indexing");
}
Example 6
Source File: BuildIndexForEntityFragments.java From gAnswer with BSD 3-Clause "New" or "Revised" License
public void indexforentity() throws Exception {
    if (EntityFragmentFields.entityId2Name == null)
        EntityFragmentFields.load();

    long startTime = new Date().getTime();
    // Try update KB index to DBpedia2015. by husen 2016-04-08
    // Try update KB index to DBpedia2016. by husen 2018-8-22
    File indexDir_en = new File("D:/husen/gAnswer/data/DBpedia2016/lucene/entity_fragment_index");
    File sourceDir_en = new File("D:/husen/gAnswer/data/DBpedia2016/fragments/entity_RDF_fragment/16entity_fragment.txt");
    Analyzer luceneAnalyzer_en = new StandardAnalyzer();
    IndexWriter indexWriter_en = new IndexWriter(indexDir_en, luceneAnalyzer_en, true);

    int mergeFactor = 100000;            // default 10
    int maxBufferedDoc = 1000;           // default 10
    int maxMergeDoc = Integer.MAX_VALUE; // INF
    // indexWriter.DEFAULT_MERGE_FACTOR = mergeFactor;
    indexWriter_en.setMergeFactor(mergeFactor);
    indexWriter_en.setMaxBufferedDocs(maxBufferedDoc);
    indexWriter_en.setMaxMergeDocs(maxMergeDoc);

    FileInputStream file = new FileInputStream(sourceDir_en);
    InputStreamReader in = new InputStreamReader(file, "UTF-8");
    BufferedReader br = new BufferedReader(in);

    int count = 0;
    while (true) {
        String _line = br.readLine();
        if (_line == null)
            break;
        count++;
        if (count % 100000 == 0)
            System.out.println(count);

        String line = _line;
        String temp[] = line.split("\t");
        if (temp.length != 2)
            continue;

        int entity_id = Integer.parseInt(temp[0]);
        if (!EntityFragmentFields.entityId2Name.containsKey(entity_id))
            continue;
        String entity_name = EntityFragmentFields.entityId2Name.get(entity_id);
        String entity_fragment = temp[1];
        entity_name = entity_name.replace("____", " ");
        entity_name = entity_name.replace("__", " ");
        entity_name = entity_name.replace("_", " ");

        Document document = new Document();
        Field EntityName = new Field("EntityName", entity_name, Field.Store.YES,
                Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
        Field EntityId = new Field("EntityId", String.valueOf(entity_id), Field.Store.YES, Field.Index.NO);
        Field EntityFragment = new Field("EntityFragment", entity_fragment, Field.Store.YES, Field.Index.NO);
        document.add(EntityName);
        document.add(EntityId);
        document.add(EntityFragment);
        indexWriter_en.addDocument(document);
    }
    indexWriter_en.optimize();
    indexWriter_en.close();
    br.close();

    // output the time taken to build the index
    long endTime = new Date().getTime();
    System.out.println("entity_name index has build ->" + count + " " + "Time:" + (endTime - startTime));
}
Example 7
Source File: BuildIndexForTypeShortName.java From gAnswer with BSD 3-Clause "New" or "Revised" License
public static void buildIndex(HashMap<String, ArrayList<Integer>> typeShortName2IdList) throws Exception {
    long startTime = new Date().getTime();
    File indexDir_li = new File("D:/husen/gAnswer/data/DBpedia2016/lucene/type_fragment_index");
    Analyzer luceneAnalyzer_li = new StandardAnalyzer();
    IndexWriter indexWriter_li = new IndexWriter(indexDir_li, luceneAnalyzer_li, true);

    int mergeFactor = 100000;
    int maxBufferedDoc = 1000;
    int maxMergeDoc = Integer.MAX_VALUE;
    // indexWriter.DEFAULT_MERGE_FACTOR = mergeFactor;
    indexWriter_li.setMergeFactor(mergeFactor);
    indexWriter_li.setMaxBufferedDocs(maxBufferedDoc);
    indexWriter_li.setMaxMergeDocs(maxMergeDoc);

    int count = 0;
    Iterator<String> it = typeShortName2IdList.keySet().iterator();
    while (it.hasNext()) {
        String sn = it.next();
        if (sn.length() == 0) {
            continue;
        }
        count++;
        StringBuilder splittedSn = new StringBuilder("");
        if (sn.contains("_")) {
            String nsn = sn.replace("_", " ");
            splittedSn.append(nsn.toLowerCase());
        } else {
            int last = 0, i = 0;
            for (i = 0; i < sn.length(); i++) {
                // if the character is not a lowercase letter, split the word here
                if (!(sn.charAt(i) >= 'a' && sn.charAt(i) <= 'z')) {
                    splittedSn.append(sn.substring(last, i).toLowerCase());
                    splittedSn.append(' ');
                    last = i;
                }
            }
            splittedSn.append(sn.substring(last, i).toLowerCase());
            while (splittedSn.charAt(0) == ' ') {
                splittedSn.deleteCharAt(0);
            }
        }
        System.out.println("SplittedType: " + splittedSn);

        Document document = new Document();
        Field SplittedTypeShortName = new Field("SplittedTypeShortName", splittedSn.toString(),
                Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
        Field TypeShortName = new Field("TypeShortName", sn, Field.Store.YES, Field.Index.NO);
        document.add(SplittedTypeShortName);
        document.add(TypeShortName);
        indexWriter_li.addDocument(document);
    }
    indexWriter_li.optimize();
    indexWriter_li.close();

    // output the time taken to build the index
    long endTime = new Date().getTime();
    System.out.println("TypeShortName index has build ->" + count + " " + "Time:" + (endTime - startTime));
}
Example 8
Source File: AnchorIndexer.java From tagme with Apache License 2.0
@Override
public void makeIndex(String lang, File workingDir) throws IOException {
    log.info("Loading support datasets...");
    File all_anchors = new WikipediaAnchorParser(lang).getFile();
    long numAnchors = ExternalSortUtils.wcl(all_anchors);
    AnchorIterator iterator = new AnchorIterator(all_anchors);
    IntSet people = new PeopleWIDs(lang).getDataset();

    // IndexSearcher articles = Indexes.getSearcher(RepositoryDirs.WIKIPEDIA.getPath(lang));
    IndexSearcher articles = openWikipediaIndex(lang);
    // QueryParser queryParser = new QueryParser(Version.LUCENE_34, WikipediaIndexer.FIELD_BODY, new WhitespaceAnalyzer(Version.LUCENE_34));
    QueryParser queryParser = new QueryParser(Version.LUCENE_34, WikipediaIndexer.FIELD_BODY,
            new StandardAnalyzer(Version.LUCENE_34, new HashSet<String>()));

    IndexWriter index = new IndexWriter(FSDirectory.open(workingDir.getAbsoluteFile()),
            new IndexWriterConfig(Version.LUCENE_34, new KeywordAnalyzer()));
    Document doc = new Document();
    Field fId = new Field(FIELD_ID, "", Store.YES, Index.NOT_ANALYZED);
    Field fText = new Field(FIELD_TEXT, "", Store.YES, Index.NOT_ANALYZED);
    Field fObject = new Field(FIELD_OBJECT, "", Store.YES, Index.NO);
    doc.add(fId);
    doc.add(fText);
    doc.add(fObject);
    // Field fOriginal = new Field(FIELD_ORIGINAL, "", Store.YES, Index.ANALYZED);
    // Field fWID = new Field(FIELD_WID, "", Store.NO, Index.ANALYZED);

    PLogger plog = new PLogger(log, Step.TEN_MINUTES, "lines", "anchors", "searches", "indexed", "0-freq", "dropped");
    plog.setEnd(0, numAnchors);
    plog.start("Support datasets loaded, now parsing...");
    int id = 0;
    while (iterator.next()) {
        plog.update(0, iterator.scroll);
        plog.update(1);
        String anchorText = iterator.anchor;

        int freq = freq(iterator.originals, articles, queryParser);
        plog.update(2, iterator.originals.size());
        if (freq == 0) plog.update(4);

        Anchor anchorObj = Anchor.build(id, iterator.links, freq, people);
        if (anchorObj == null) {
            plog.update(5);
            continue;
        }

        String anchorSerial = Anchor.serialize(anchorObj);
        fId.setValue(Integer.toString(++id));
        fText.setValue(anchorText);
        fObject.setValue(anchorSerial);

        for (int page : anchorObj) {
            Field fWID = new Field(FIELD_WID, Integer.toString(page), Store.YES, Index.NOT_ANALYZED);
            // fWID.setBoost(iterator.links.get(page));
            doc.add(fWID);
        }
        for (String original : iterator.originals) {
            doc.add(new Field(FIELD_ORIGINAL, original, Store.YES, Index.NOT_ANALYZED));
        }

        index.addDocument(doc);
        plog.update(3);

        doc.removeFields(FIELD_ORIGINAL);
        doc.removeFields(FIELD_WID);
    }
    plog.stop();
    iterator.close();

    log.info("Now optimizing...");
    index.optimize();
    index.close();

    log.info("Done.");
}
Example 9
Source File: TopicIndexer.java From tagme with Apache License 2.0
@Override
public void makeIndex(String lang, File workingDir) throws IOException {
    IndexReader articles = Indexes.getReader(RepositoryDirs.WIKIPEDIA.getPath(lang));
    Int2ObjectMap<String> bestAnchorMap = new BestAnchors(lang).getDataset();

    IndexWriter index = new IndexWriter(new SimpleFSDirectory(workingDir),
            new IndexWriterConfig(Version.LUCENE_34, new KeywordAnalyzer()));
    Document doc = new Document();
    Field fWID = new Field(FIELD_WID, "", Store.YES, Index.NOT_ANALYZED);
    Field fTitle = new Field(FIELD_TITLE, "", Store.YES, Index.NOT_ANALYZED);
    Field fAbstract = new Field(FIELD_ABSTRACT, "", Store.YES, Index.NO);
    Field fBestAnchor = new Field(FIELD_BEST_ANCHOR, "", Store.YES, Index.NO);
    doc.add(fWID);
    doc.add(fTitle);
    doc.add(fAbstract);
    doc.add(fBestAnchor);

    int max = articles.maxDoc();
    PLogger plog = new PLogger(log, Step.TEN_MINUTES, "pages", "indexed", "noBest");
    plog.setEnd(max);
    plog.start("Start indexing...");

    for (int i = 0; i < max; i++) {
        plog.update(0);
        Document oldDoc = articles.document(i);
        PageType type = PageType.valueOf(oldDoc.get(WikipediaIndexer.FIELD_TYPE));
        if (type == PageType.TOPIC) {
            int wid = Integer.parseInt(oldDoc.get(WikipediaIndexer.FIELD_WID));
            fWID.setValue(oldDoc.get(WikipediaIndexer.FIELD_WID));
            fAbstract.setValue(oldDoc.get(WikipediaIndexer.FIELD_ABSTRACT));
            fTitle.setValue(oldDoc.get(WikipediaIndexer.FIELD_TITLE));

            String bestAnchor = bestAnchorMap.get(wid);
            if (bestAnchor == null || bestAnchor.length() == 0) plog.update(2);
            fBestAnchor.setValue(bestAnchor == null ? "" : bestAnchor);

            String[] cats = oldDoc.getValues(WikipediaIndexer.FIELD_CAT);
            if (cats != null) {
                for (int j = 0; j < cats.length; j++)
                    doc.add(new Field(FIELD_CAT, cats[j], Store.YES, Index.NOT_ANALYZED));
            }

            index.addDocument(doc);
            plog.update(1);

            doc.removeFields(FIELD_CAT);
        }
    }
    plog.stop();

    log.info("Now optimizing...");
    index.optimize();
    index.close();

    // we cannot call this because the index is still in the temporary dir,
    // so TopicDocs would be created using the old index
    // log.info("Index Done, now creating WID->DOC_ID map");
    // TopicDocs td = new TopicDocs(lang);
    // td.forceParsing();

    log.info("Done.");
}
Example 10
Source File: SearchSpellChecker.java From olat with Apache License 2.0
/**
 * Creates a new spell-check index based on the search index.
 */
public void createSpellIndex() {
    if (isSpellCheckEnabled) {
        IndexReader indexReader = null;
        try {
            log.info("Start generating Spell-Index...");
            long startSpellIndexTime = 0;
            if (log.isDebugEnabled()) {
                startSpellIndexTime = System.currentTimeMillis();
            }
            final Directory indexDir = FSDirectory.open(new File(indexPath));
            indexReader = IndexReader.open(indexDir);

            // 1. Create content spellIndex
            final File spellDictionaryFile = new File(spellDictionaryPath);
            final Directory contentSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + CONTENT_PATH)); // true
            final SpellChecker contentSpellChecker = new SpellChecker(contentSpellIndexDirectory);
            final Dictionary contentDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.CONTENT_FIELD_NAME);
            contentSpellChecker.indexDictionary(contentDictionary);
            // 2. Create title spellIndex
            final Directory titleSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + TITLE_PATH)); // true
            final SpellChecker titleSpellChecker = new SpellChecker(titleSpellIndexDirectory);
            final Dictionary titleDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.TITLE_FIELD_NAME);
            titleSpellChecker.indexDictionary(titleDictionary);
            // 3. Create description spellIndex
            final Directory descriptionSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + DESCRIPTION_PATH)); // true
            final SpellChecker descriptionSpellChecker = new SpellChecker(descriptionSpellIndexDirectory);
            final Dictionary descriptionDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
            descriptionSpellChecker.indexDictionary(descriptionDictionary);
            // 4. Create author spellIndex
            final Directory authorSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + AUTHOR_PATH)); // true
            final SpellChecker authorSpellChecker = new SpellChecker(authorSpellIndexDirectory);
            final Dictionary authorDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.AUTHOR_FIELD_NAME);
            authorSpellChecker.indexDictionary(authorDictionary);

            // Merge all part spell indexes (content, title etc.) to one common spell index
            final Directory spellIndexDirectory = FSDirectory.open(spellDictionaryFile); // true
            final IndexWriter merger = new IndexWriter(spellIndexDirectory,
                    new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
            final Directory[] directories = { contentSpellIndexDirectory, titleSpellIndexDirectory,
                    descriptionSpellIndexDirectory, authorSpellIndexDirectory };
            merger.addIndexesNoOptimize(directories);
            merger.optimize();
            merger.close();

            spellChecker = new SpellChecker(spellIndexDirectory);
            spellChecker.setAccuracy(0.7f);
            if (log.isDebugEnabled()) {
                log.debug("SpellIndex created in " + (System.currentTimeMillis() - startSpellIndexTime) + "ms");
            }
            log.info("New generated Spell-Index ready to use.");
        } catch (final IOException ioEx) {
            log.warn("Can not create SpellIndex", ioEx);
        } finally {
            if (indexReader != null) {
                try {
                    indexReader.close();
                } catch (final IOException e) {
                    log.warn("Can not close indexReader properly", e);
                }
            }
        }
    }
}
Example 11
Source File: OlatFullIndexer.java From olat with Apache License 2.0
/**
 * Creates the index-writer object. In multi-threaded mode, creates an array of index workers. Starts indexing
 * with the main index as root object and indexes all elements recursively. At the end the new index is
 * optimized and closed. The new index is stored in [temporary-index-path]/main.
 *
 * @throws InterruptedException
 */
private void doIndex() throws InterruptedException {
    try {
        final File tempIndexDir = new File(tempIndexPath);
        final Directory indexPath = FSDirectory.open(new File(tempIndexDir, "main"));
        final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
        indexWriter = new IndexWriter(indexPath, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
        indexWriter.deleteAll();
        indexWriter.setMergeFactor(INDEX_MERGE_FACTOR); // for better performance
        indexWriter.setRAMBufferSizeMB(ramBufferSizeMB); // for better performance set to 48MB (see Lucene docu 'how to make indexing faster')
        log.info("IndexWriter config RAMBufferSizeMB=" + indexWriter.getRAMBufferSizeMB());
        indexWriter.setUseCompoundFile(useCompoundFile); // for better performance (see Lucene docu 'how to make indexing faster')
        log.info("IndexWriter config UseCompoundFile=" + indexWriter.getUseCompoundFile());

        // Create IndexWriterWorker
        log.info("Running with " + numberIndexWriter + " IndexerWriterWorker");
        indexWriterWorkers = new IndexWriterWorker[numberIndexWriter];
        final Directory[] partIndexDirs = new Directory[numberIndexWriter];
        for (int i = 0; i < numberIndexWriter; i++) {
            final IndexWriterWorker indexWriterWorker = new IndexWriterWorker(i, tempIndexDir, this);
            indexWriterWorkers[i] = indexWriterWorker;
            indexWriterWorkers[i].start();
            partIndexDirs[i] = indexWriterWorkers[i].getIndexDir();
        }

        final SearchResourceContext searchResourceContext = new SearchResourceContext();
        log.info("doIndex start. OlatFullIndexer with Debug output");
        mainIndexer.doIndex(searchResourceContext, null /* no parent */, this);

        log.info("Wait until every folder indexer is finished");
        DBFactory.getInstance().commitAndCloseSession();
        // check if every folder indexer is finished; max waiting time 10 min (= waitingCount limit of 60)
        int waitingCount = 0;
        final int MAX_WAITING_COUNT = 60; // = 10 min
        while (FolderIndexerWorkerPool.getInstance().isIndexerRunning() && (waitingCount++ < MAX_WAITING_COUNT)) {
            Thread.sleep(10000);
        }
        if (waitingCount >= MAX_WAITING_COUNT) {
            log.info("Finished with max waiting time!");
        }

        log.info("Set Finish-flag for each indexWriterWorkers");
        // Set Finish-flag
        for (int i = 0; i < numberIndexWriter; i++) {
            indexWriterWorkers[i].finishIndexing();
        }

        log.info("Wait until every indexworker is finished");
        // check if every indexworker is finished; max waiting time 10 min (= waitingCount limit of 60)
        waitingCount = 0;
        while (!areIndexingDone() && (waitingCount++ < MAX_WAITING_COUNT)) {
            Thread.sleep(10000);
        }
        if (waitingCount >= MAX_WAITING_COUNT) {
            log.info("Finished with max waiting time!");
        }

        // Merge all partIndex
        DBFactory.getInstance().commitAndCloseSession();
        if (partIndexDirs.length > 0) {
            log.info("Start merging part Indexes");
            indexWriter.addIndexesNoOptimize(partIndexDirs);
            log.info("Added all part Indexes");
        }
        fullIndexerStatus.setIndexSize(indexWriter.maxDoc());
        indexWriter.optimize();
        indexWriter.close();
    } catch (final IOException e) {
        e.printStackTrace();
        log.warn("Can not create IndexWriter, indexname=" + tempIndexPath, e);
    } finally {
        DBFactory.getInstance().commitAndCloseSession();
        log.debug("doIndex: commit & close session");
    }
}
Example 12
Source File: SearchSpellChecker.java From olat with Apache License 2.0
/**
 * Creates a new spell-check index based on the search index.
 */
public static void createSpellIndex(final SearchModule searchModule) {
    final String tempSearchIndexPath = searchModule.getTempSearchIndexPath();
    final String tempSpellCheckIndexPath = searchModule.getTempSpellCheckerIndexPath();

    IndexReader indexReader = null;
    try {
        log.info("Start generating spell check index ...");
        long startSpellIndexTime = 0;
        if (log.isDebugEnabled()) {
            startSpellIndexTime = System.currentTimeMillis();
        }
        final Directory indexDir = FSDirectory.open(new File(tempSearchIndexPath, "main"));
        indexReader = IndexReader.open(indexDir);

        // 1. Create content spellIndex
        log.info("Generating 'content' spell check index ...");
        final File contentSpellIndexPath = new File(tempSpellCheckIndexPath + CONTENT_PATH);
        FileUtils.deleteDirsAndFiles(contentSpellIndexPath, true, true);
        final Directory contentSpellIndexDirectory = FSDirectory.open(contentSpellIndexPath);
        final SpellChecker contentSpellChecker = new SpellChecker(contentSpellIndexDirectory);
        final Dictionary contentDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.CONTENT_FIELD_NAME);
        contentSpellChecker.indexDictionary(contentDictionary);

        // 2. Create title spellIndex
        log.info("Generating 'title' spell check index ...");
        final File titleSpellIndexPath = new File(tempSpellCheckIndexPath + TITLE_PATH);
        FileUtils.deleteDirsAndFiles(titleSpellIndexPath, true, true);
        final Directory titleSpellIndexDirectory = FSDirectory.open(titleSpellIndexPath);
        final SpellChecker titleSpellChecker = new SpellChecker(titleSpellIndexDirectory);
        final Dictionary titleDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.TITLE_FIELD_NAME);
        titleSpellChecker.indexDictionary(titleDictionary);

        // 3. Create description spellIndex
        log.info("Generating 'description' spell check index ...");
        final File descriptionSpellIndexPath = new File(tempSpellCheckIndexPath + DESCRIPTION_PATH);
        FileUtils.deleteDirsAndFiles(descriptionSpellIndexPath, true, true);
        final Directory descriptionSpellIndexDirectory = FSDirectory.open(descriptionSpellIndexPath);
        final SpellChecker descriptionSpellChecker = new SpellChecker(descriptionSpellIndexDirectory);
        final Dictionary descriptionDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
        descriptionSpellChecker.indexDictionary(descriptionDictionary);

        // 4. Create author spellIndex
        log.info("Generating 'author' spell check index ...");
        final File authorSpellIndexPath = new File(tempSpellCheckIndexPath + AUTHOR_PATH);
        FileUtils.deleteDirsAndFiles(authorSpellIndexPath, true, true);
        final Directory authorSpellIndexDirectory = FSDirectory.open(authorSpellIndexPath);
        final SpellChecker authorSpellChecker = new SpellChecker(authorSpellIndexDirectory);
        final Dictionary authorDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.AUTHOR_FIELD_NAME);
        authorSpellChecker.indexDictionary(authorDictionary);

        log.info("Merging spell check indices ...");
        // Merge all part spell indexes (content, title etc.) to one common spell index
        final File tempSpellCheckIndexDir = new File(tempSpellCheckIndexPath);
        FileUtils.deleteDirsAndFiles(tempSpellCheckIndexDir, true, true);
        final Directory tempSpellIndexDirectory = FSDirectory.open(tempSpellCheckIndexDir);
        final IndexWriter merger = new IndexWriter(tempSpellIndexDirectory,
                new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED);
        final Directory[] directories = { contentSpellIndexDirectory, titleSpellIndexDirectory,
                descriptionSpellIndexDirectory, authorSpellIndexDirectory };
        merger.addIndexesNoOptimize(directories);
        log.info("Optimizing spell check index ...");
        merger.optimize();
        merger.close();

        tempSpellIndexDirectory.close();
        contentSpellChecker.close();
        contentSpellIndexDirectory.close();
        titleSpellChecker.close();
        titleSpellIndexDirectory.close();
        descriptionSpellChecker.close();
        descriptionSpellIndexDirectory.close();
        authorSpellChecker.close();
        authorSpellIndexDirectory.close();

        FileUtils.deleteDirsAndFiles(contentSpellIndexPath, true, true);
        FileUtils.deleteDirsAndFiles(titleSpellIndexPath, true, true);
        FileUtils.deleteDirsAndFiles(descriptionSpellIndexPath, true, true);
        FileUtils.deleteDirsAndFiles(authorSpellIndexPath, true, true);

        if (log.isDebugEnabled()) {
            log.debug("Spell check index created in " + (System.currentTimeMillis() - startSpellIndexTime) + " ms.");
        }
    } catch (final IOException ioEx) {
        log.warn("Can not create spell check index.", ioEx);
    } finally {
        if (indexReader != null) {
            try {
                indexReader.close();
            } catch (final IOException e) {
                log.warn("Can not close indexReader properly", e);
            }
        }
    }
}
Example 13
Source File: OfflineSearchIndexer.java From SEAL with Apache License 2.0
public static void main(String[] argv) {
    try {
        GlobalVar gv = GlobalVar.getGlobalVar();
        // get args
        File indexDir = gv.getIndexDir();
        File localDir = gv.getLocalDir();
        File root = gv.getLocalRoot();
        boolean hasWrappers = false;
        String usage = OfflineSearchIndexer.class.getName() + " [-wrappers]";
        for (int i = 0; i < argv.length; i++) {
            if (argv[i].equals("-wrappers")) { // parse -wrappers option
                log.info("wrappers set true");
                hasWrappers = true;
            } else {
                log.error("Incorrect arguments in the command line");
                System.err.println(usage);
                System.err.println(" -wrappers means the directory contains wrappers saved in earlier run of seal");
                return;
            }
        }
        // check args
        if (root != null && !System.getenv("PWD").equals(root.getPath())) {
            log.error("to build an index relative to " + root + " run OfflineSearchIndexer from that directory, and make localDir a relative path");
            System.exit(-1);
        }
        if (root == null && !localDir.isAbsolute()) {
            log.warn("to build an absolute index make localDir an absolute path - this index will be relative to " + System.getenv("PWD"));
        }
        if (indexDir.exists()) {
            log.error("Cannot save index to '" + indexDir + "' directory, please delete it first");
            System.exit(-1);
        }
        if (!localDir.exists() || !localDir.canRead()) {
            System.out.println("Document directory '" + localDir.getAbsolutePath() + "' does not exist or is not readable, please check the path");
            System.exit(-1);
        }

        Date start = new Date();
        IndexWriter writer = new IndexWriter(FSDirectory.open(indexDir),
                new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.LIMITED);
        System.out.println("Indexing to directory '" + indexDir + "'...");
        indexDocs(writer, localDir, hasWrappers);
        System.out.println("Optimizing...");
        writer.optimize();
        writer.close();

        Date end = new Date();
        log.info("indexed " + numIndexed + " of " + numFiles + " files");
        log.info((end.getTime() - start.getTime()) + " total milliseconds");
    } catch (Exception e) {
        log.error(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
        e.printStackTrace();
    }
}