Java Code Examples for org.apache.lucene.index.IndexWriterConfig#setOpenMode()
The following examples show how to use
org.apache.lucene.index.IndexWriterConfig#setOpenMode().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Txt2PubmedIdIndexer.java From bluima with Apache License 2.0 | 6 votes |
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); try { // create writer Directory dir; dir = FSDirectory.open(new File(INDEX_PATH)); Analyzer analyzer = getAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig( Version.LUCENE_41, analyzer); iwc.setOpenMode(OpenMode.CREATE); indexWriter = new IndexWriter(dir, iwc); } catch (IOException e) { e.printStackTrace(); } }
Example 2
Source File: LucenePerUserWaveViewHandlerImpl.java From incubator-retired-wave with Apache License 2.0 | 6 votes |
/**
 * Builds the handler: stores its collaborators, opens the Lucene index writer in
 * {@code CREATE_OR_APPEND} mode, wraps it in an {@code NRTManager} and starts the
 * reopen thread.
 *
 * @param directory provider of the Lucene directory the index writer is opened on
 * @param waveletProvider stored in {@code this.waveletProvider}
 * @param domain passed to the {@code WaveSearchWarmer} handed to the NRT manager
 * @param executor stored in {@code this.executor}
 * @throws IndexException if opening the index writer or NRT manager raises an
 *     {@code IOException}
 */
@Inject
public LucenePerUserWaveViewHandlerImpl(
    IndexDirectory directory,
    ReadableWaveletDataProvider waveletProvider,
    @Named(CoreSettingsNames.WAVE_SERVER_DOMAIN) String domain,
    @IndexExecutor Executor executor) {
  this.waveletProvider = waveletProvider;
  this.executor = executor;
  analyzer = new StandardAnalyzer(LUCENE_VERSION);

  try {
    IndexWriterConfig writerConfig = new IndexWriterConfig(LUCENE_VERSION, analyzer);
    writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    indexWriter = new IndexWriter(directory.getDirectory(), writerConfig);
    nrtManager = new NRTManager(indexWriter, new WaveSearchWarmer(domain));
  } catch (IOException ex) {
    throw new IndexException(ex);
  }

  // The reopen thread is started only after the manager was created successfully.
  nrtManagerReopenThread =
      new NRTManagerReopenThread(nrtManager, MAX_STALE_SEC, MIN_STALE_SEC);
  nrtManagerReopenThread.start();
}
Example 3
Source File: LucenePerUserWaveViewHandlerImpl.java From swellrt with Apache License 2.0 | 6 votes |
@Inject public LucenePerUserWaveViewHandlerImpl(IndexDirectory directory, ReadableWaveletDataProvider waveletProvider, @Named(CoreSettingsNames.WAVE_SERVER_DOMAIN) String domain, @IndexExecutor Executor executor) { this.waveletProvider = waveletProvider; this.executor = executor; analyzer = new StandardAnalyzer(LUCENE_VERSION); try { IndexWriterConfig indexConfig = new IndexWriterConfig(LUCENE_VERSION, analyzer); indexConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); indexWriter = new IndexWriter(directory.getDirectory(), indexConfig); nrtManager = new NRTManager(indexWriter, new WaveSearchWarmer(domain)); } catch (IOException ex) { throw new IndexException(ex); } nrtManagerReopenThread = new NRTManagerReopenThread(nrtManager, MAX_STALE_SEC, MIN_STALE_SEC); nrtManagerReopenThread.start(); }
Example 4
Source File: Index.java From dacapobench with Apache License 2.0 | 6 votes |
/**
 * Index all text files under a directory.
 *
 * <p>Opens an index writer in {@code CREATE} mode with a {@code LogByteSizeMergePolicy}
 * on {@code INDEX_DIR}, then, for every entry of {@code args}, indexes that document
 * directory and force-merges the index down to a single segment.
 *
 * @param INDEX_DIR directory the index is written to
 * @param args paths of the document directories to index
 * @throws IOException if a document directory does not exist or is not readable, or if
 *     indexing fails
 */
public void main(final File INDEX_DIR, final String[] args) throws IOException {
    IndexWriterConfig writerConfig = new IndexWriterConfig();
    writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    writerConfig.setMergePolicy(new LogByteSizeMergePolicy());

    // try-with-resources: the writer (and its write lock) is released even when a
    // document directory is unreadable or indexing throws part-way through.
    try (IndexWriter writer =
            new IndexWriter(FSDirectory.open(Paths.get(INDEX_DIR.getCanonicalPath())), writerConfig)) {
        for (String arg : args) {
            final File docDir = new File(arg);
            if (!docDir.exists() || !docDir.canRead()) {
                System.out.println("Document directory '" + docDir.getAbsolutePath()
                        + "' does not exist or is not readable, please check the path");
                throw new IOException("Cannot read from document directory");
            }

            indexDocs(writer, docDir);
            System.out.println("Optimizing...");
            writer.forceMerge(1);
        }
    }
}
Example 5
Source File: LuceneVsLuceneTest.java From orientdb-lucene with Apache License 2.0 | 6 votes |
/**
 * Prepares the test fixture: initializes the database, declares the {@code Song} class
 * (a subclass of {@code V} with string properties {@code title} and {@code author}),
 * opens a plain Lucene index writer and creates the Lucene full-text index on
 * {@code Song.title}.
 *
 * <p>An {@code IOException} raised while opening the Lucene writer is only printed.
 */
@BeforeClass
public void init() {
    initDB();

    // Schema: Song extends V and carries two string properties.
    OSchema schema = databaseDocumentTx.getMetadata().getSchema();
    OClass vertexClass = schema.getClass("V");
    OClass songClass = schema.createClass("Song");
    songClass.setSuperClass(vertexClass);
    songClass.createProperty("title", OType.STRING);
    songClass.createProperty("author", OType.STRING);

    try {
        Directory luceneDir = getDirectory();
        Analyzer standardAnalyzer = new StandardAnalyzer(OLuceneIndexManagerAbstract.LUCENE_VERSION);
        IndexWriterConfig writerConfig =
                new IndexWriterConfig(OLuceneIndexManagerAbstract.LUCENE_VERSION, standardAnalyzer);
        writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        indexWriter = new IndexWriter(luceneDir, writerConfig);
    } catch (IOException e) {
        e.printStackTrace();
    }

    OCommandSQL createIndex =
            new OCommandSQL("create index Song.title on Song (title) FULLTEXT ENGINE LUCENE");
    databaseDocumentTx.command(createIndex).execute();
}
Example 6
Source File: InMemoryIndex.java From SnowGraph with Apache License 2.0 | 6 votes |
/**
 * Builds the index from the given id-to-text mapping.
 *
 * <p>Each map entry becomes one document with a stored {@code id} string field and a
 * stored {@code content} text field, analyzed with an {@code EnglishAnalyzer}. The index
 * is opened in {@code CREATE} mode on this object's {@code directory}.
 *
 * <p>An {@code IOException} during indexing is printed rather than propagated.
 *
 * @param id2Text mapping from document id to document text
 */
public InMemoryIndex(Map<String, String> id2Text) {
    Analyzer analyzer = new EnglishAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    // try-with-resources closes the writer even if adding a document fails.
    try (IndexWriter writer = new IndexWriter(directory, iwc)) {
        // Iterate entries directly instead of looking every key up again.
        for (Map.Entry<String, String> entry : id2Text.entrySet()) {
            Document doc = new Document();
            doc.add(new StringField("id", entry.getKey(), Field.Store.YES));
            doc.add(new TextField("content", entry.getValue(), Field.Store.YES));
            writer.addDocument(doc);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Example 7
Source File: Indexer.java From gerbil with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Opens a file-system directory at {@code path} and creates an index writer on it in
 * {@code CREATE} mode with a {@code StandardAnalyzer}.
 *
 * @param path location of the index directory
 * @throws GerbilException with {@code ErrorTypes.UNEXPECTED_EXCEPTION} if the directory or
 *     writer cannot be opened; the underlying {@code IOException} is logged
 */
public Indexer(String path) throws GerbilException {
    try {
        dir = FSDirectory.open(new File(path).toPath());

        IndexWriterConfig writerConfig = new IndexWriterConfig(new StandardAnalyzer());
        writerConfig.setOpenMode(OpenMode.CREATE);
        writer = new IndexWriter(dir, writerConfig);
    } catch (IOException e) {
        LOGGER.error("Error occured during accesing file " + path, e);
        throw new GerbilException(ErrorTypes.UNEXPECTED_EXCEPTION);
    }
}
Example 8
Source File: Collection.java From openbd-core with GNU General Public License v3.0 | 5 votes |
/** * Creates an empty collection to get it up and running */ public synchronized void create( boolean _errorOnExists ) throws IOException { setDirectory(); if ( directory.listAll().length > 2 ) { if ( _errorOnExists ) { throw new IOException( "directory not empty; possible collection already present" ); }else { if ( DirectoryReader.indexExists( directory ) ) { return; }// otherwise an index doesn't exist so allow the creation code to execute } } IndexWriterConfig iwc = new IndexWriterConfig( AnalyzerFactory.get(language) ); iwc.setOpenMode( OpenMode.CREATE ); indexwriter = new IndexWriter(directory, iwc); indexwriter.commit(); indexwriter.close(); indexwriter = null; // throw an openbd.create file in there so we know when it was created created = System.currentTimeMillis(); File touchFile = new File( collectionpath, "openbd.created" ); Writer fw = new FileWriter( touchFile ); fw.close(); }
Example 9
Source File: SearchEngineConfiguration.java From gravitee-management-rest-api with Apache License 2.0 | 5 votes |
/**
 * Exposes an {@code IndexWriter} bean opened in {@code CREATE} mode.
 *
 * @param directory the Lucene directory the writer operates on
 * @param analyzer the analyzer used for the writer configuration
 * @return a new index writer on {@code directory}
 * @throws IOException if the writer cannot be opened
 */
@Bean
public IndexWriter indexWriter(Directory directory, Analyzer analyzer) throws IOException {
    IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
    writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    IndexWriter writer = new IndexWriter(directory, writerConfig);
    return writer;
}
Example 10
Source File: AnalyzingInfixSuggester.java From lucene-solr with Apache License 2.0 | 5 votes |
/** Override this to customize index settings, e.g. which * codec to use. */ protected IndexWriterConfig getIndexWriterConfig(Analyzer indexAnalyzer, IndexWriterConfig.OpenMode openMode) { IndexWriterConfig iwc = new IndexWriterConfig(indexAnalyzer); iwc.setOpenMode(openMode); // This way all merged segments will be sorted at // merge time, allow for per-segment early termination // when those segments are searched: iwc.setIndexSort(SORT); return iwc; }
Example 11
Source File: Indexer.java From gerbil with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Factory that opens the directory at {@code indexDirPath}, creates an index writer on it
 * in {@code CREATE} mode and wraps both in an {@code Indexer}.
 *
 * @param indexDirPath location of the index directory
 * @return the new indexer, or {@code null} if an {@code IOException} occurred (the error is
 *     logged and the directory, if already opened, is closed quietly)
 */
public static Indexer create(String indexDirPath) {
    Directory indexDirectory = null;
    try {
        indexDirectory = FSDirectory.open(new File(indexDirPath).toPath());

        IndexWriterConfig writerConfig = new IndexWriterConfig();
        writerConfig.setOpenMode(OpenMode.CREATE);

        IndexWriter indexWriter = new IndexWriter(indexDirectory, writerConfig);
        return new Indexer(indexDirectory, indexWriter);
    } catch (IOException e) {
        LOGGER.error("Exception while trying to create index writer for entity checking. Returning null.", e);
        IOUtils.closeQuietly(indexDirectory);
    }
    return null;
}
Example 12
Source File: LuceneIndex.java From rdf4j with BSD 3-Clause "New" or "Revised" License | 5 votes |
private void postInit() throws IOException { this.queryAnalyzer = new StandardAnalyzer(); // do some initialization for new indices if (!DirectoryReader.indexExists(directory)) { logger.debug("creating new Lucene index in directory {}", directory); IndexWriterConfig indexWriterConfig = getIndexWriterConfig(); indexWriterConfig.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(directory, indexWriterConfig); writer.close(); } }
Example 13
Source File: IndexFiles.java From elasticsearch-full with Apache License 2.0 | 5 votes |
/**
 * Indexes the documents under a fixed documents path into the {@code index} directory.
 *
 * <p>Exits with status 1 when the documents path is not readable. Otherwise the index is
 * opened in {@code CREATE} mode (the {@code create} flag is hard-wired to {@code true}),
 * the documents are indexed and the elapsed time is printed. Any exception raised while
 * indexing is reported on standard output.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    String indexPath = "index";
    String docsPath = "/Users/admin/github/elasticsearch-full/JAVA-QUERY-API/";
    boolean create = true;

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        if (create) {
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        } else {
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        }

        // try-with-resources: writer and directory are closed even when indexDocs throws,
        // so the write lock is not left behind.
        try (Directory dir = FSDirectory.open(Paths.get(indexPath));
             IndexWriter writer = new IndexWriter(dir, iwc)) {
            indexDocs(writer, docDir);
        }

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
    } catch (Exception e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}
Example 14
Source File: NLPIRTokenizerTest.java From nlpir-analysis-cn-ictclas with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception { // NLPIR NLPIRTokenizerAnalyzer nta = new NLPIRTokenizerAnalyzer("", 1, "", "", false); // Index IndexWriterConfig inconf = new IndexWriterConfig(nta); inconf.setOpenMode(OpenMode.CREATE_OR_APPEND); IndexWriter index = new IndexWriter(FSDirectory.open(Paths.get("index/")), inconf); Document doc = new Document(); doc.add(new TextField("contents", "特朗普表示,很高兴汉堡会晤后再次同习近平主席通话。我同习主席就重大问题保持沟通和协调、两国加强各层级和各领域交往十分重要。当前,美中关系发展态势良好,我相信可以发展得更好。我期待着对中国进行国事访问。", Field.Store.YES)); index.addDocument(doc); index.flush(); index.close(); // Search String field = "contents"; IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("index/"))); IndexSearcher searcher = new IndexSearcher(reader); QueryParser parser = new QueryParser(field, nta); Query query = parser.parse("特朗普习近平"); TopDocs top = searcher.search(query, 100); System.out.println("总条数:" + top.totalHits); ScoreDoc[] hits = top.scoreDocs; for (int i = 0; i < hits.length; i++) { System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score); Document d = searcher.doc(hits[i].doc); System.out.println(d.get("contents")); } }
Example 15
Source File: IndexUtil.java From everywhere with Apache License 2.0 | 5 votes |
/**
 * Opens an index writer on the directory at {@code indexPath} using a
 * {@code SmartChineseAnalyzer} and a {@code LogByteSizeMergePolicy} with merge factor 50
 * and at most 5000 documents per merged segment.
 *
 * @param indexPath location of the index directory
 * @param create {@code true} to open in {@code CREATE} mode, {@code false} to open in
 *     {@code CREATE_OR_APPEND} mode
 * @return the opened index writer; the caller is responsible for closing it
 * @throws IOException if the directory or the writer cannot be opened
 */
public static IndexWriter getIndexWriter(String indexPath, boolean create) throws IOException {
    Directory dir = FSDirectory.open(Paths.get(indexPath));
    Analyzer analyzer = new SmartChineseAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

    LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
    mergePolicy.setMergeFactor(50);
    mergePolicy.setMaxMergeDocs(5000);
    // Attach the policy to the config: it used to be built and then dropped, so the
    // merge factor and max-merge-docs settings above never took effect.
    iwc.setMergePolicy(mergePolicy);

    if (create) {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    } else {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    return new IndexWriter(dir, iwc);
}
Example 16
Source File: LuceneMessageSearchIndex.java From james-project with Apache License 2.0 | 5 votes |
/**
 * Creates the writer configuration for the given analyzer.
 *
 * @param analyzer analyzer the configuration is created with
 * @param dropIndexOnStart {@code true} selects {@code OpenMode.CREATE}; {@code false}
 *     selects {@code OpenMode.CREATE_OR_APPEND}
 * @return the new configuration
 */
protected IndexWriterConfig createConfig(Analyzer analyzer, boolean dropIndexOnStart) {
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_31, analyzer);
    OpenMode mode = dropIndexOnStart ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND;
    config.setOpenMode(mode);
    return config;
}
Example 17
Source File: DocumentIndexer.java From act with GNU General Public License v3.0 | 4 votes |
public static void main(String[] args) throws Exception { System.out.println("Starting up..."); System.out.flush(); Options opts = new Options(); opts.addOption(Option.builder("i"). longOpt("input").hasArg().required().desc("Input file or directory to index").build()); opts.addOption(Option.builder("x"). longOpt("index").hasArg().required().desc("Path to index file to generate").build()); opts.addOption(Option.builder("h").longOpt("help").desc("Print this help message and exit").build()); opts.addOption(Option.builder("v").longOpt("verbose").desc("Print verbose log output").build()); HelpFormatter helpFormatter = new HelpFormatter(); CommandLineParser cmdLineParser = new DefaultParser(); CommandLine cmdLine = null; try { cmdLine = cmdLineParser.parse(opts, args); } catch (ParseException e) { System.out.println("Caught exception when parsing command line: " + e.getMessage()); helpFormatter.printHelp("DocumentIndexer", opts); System.exit(1); } if (cmdLine.hasOption("help")) { helpFormatter.printHelp("DocumentIndexer", opts); System.exit(0); } if (cmdLine.hasOption("verbose")) { // With help from http://stackoverflow.com/questions/23434252/programmatically-change-log-level-in-log4j2 LoggerContext ctx = (LoggerContext) LogManager.getContext(false); Configuration ctxConfig = ctx.getConfiguration(); LoggerConfig logConfig = ctxConfig.getLoggerConfig(LogManager.ROOT_LOGGER_NAME); logConfig.setLevel(Level.DEBUG); ctx.updateLoggers(); LOGGER.debug("Verbose logging enabled"); } LOGGER.info("Opening index at " + cmdLine.getOptionValue("index")); Directory indexDir = FSDirectory.open(new File(cmdLine.getOptionValue("index")).toPath()); /* The standard analyzer is too aggressive with chemical entities (it strips structural annotations, for one * thing), and the whitespace analyzer doesn't do any case normalization or stop word elimination. This custom * analyzer appears to treat chemical entities better than the standard analyzer without admitting too much * cruft to the index. 
*/ Analyzer analyzer = CustomAnalyzer.builder(). withTokenizer("whitespace"). addTokenFilter("lowercase"). addTokenFilter("stop"). build(); IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer); writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); writerConfig.setRAMBufferSizeMB(1 << 10); IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig); String inputFileOrDir = cmdLine.getOptionValue("input"); File splitFileOrDir = new File(inputFileOrDir); if (!(splitFileOrDir.exists())) { LOGGER.error("Unable to find directory at " + inputFileOrDir); System.exit(1); } DocumentIndexer indexer = new DocumentIndexer(indexWriter); PatentCorpusReader corpusReader = new PatentCorpusReader(indexer, splitFileOrDir); corpusReader.readPatentCorpus(); indexer.commitAndClose(); }
Example 18
Source File: IndexFiles.java From Java-Data-Science-Cookbook with MIT License | 4 votes |
/**
 * Indexes the documents under {@code -docs DOCS_PATH} into {@code -index INDEX_PATH}
 * (default {@code index}).
 *
 * <p>Without {@code -update} the index is opened in {@code CREATE} mode; with it, in
 * {@code CREATE_OR_APPEND} mode. The process exits with status 1 when {@code -docs} is
 * missing or when {@code -index}/{@code -docs} is given without a value. An
 * {@code IOException} raised while indexing is reported on standard output.
 *
 * @param args command line arguments
 */
public static void main(String[] args) {
    final String usage = "Usage: IndexFiles [-index INDEX_PATH] -docs DOCS_PATH [-update]";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;

    for (int i = 0; i < args.length; i++) {
        String arg = args[i];
        if ("-index".equals(arg) || "-docs".equals(arg)) {
            // A flag given as the very last argument has no value to read.
            if (i + 1 >= args.length) {
                System.err.println("Missing value for " + arg);
                System.err.println(usage);
                System.exit(1);
            }
            String value = args[++i];
            if ("-index".equals(arg)) {
                indexPath = value;
            } else {
                docsPath = value;
            }
        } else if ("-update".equals(arg)) {
            create = false;
        }
    }

    // Paths.get(null) would fail with a bare NullPointerException; report it instead.
    if (docsPath == null) {
        System.err.println(usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        if (create) {
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // try-with-resources: writer and directory are closed even when indexDocs throws.
        try (Directory dir = FSDirectory.open(Paths.get(indexPath));
             IndexWriter writer = new IndexWriter(dir, iwc)) {
            indexDocs(writer, docDir);
        }

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
    } catch (IOException e) {
        // The failure used to be swallowed silently; at least tell the user what happened.
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}
Example 19
Source File: IndexFiles.java From word2vec-query-expansion with Apache License 2.0 | 4 votes |
/** Index all text files under a directory. */ @SuppressWarnings("deprecation") public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null; boolean create = true; for(int i=0;i<args.length;i++) { if ("-index".equals(args[i])) { indexPath = args[i+1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i+1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final File docDir = new File(docsPath); if (!docDir.exists() || !docDir.canRead()) { System.out.println("Document directory '" +docDir.getAbsolutePath()+ "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(new File(indexPath)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. 
This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
Example 20
Source File: BuildEnsembleSearchIndex.java From marathonv5 with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception{ File samplesFilesDir = new File("build/classes/ensemble/"); File indexDir = new File("build/classes/ensemble/search/index"); File docDir = new File("../../../artifacts/sdk/docs/api"); File samplesDir = new File("src/ensemble/samples"); // create index ///System.out.println("Indexing to directory '" + indexDir + "'..."); long start = System.currentTimeMillis(); Directory dir = FSDirectory.open(indexDir); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer); iwc.setOpenMode(OpenMode.CREATE); // generate and write index of all java doc and samples IndexWriter writer = new IndexWriter(dir, iwc); List<String> samplesFileList = new ArrayList<String>(); indexSamples(writer, samplesDir, samplesFileList); try { indexJavaDocAllClasses(writer, docDir); } catch (Exception e) { System.out.println("\nWarning: We were not able to locate the JavaFX API documentation for your build environment.\n" + "Ensemble search will not include the API documentation.\n"); } writer.close(); // create a listAll.txt file that is used FileWriter listAllOut = new FileWriter(new File(indexDir,"listAll.txt")); for (String fileName: dir.listAll()) { if (!"listAll.txt".equals(fileName)) { // don't include the "listAll.txt" file Long length = dir.fileLength(fileName); listAllOut.write(fileName); listAllOut.write(':'); listAllOut.write(length.toString()); listAllOut.write('\n'); } } listAllOut.flush(); listAllOut.close(); FileWriter sampleFilesCache = new FileWriter(new File(samplesFilesDir,"samplesAll.txt")); for (String oneSample: samplesFileList) { sampleFilesCache.write(oneSample); sampleFilesCache.write('\n'); } sampleFilesCache.flush(); sampleFilesCache.close(); // print time taken ///System.out.println(System.currentTimeMillis() - start + " total milliseconds"); }