org.apache.lucene.index.IndexWriterConfig#setOpenMode

Source File: Txt2PubmedIdIndexer.java From bluima with Apache License 2.0

6 votes

/**
 * Initializes the component and opens a new Lucene index at {@code INDEX_PATH}.
 *
 * <p>The writer is configured with {@code OpenMode.CREATE} and stored in the
 * {@code indexWriter} field.
 *
 * @param context the UIMA context, forwarded to {@code super.initialize}
 * @throws ResourceInitializationException if the parent initialization fails
 *         or the index writer cannot be opened
 */
@Override
public void initialize(UimaContext context)
        throws ResourceInitializationException {
    super.initialize(context);
    try {
        // create writer
        Directory dir = FSDirectory.open(new File(INDEX_PATH));
        Analyzer analyzer = getAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(
                Version.LUCENE_41, analyzer);
        iwc.setOpenMode(OpenMode.CREATE);
        indexWriter = new IndexWriter(dir, iwc);
    } catch (IOException e) {
        // Abort initialization rather than continue with a null indexWriter.
        throw new ResourceInitializationException(e);
    }
}

Source File: LucenePerUserWaveViewHandlerImpl.java From incubator-retired-wave with Apache License 2.0

6 votes

/**
 * Builds the handler: stores its collaborators, opens the index writer in
 * {@code CREATE_OR_APPEND} mode, wraps it in an {@code NRTManager} and starts
 * the reopen thread.
 *
 * @param directory provides the Lucene directory the index writer is opened on
 * @param waveletProvider stored for later use by this handler
 * @param domain passed to the {@code WaveSearchWarmer}
 * @param executor stored for later use by this handler
 * @throws IndexException if opening the writer or the manager fails with an
 *         {@code IOException}
 */
@Inject
public LucenePerUserWaveViewHandlerImpl(IndexDirectory directory,
                                        ReadableWaveletDataProvider waveletProvider,
                                        @Named(CoreSettingsNames.WAVE_SERVER_DOMAIN) String domain,
                                        @IndexExecutor Executor executor) {
  this.executor = executor;
  this.waveletProvider = waveletProvider;
  analyzer = new StandardAnalyzer(LUCENE_VERSION);

  try {
    IndexWriterConfig writerSettings = new IndexWriterConfig(LUCENE_VERSION, analyzer);
    writerSettings.setOpenMode(OpenMode.CREATE_OR_APPEND);
    indexWriter = new IndexWriter(directory.getDirectory(), writerSettings);
    nrtManager = new NRTManager(indexWriter, new WaveSearchWarmer(domain));
  } catch (IOException cause) {
    throw new IndexException(cause);
  }

  // The reopen thread is created and started only after the manager exists.
  nrtManagerReopenThread = new NRTManagerReopenThread(nrtManager, MAX_STALE_SEC, MIN_STALE_SEC);
  nrtManagerReopenThread.start();
}

Source File: LucenePerUserWaveViewHandlerImpl.java From swellrt with Apache License 2.0

6 votes

@Inject
public LucenePerUserWaveViewHandlerImpl(IndexDirectory directory,
                                        ReadableWaveletDataProvider waveletProvider,
                                        @Named(CoreSettingsNames.WAVE_SERVER_DOMAIN) String domain,
                                        @IndexExecutor Executor executor) {
  this.waveletProvider = waveletProvider;
  this.executor = executor;
  analyzer = new StandardAnalyzer(LUCENE_VERSION);
  try {
    IndexWriterConfig indexConfig = new IndexWriterConfig(LUCENE_VERSION, analyzer);
    indexConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    indexWriter = new IndexWriter(directory.getDirectory(), indexConfig);
    nrtManager = new NRTManager(indexWriter, new WaveSearchWarmer(domain));
  } catch (IOException ex) {
    throw new IndexException(ex);
  }

  nrtManagerReopenThread = new NRTManagerReopenThread(nrtManager, MAX_STALE_SEC, MIN_STALE_SEC);
  nrtManagerReopenThread.start();
}

Source File: Index.java From dacapobench with Apache License 2.0

6 votes

/**
 * Index all text files under each directory named in {@code args}.
 *
 * <p>A new index is created in {@code INDEX_DIR} using a
 * {@code LogByteSizeMergePolicy}. After each directory is indexed the index
 * is force-merged down to a single segment.
 *
 * @param INDEX_DIR location of the index to create
 * @param args document directories to index, processed in order
 * @throws IOException if a document directory does not exist or is not
 *         readable, or if indexing fails
 */
public void main(final File INDEX_DIR, final String[] args) throws IOException {
  IndexWriterConfig IWConfig = new IndexWriterConfig();
  IWConfig.setOpenMode (IndexWriterConfig.OpenMode.CREATE);
  IWConfig.setMergePolicy (new LogByteSizeMergePolicy());
  // try-with-resources closes the writer and then the directory even when
  // an unreadable directory or an indexing failure aborts the loop.
  try (FSDirectory indexDirectory = FSDirectory.open(Paths.get(INDEX_DIR.getCanonicalPath()));
       IndexWriter writer = new IndexWriter(indexDirectory, IWConfig)) {
    for (int arg = 0; arg < args.length; arg++) {
      final File docDir = new File(args[arg]);
      if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path");
        throw new IOException("Cannot read from document directory");
      }

      indexDocs(writer, docDir);
      System.out.println("Optimizing...");
      writer.forceMerge(1);
    }
  }
}

Source File: LuceneVsLuceneTest.java From orientdb-lucene with Apache License 2.0

6 votes

/**
 * Prepares the fixture: initializes the database, creates the {@code Song}
 * class (a subclass of {@code V}) with {@code title} and {@code author}
 * string properties, opens a Lucene index writer in
 * {@code CREATE_OR_APPEND} mode, and creates the {@code Song.title}
 * full-text Lucene index in the database.
 *
 * @throws IllegalStateException if the Lucene index writer cannot be opened
 */
@BeforeClass
public void init() {
  initDB();
  OSchema schema = databaseDocumentTx.getMetadata().getSchema();
  OClass v = schema.getClass("V");
  OClass song = schema.createClass("Song");
  song.setSuperClass(v);
  song.createProperty("title", OType.STRING);
  song.createProperty("author", OType.STRING);

  try {
    Directory dir = getDirectory();
    Analyzer analyzer = new StandardAnalyzer(OLuceneIndexManagerAbstract.LUCENE_VERSION);
    IndexWriterConfig iwc = new IndexWriterConfig(OLuceneIndexManagerAbstract.LUCENE_VERSION, analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    indexWriter = new IndexWriter(dir, iwc);
  } catch (IOException e) {
    // Fail the setup here; otherwise indexWriter stays null and the failure
    // only surfaces later, far from its cause.
    throw new IllegalStateException("Unable to open the Lucene index writer", e);
  }
  databaseDocumentTx.command(new OCommandSQL("create index Song.title on Song (title) FULLTEXT ENGINE LUCENE")).execute();
}

Source File: InMemoryIndex.java From SnowGraph with Apache License 2.0

6 votes

/**
 * Builds the index from an id-to-text mapping.
 *
 * <p>Each entry becomes one document with a stored, untokenized {@code id}
 * field and a stored, analyzed {@code content} field. The index in
 * {@code directory} is opened with {@code OpenMode.CREATE}.
 *
 * <p>An {@code IOException} during indexing is printed to standard error
 * and not rethrown.
 *
 * @param id2Text mapping from document id to document text
 */
public InMemoryIndex(Map<String,String> id2Text){
    Analyzer analyzer = new EnglishAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    // try-with-resources closes the writer even when addDocument fails.
    try (IndexWriter writer = new IndexWriter(directory, iwc)) {
        // entrySet avoids a second map lookup per id.
        for (Map.Entry<String, String> entry : id2Text.entrySet()) {
            Document doc = new Document();
            doc.add(new StringField("id", entry.getKey(), Field.Store.YES));
            doc.add(new TextField("content", entry.getValue(), Field.Store.YES));
            writer.addDocument(doc);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

Source File: Indexer.java From gerbil with GNU Affero General Public License v3.0

5 votes

/**
 * Opens a file-system directory at {@code path} and an index writer on it,
 * using a {@code StandardAnalyzer} and {@code OpenMode.CREATE}.
 *
 * @param path file-system location of the index
 * @throws GerbilException with {@code ErrorTypes.UNEXPECTED_EXCEPTION} if an
 *         {@code IOException} occurs; the I/O error itself is logged
 */
public Indexer(String path)
		throws GerbilException {
	try {
		dir = FSDirectory.open(new File(path).toPath());
		IndexWriterConfig writerConfig = new IndexWriterConfig(new StandardAnalyzer());
		writerConfig.setOpenMode(OpenMode.CREATE);
		writer = new IndexWriter(dir, writerConfig);
	} catch (IOException ioFailure) {
		LOGGER.error("Error occured during accesing file " + path, ioFailure);
		throw new GerbilException(ErrorTypes.UNEXPECTED_EXCEPTION);
	}
}

Source File: Collection.java From openbd-core with GNU General Public License v3.0

5 votes

/**
 * Creates an empty collection to get it up and running.
 *
 * <p>When the directory lists more than two entries it is treated as
 * possibly holding a collection already: that is an error if
 * {@code _errorOnExists} is set, and otherwise the method returns without
 * changes when an index is present. In all remaining cases an empty index
 * is written, and an {@code openbd.created} marker file is placed in the
 * collection path.
 *
 * @param _errorOnExists whether a non-empty directory is reported as an error
 * @throws IOException if the directory is not empty and
 *         {@code _errorOnExists} is set, or if writing the index or the
 *         marker file fails
 */
public synchronized void create( boolean _errorOnExists ) throws IOException {
	setDirectory();
	
	// NOTE(review): the threshold of 2 presumably allows for files that are not part of an index -- confirm
	if ( directory.listAll().length > 2 ) {
		if ( _errorOnExists ) {
			throw new IOException( "directory not empty; possible collection already present" );
		}else {
			if ( DirectoryReader.indexExists( directory ) ) {
				return;
			}// otherwise an index doesn't exist so allow the creation code to execute
		}
	}

	IndexWriterConfig iwc = new IndexWriterConfig( AnalyzerFactory.get(language) );
	iwc.setOpenMode( OpenMode.CREATE );
	
	indexwriter = new IndexWriter(directory, iwc);
	// Close the writer and clear the field even if commit() fails, so a failed
	// create never leaves an open writer behind.
	try ( IndexWriter openedWriter = indexwriter ) {
		openedWriter.commit();
	} finally {
		indexwriter = null;
	}
	
	// throw an openbd.create file in there so we know when it was created
	created	= System.currentTimeMillis();
	File touchFile	= new File( collectionpath, "openbd.created" );
	Writer	fw	= new FileWriter( touchFile );
	fw.close();
}

Source File: SearchEngineConfiguration.java From gravitee-management-rest-api with Apache License 2.0

5 votes

/**
 * Exposes an {@code IndexWriter} bean opened on the given directory with
 * {@code OpenMode.CREATE}.
 *
 * @param directory the directory the writer operates on
 * @param analyzer the analyzer used by the writer configuration
 * @return the newly opened writer
 * @throws IOException if the writer cannot be opened
 */
@Bean
public IndexWriter indexWriter(Directory directory, Analyzer analyzer) throws IOException {
    final IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
    writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    final IndexWriter writer = new IndexWriter(directory, writerConfig);
    return writer;
}

Source File: AnalyzingInfixSuggester.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Builds the writer configuration; override this to customize index
 * settings, e.g. which codec to use.
 *
 * @param indexAnalyzer analyzer for the configuration
 * @param openMode open mode applied to the configuration
 * @return a configuration with the open mode set and {@code SORT} as index sort
 */
protected IndexWriterConfig getIndexWriterConfig(Analyzer indexAnalyzer, IndexWriterConfig.OpenMode openMode) {
  final IndexWriterConfig config = new IndexWriterConfig(indexAnalyzer);
  config.setOpenMode(openMode);
  // Sorting the index means merged segments are sorted at merge time,
  // which allows per-segment early termination when they are searched.
  config.setIndexSort(SORT);
  return config;
}

Source File: Indexer.java From gerbil with GNU Affero General Public License v3.0

5 votes

/**
 * Factory that opens an index at {@code indexDirPath} with
 * {@code OpenMode.CREATE} and wraps it in an {@code Indexer}.
 *
 * @param indexDirPath file-system location of the index
 * @return the new {@code Indexer}, or {@code null} if an {@code IOException}
 *         occurred (the error is logged and the directory closed quietly)
 */
public static Indexer create(String indexDirPath) {
    Directory indexDirectory = null;
    try {
        indexDirectory = FSDirectory.open(new File(indexDirPath).toPath());
        IndexWriterConfig writerConfig = new IndexWriterConfig();
        writerConfig.setOpenMode(OpenMode.CREATE);
        return new Indexer(indexDirectory, new IndexWriter(indexDirectory, writerConfig));
    } catch (IOException ioFailure) {
        LOGGER.error("Exception while trying to create index writer for entity checking. Returning null.", ioFailure);
        IOUtils.closeQuietly(indexDirectory);
        return null;
    }
}

Source File: LuceneIndex.java From rdf4j with BSD 3-Clause "New" or "Revised" License

5 votes

/**
 * Sets the query analyzer and, when {@code directory} holds no index yet,
 * creates an empty one by opening and immediately closing a writer in
 * {@code OpenMode.CREATE}.
 *
 * @throws IOException if checking for or creating the index fails
 */
private void postInit() throws IOException {
	this.queryAnalyzer = new StandardAnalyzer();

	if (DirectoryReader.indexExists(directory)) {
		// nothing to initialize for an existing index
		return;
	}

	logger.debug("creating new Lucene index in directory {}", directory);
	IndexWriterConfig creationConfig = getIndexWriterConfig();
	creationConfig.setOpenMode(OpenMode.CREATE);
	new IndexWriter(directory, creationConfig).close();
}

Source File: IndexFiles.java From elasticsearch-full with Apache License 2.0

5 votes

/**
 * Indexes the documents under a fixed documents path into the {@code index}
 * directory and prints the elapsed time.
 *
 * <p>Exits with status 1 if the documents path is not readable. Any
 * exception during indexing is reported on standard output.
 *
 * @param args unused
 */
public static void main(String[] args) {

        String indexPath = "index";
        String docsPath = "/Users/admin/github/elasticsearch-full/JAVA-QUERY-API/";
        boolean create = true;

        final Path docDir = Paths.get(docsPath);
        if (!Files.isReadable(docDir)) {
            System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path");
            System.exit(1);
        }

        Date start = new Date();
        try {
            System.out.println("Indexing to directory '" + indexPath + "'...");
            Analyzer analyzer = new StandardAnalyzer();
            IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
            if (create) {
                iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            } else {
                iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            }

            // try-with-resources closes the writer and the directory even
            // when indexDocs fails part-way through.
            try (Directory dir = FSDirectory.open(Paths.get(indexPath));
                 IndexWriter writer = new IndexWriter(dir, iwc)) {
                indexDocs(writer, docDir);
            }
            Date end = new Date();
            System.out.println(end.getTime() - start.getTime() + " total milliseconds");
        } catch (Exception e) {
            System.out.println(" caught a " + e.getClass() +
                    "\n with message: " + e.getMessage());
        }

    }

Source File: NLPIRTokenizerTest.java From nlpir-analysis-cn-ictclas with Apache License 2.0

5 votes

/**
 * Smoke test for the NLPIR analyzer: indexes one document into
 * {@code index/}, then searches it and prints the hit count plus each hit's
 * document id, score and stored contents.
 *
 * @param args unused
 * @throws Exception if indexing, query parsing or searching fails
 */
public static void main(String[] args) throws Exception {
	// NLPIR
	NLPIRTokenizerAnalyzer nta = new NLPIRTokenizerAnalyzer("", 1, "", "", false);
	// One directory handle serves both writer and reader; it is closed at the end.
	try (FSDirectory indexDir = FSDirectory.open(Paths.get("index/"))) {
		// Index
		IndexWriterConfig inconf = new IndexWriterConfig(nta);
		inconf.setOpenMode(OpenMode.CREATE_OR_APPEND);
		// The writer is closed (also on failure) before the reader is opened.
		try (IndexWriter index = new IndexWriter(indexDir, inconf)) {
			Document doc = new Document();
			doc.add(new TextField("contents",
					"特朗普表示，很高兴汉堡会晤后再次同习近平主席通话。我同习主席就重大问题保持沟通和协调、两国加强各层级和各领域交往十分重要。当前，美中关系发展态势良好，我相信可以发展得更好。我期待着对中国进行国事访问。",
					Field.Store.YES));
			index.addDocument(doc);
			index.flush();
		}
		// Search
		String field = "contents";
		// The reader was never closed before; close it once searching is done.
		try (IndexReader reader = DirectoryReader.open(indexDir)) {
			IndexSearcher searcher = new IndexSearcher(reader);
			QueryParser parser = new QueryParser(field, nta);
			Query query = parser.parse("特朗普习近平");
			TopDocs top = searcher.search(query, 100);
			System.out.println("总条数：" + top.totalHits);
			ScoreDoc[] hits = top.scoreDocs;
			for (int i = 0; i < hits.length; i++) {
				System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
				Document d = searcher.doc(hits[i].doc);
				System.out.println(d.get("contents"));
			}
		}
	}

}

Source File: IndexUtil.java From everywhere with Apache License 2.0

5 votes

/**
 * Opens an index writer on {@code indexPath} using a
 * {@code SmartChineseAnalyzer} and a {@code LogByteSizeMergePolicy} with
 * merge factor 50 and at most 5000 documents per merged segment.
 *
 * @param indexPath file-system location of the index
 * @param create {@code true} to open with {@code OpenMode.CREATE},
 *        {@code false} to open with {@code OpenMode.CREATE_OR_APPEND}
 * @return the opened writer; the caller is responsible for closing it
 * @throws IOException if the directory or the writer cannot be opened
 */
public static IndexWriter getIndexWriter(String indexPath, boolean create) throws IOException {
    Directory dir = FSDirectory.open(Paths.get(indexPath));
    Analyzer analyzer = new SmartChineseAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
    mergePolicy.setMergeFactor(50);
    mergePolicy.setMaxMergeDocs(5000);
    // The policy was configured but never installed, so the settings above
    // had no effect on the writer.
    iwc.setMergePolicy(mergePolicy);
    if (create){
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    } else {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    return new IndexWriter(dir, iwc);
}

Source File: LuceneMessageSearchIndex.java From james-project with Apache License 2.0

5 votes

/**
 * Creates the writer configuration for the given analyzer.
 *
 * @param analyzer analyzer for the configuration
 * @param dropIndexOnStart {@code true} selects {@code OpenMode.CREATE},
 *        {@code false} selects {@code OpenMode.CREATE_OR_APPEND}
 * @return the configuration with the open mode applied
 */
protected IndexWriterConfig createConfig(Analyzer analyzer, boolean dropIndexOnStart) {
    final IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_31, analyzer);
    final OpenMode mode = dropIndexOnStart ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND;
    writerConfig.setOpenMode(mode);
    return writerConfig;
}

Source File: DocumentIndexer.java From act with GNU General Public License v3.0

4 votes

/**
 * Command-line entry point: parses the options, optionally enables debug
 * logging, opens (or appends to) the index given by {@code --index}, and
 * feeds the patent corpus found at {@code --input} into it.
 *
 * <p>Exits with status 1 on a command-line parse error or when the input
 * path does not exist, and with status 0 after printing help.
 *
 * @param args command-line arguments; {@code -i/--input} and
 *        {@code -x/--index} are required, {@code -h/--help} and
 *        {@code -v/--verbose} are optional
 * @throws Exception if opening the index or reading the corpus fails
 */
public static void main(String[] args) throws Exception {
  System.out.println("Starting up...");
  System.out.flush();
  Options opts = new Options();
  opts.addOption(Option.builder("i").
      longOpt("input").hasArg().required().desc("Input file or directory to index").build());
  opts.addOption(Option.builder("x").
      longOpt("index").hasArg().required().desc("Path to index file to generate").build());
  opts.addOption(Option.builder("h").longOpt("help").desc("Print this help message and exit").build());
  opts.addOption(Option.builder("v").longOpt("verbose").desc("Print verbose log output").build());

  HelpFormatter helpFormatter = new HelpFormatter();
  CommandLineParser cmdLineParser = new DefaultParser();
  CommandLine cmdLine = null;
  try {
    cmdLine = cmdLineParser.parse(opts, args);
  } catch (ParseException e) {
    System.out.println("Caught exception when parsing command line: " + e.getMessage());
    helpFormatter.printHelp("DocumentIndexer", opts);
    System.exit(1);
  }

  if (cmdLine.hasOption("help")) {
    helpFormatter.printHelp("DocumentIndexer", opts);
    System.exit(0);
  }

  if (cmdLine.hasOption("verbose")) {
    // With help from http://stackoverflow.com/questions/23434252/programmatically-change-log-level-in-log4j2
    LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
    Configuration ctxConfig = ctx.getConfiguration();
    LoggerConfig logConfig = ctxConfig.getLoggerConfig(LogManager.ROOT_LOGGER_NAME);
    logConfig.setLevel(Level.DEBUG);

    ctx.updateLoggers();
    LOGGER.debug("Verbose logging enabled");
  }

  // Validate the input before touching the index, so a bad path never
  // leaves an opened index writer behind when the process exits.
  String inputFileOrDir = cmdLine.getOptionValue("input");
  File splitFileOrDir = new File(inputFileOrDir);
  if (!(splitFileOrDir.exists())) {
    LOGGER.error("Unable to find directory at " + inputFileOrDir);
    System.exit(1);
  }

  LOGGER.info("Opening index at " + cmdLine.getOptionValue("index"));
  Directory indexDir = FSDirectory.open(new File(cmdLine.getOptionValue("index")).toPath());

  /* The standard analyzer is too aggressive with chemical entities (it strips structural annotations, for one
   * thing), and the whitespace analyzer doesn't do any case normalization or stop word elimination.  This custom
   * analyzer appears to treat chemical entities better than the standard analyzer without admitting too much
   * cruft to the index. */
  Analyzer analyzer = CustomAnalyzer.builder().
      withTokenizer("whitespace").
      addTokenFilter("lowercase").
      addTokenFilter("stop").
      build();

  IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
  writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
  // 1 << 10 MB, i.e. a 1024 MB RAM buffer.
  writerConfig.setRAMBufferSizeMB(1 << 10);
  IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig);

  DocumentIndexer indexer = new DocumentIndexer(indexWriter);
  PatentCorpusReader corpusReader = new PatentCorpusReader(indexer, splitFileOrDir);
  corpusReader.readPatentCorpus();
  indexer.commitAndClose();
}

Source File: IndexFiles.java From Java-Data-Science-Cookbook with MIT License

4 votes

/**
 * Indexes the documents under {@code -docs} into the index at {@code -index}
 * (default {@code index}) and prints the elapsed time.
 *
 * <p>Without {@code -update} a new index is created; with it, documents are
 * added to an existing index. Exits with status 1 when {@code -docs} is
 * missing. An I/O failure during indexing is reported on standard output.
 *
 * @param args {@code [-index INDEX_PATH] -docs DOCS_PATH [-update]}
 */
public static void main(String[] args) {
	String indexPath = "index";
	String docsPath = null;
	boolean create = true;
	for(int i=0;i<args.length;i++) {
		if ("-index".equals(args[i])) {
			indexPath = args[i+1];
			i++;
		} else if ("-docs".equals(args[i])) {
			docsPath = args[i+1];
			i++;
		} else if ("-update".equals(args[i])) {
			create = false;
		}
	}

	// Without this guard a missing -docs argument crashes in Paths.get(null).
	if (docsPath == null) {
		System.err.println("Usage: IndexFiles [-index INDEX_PATH] -docs DOCS_PATH [-update]");
		System.exit(1);
	}

	final Path docDir = Paths.get(docsPath);

	Date start = new Date();
	try {
		System.out.println("Indexing to directory '" + indexPath + "'...");

		Analyzer analyzer = new StandardAnalyzer();
		IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

		if (create) {
			iwc.setOpenMode(OpenMode.CREATE);
		} else {
			iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
		}
		// try-with-resources closes the writer and directory even when
		// indexDocs fails.
		try (Directory dir = FSDirectory.open(Paths.get(indexPath));
				IndexWriter writer = new IndexWriter(dir, iwc)) {
			indexDocs(writer, docDir);
		}

		Date end = new Date();
		System.out.println(end.getTime() - start.getTime() + " total milliseconds");

	} catch (IOException e) {
		// Report the failure instead of silently discarding it.
		System.out.println(" caught a " + e.getClass() +
				"\n with message: " + e.getMessage());
	}
}

Source File: IndexFiles.java From word2vec-query-expansion with Apache License 2.0

4 votes

/**
 * Index all text files under a directory.
 *
 * <p>Recognized arguments: {@code -index INDEX_PATH} (default
 * {@code index}), {@code -docs DOCS_PATH} (required) and {@code -update}
 * (add to an existing index instead of creating a new one). Exits with
 * status 1 when {@code -docs} is missing or the directory is not readable.
 * An {@code IOException} during indexing is reported on standard output.
 *
 * @param args command-line arguments as described above
 */
  @SuppressWarnings("deprecation")
public static void main(String[] args) {
    // The trailing space after "index" keeps the two sentence halves from
    // running together in the printed usage text.
    String usage = "java org.apache.lucene.demo.IndexFiles"
                 + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
                 + "This indexes the documents in DOCS_PATH, creating a Lucene index "
                 + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for(int i=0;i<args.length;i++) {
      if ("-index".equals(args[i])) {
        indexPath = args[i+1];
        i++;
      } else if ("-docs".equals(args[i])) {
        docsPath = args[i+1];
        i++;
      } else if ("-update".equals(args[i])) {
        create = false;
      }
    }

    if (docsPath == null) {
      System.err.println("Usage: " + usage);
      System.exit(1);
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
      System.out.println("Document directory '" +docDir.getAbsolutePath()+ "' does not exist or is not readable, please check the path");
      System.exit(1);
    }
    
    Date start = new Date();
    try {
      System.out.println("Indexing to directory '" + indexPath + "'...");

      Directory dir = FSDirectory.open(new File(indexPath));
      Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
      IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);

      if (create) {
        // Create a new index in the directory, removing any
        // previously indexed documents:
        iwc.setOpenMode(OpenMode.CREATE);
      } else {
        // Add new documents to an existing index:
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
      }

      // Optional: for better indexing performance, if you
      // are indexing many documents, increase the RAM
      // buffer.  But if you do this, increase the max heap
      // size to the JVM (eg add -Xmx512m or -Xmx1g):
      //
      // iwc.setRAMBufferSizeMB(256.0);

      IndexWriter writer = new IndexWriter(dir, iwc);
      try {
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here.  This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);
      } finally {
        // Close the writer even if indexDocs failed.
        writer.close();
      }

      Date end = new Date();
      System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
      System.out.println(" caught a " + e.getClass() +
       "\n with message: " + e.getMessage());
    }
  }

Source File: BuildEnsembleSearchIndex.java From marathonv5 with Apache License 2.0

4 votes

/**
 * Builds the Ensemble search index and its companion listing files.
 *
 * <p>Steps: create a fresh Lucene index under
 * {@code build/classes/ensemble/search/index}; index the samples and, when
 * possible, the API documentation; write {@code listAll.txt} with one
 * {@code name:length} line per index file; write {@code samplesAll.txt} with
 * one line per indexed sample file.
 *
 * <p>A failure while indexing the API documentation only prints a warning;
 * the index is then built without it.
 *
 * @param args unused
 * @throws Exception if indexing the samples or writing any output file fails
 */
public static void main(String[] args) throws Exception{
    File samplesFilesDir = new File("build/classes/ensemble/");
    File indexDir = new File("build/classes/ensemble/search/index");
    File docDir = new File("../../../artifacts/sdk/docs/api");
    File samplesDir = new File("src/ensemble/samples");
    // create index
    Directory dir = FSDirectory.open(indexDir);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    // generate and write index of all java doc and samples
    IndexWriter writer = new IndexWriter(dir, iwc);

    List<String> samplesFileList = new ArrayList<String>();

    try {
        indexSamples(writer, samplesDir, samplesFileList);
        try {
            indexJavaDocAllClasses(writer, docDir);
        } catch (Exception e) {
            System.out.println("\nWarning: We were not able to locate the JavaFX API documentation for your build environment.\n"
                    + "Ensemble search will not include the API documentation.\n"); 
        }
    } finally {
        // Close the index writer even when indexing the samples fails.
        writer.close();
    }
    // create a listAll.txt file that is used
    FileWriter listAllOut = new FileWriter(new File(indexDir,"listAll.txt"));
    try {
        for (String fileName: dir.listAll()) {
            if (!"listAll.txt".equals(fileName)) { // don't include the "listAll.txt" file
                long length = dir.fileLength(fileName);
                listAllOut.write(fileName);
                listAllOut.write(':');
                listAllOut.write(Long.toString(length));
                listAllOut.write('\n');
            }
        }
        listAllOut.flush();
    } finally {
        listAllOut.close();
    }

    FileWriter sampleFilesCache = new FileWriter(new File(samplesFilesDir,"samplesAll.txt"));
    try {
        for (String oneSample: samplesFileList) {
            sampleFilesCache.write(oneSample);
            sampleFilesCache.write('\n');
        }
        sampleFilesCache.flush();
    } finally {
        sampleFilesCache.close();
    }
}

Java Code Examples for org.apache.lucene.index.IndexWriterConfig#setOpenMode()