org.apache.lucene.index.IndexWriterConfig.OpenMode#CREATE

Source File: IndexFiles.java From Java-Data-Science-Cookbook with MIT License

6 votes

/**
 * Indexes one file as a single document with "path", "modified" and "contents" fields.
 *
 * @param writer       writer that receives the document
 * @param file         file whose contents are read (as UTF-8) and indexed
 * @param lastModified value indexed in the "modified" point field
 * @throws IOException if the file cannot be opened or the writer fails
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
	try (InputStream stream = Files.newInputStream(file)) {
		final String location = file.toString();
		final BufferedReader contents =
				new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));

		final Document doc = new Document();
		doc.add(new StringField("path", location, Field.Store.YES));
		doc.add(new LongPoint("modified", lastModified));
		doc.add(new TextField("contents", contents));

		// Anything other than CREATE may already hold a copy of this path: replace it.
		if (writer.getConfig().getOpenMode() != OpenMode.CREATE) {
			System.out.println("updating " + file);
			writer.updateDocument(new Term("path", location), doc);
			return;
		}

		System.out.println("adding " + file);
		writer.addDocument(doc);
	}
}

Source File: PersistentSnapshotDeletionPolicy.java From lucene-solr with Apache License 2.0

6 votes

/**
 * {@link PersistentSnapshotDeletionPolicy} wraps another
 * {@link IndexDeletionPolicy} to enable flexible snapshotting.
 * 
 * @param primary
 *          the {@link IndexDeletionPolicy} that is used on non-snapshotted
 *          commits. Snapshotted commits, by definition, are not deleted until
 *          explicitly released via {@link #release}.
 * @param dir
 *          the {@link Directory} which will be used to persist the snapshots
 *          information.
 * @param mode
 *          specifies whether a new index should be created, deleting all
 *          existing snapshots information (immediately), or open an existing
 *          index, initializing the class with the snapshots information.
 */
public PersistentSnapshotDeletionPolicy(IndexDeletionPolicy primary,
    Directory dir, OpenMode mode) throws IOException {
  super(primary);

  this.dir = dir;

  if (mode == OpenMode.CREATE) {
    clearPriorSnapshots();
  }

  loadPriorSnapshots();

  if (mode == OpenMode.APPEND && nextWriteGen == 0) {
    throw new IllegalStateException("no snapshots stored in this directory");
  }
}

Source File: TestMultipleIndexFields.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Seeds an index with the unmodified facets configuration and verifies the
 * resulting counts and that ordinals exist in the "$facets" field.
 */
@Test
public void testDefault() throws Exception {
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();

  // Writers for the main index and for the taxonomy.
  MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  RandomIndexWriter indexWriter =
      new RandomIndexWriter(random(), indexDir, newIndexWriterConfig(analyzer));
  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

  FacetsConfig config = getConfig();
  seedIndex(taxoWriter, indexWriter, config);

  // Open the index reader first, then commit the taxonomy so a reader can open it.
  IndexReader indexReader = indexWriter.getReader();
  taxoWriter.commit();
  TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);

  IndexSearcher searcher = newSearcher(indexReader);
  FacetsCollector collector = performSearch(taxoReader, indexReader, searcher);

  // Check the facet results by hand, then the ordinals field.
  assertCorrectResults(getTaxonomyFacetCounts(taxoReader, config, collector));
  assertOrdinalsExist("$facets", indexReader);

  indexWriter.close();
  IOUtils.close(taxoReader, indexReader, taxoWriter, indexDir, taxoDir);
}

Source File: TestMultipleIndexFields.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Maps the "Author" dimension to the "$author" index field and verifies the
 * counts as well as the presence of ordinals in both "$facets" and "$author".
 */
@Test
public void testCustom() throws Exception {
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();

  // Writers for the main index and for the taxonomy.
  MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  RandomIndexWriter indexWriter =
      new RandomIndexWriter(random(), indexDir, newIndexWriterConfig(analyzer));
  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

  FacetsConfig config = getConfig();
  config.setIndexFieldName("Author", "$author");
  seedIndex(taxoWriter, indexWriter, config);

  // Open the index reader first, then commit the taxonomy so a reader can open it.
  IndexReader indexReader = indexWriter.getReader();
  taxoWriter.commit();
  TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);

  IndexSearcher searcher = newSearcher(indexReader);
  FacetsCollector collector = performSearch(taxoReader, indexReader, searcher);

  // "Author" is counted from its own field; everything else from the default one.
  Facets authorCounts = getTaxonomyFacetCounts(taxoReader, config, collector, "$author");
  Map<String,Facets> perDimension = new HashMap<>();
  perDimension.put("Author", authorCounts);
  Facets defaultCounts = getTaxonomyFacetCounts(taxoReader, config, collector);
  Facets combined = new MultiFacets(perDimension, defaultCounts);

  // Check the facet results by hand, then the ordinals fields.
  assertCorrectResults(combined);
  assertOrdinalsExist("$facets", indexReader);
  assertOrdinalsExist("$author", indexReader);

  indexWriter.close();
  IOUtils.close(taxoReader, indexReader, taxoWriter, indexDir, taxoDir);
}

Source File: TestDirectoryTaxonomyWriter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Regression test: DirTaxoWriter used to lose the INDEX_EPOCH property when
 * opened in CREATE_OR_APPEND (or when commit(userData) was called twice),
 * which could let DirTaxoReader refresh() successfully across a recreate.
 */
@Test
public void testRecreateAndRefresh() throws Exception {
  try (Directory dir = newDirectory()) {
    final String epochKey = DirectoryTaxonomyWriter.INDEX_EPOCH;

    DirectoryTaxonomyWriter writer =
        new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
    touchTaxo(writer, new FacetLabel("a"));

    TaxonomyReader reader = new DirectoryTaxonomyReader(dir);

    touchTaxo(writer, new FacetLabel("b"));

    // Refresh onto the second commit and check the epoch recorded there.
    TaxonomyReader refreshed = TaxonomyReader.openIfChanged(reader);
    reader.close();
    reader = refreshed;
    String epoch = reader.getCommitUserData().get(epochKey);
    assertEquals(1, Integer.parseInt(epoch));

    // Recreate the taxonomy, then reopen the writer in CREATE_OR_APPEND;
    // the epoch must survive that second open.
    writer.close();

    writer = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, NO_OP_CACHE);
    touchTaxo(writer, new FacetLabel("c"));
    writer.close();

    writer = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
    touchTaxo(writer, new FacetLabel("d"));
    writer.close();

    refreshed = TaxonomyReader.openIfChanged(reader);
    reader.close();
    reader = refreshed;
    epoch = reader.getCommitUserData().get(epochKey);
    assertEquals(2, Integer.parseInt(epoch));
    reader.close();
  }
}

Source File: TestDirectoryTaxonomyWriter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Verifies that the writer's internal index reader stays fresh: adding the
 * same category twice must yield the same ordinal. This simple scenario used
 * to fail when the cache had just evicted the category being added.
 */
@Test
public void testReaderFreshness() throws Exception {
  Directory dir = newDirectory();
  DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, NO_OP_CACHE);

  int firstOrdinal = writer.addCategory(new FacetLabel("a"));
  int secondOrdinal = writer.addCategory(new FacetLabel("a"));
  boolean sameOrdinal = firstOrdinal == secondOrdinal;
  assertTrue("ordinal for same category that is added twice should be the same !", sameOrdinal);

  writer.close();
  dir.close();
}

Source File: TestDirectoryTaxonomyReader.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Verifies that when a taxonomy is recreated, a reader obtained through
 * openIfChanged does not reuse any data from the previous taxonomy, and that
 * the old reader keeps seeing only the old data.
 */
@Test
public void testOpenIfChangedReuseAfterRecreate() throws Exception {
  Directory dir = newDirectory();
  FacetLabel labelA = new FacetLabel("a");
  FacetLabel labelB = new FacetLabel("b");

  // First generation of the taxonomy: only "a".
  DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
  writer.addCategory(labelA);
  writer.close();

  DirectoryTaxonomyReader oldReader = new DirectoryTaxonomyReader(dir);
  // Populate the old reader's caches.
  assertEquals(1, oldReader.getOrdinal(labelA));
  assertEquals(labelA, oldReader.getPath(1));

  // Recreate the taxonomy with a different category: only "b".
  writer = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE);
  writer.addCategory(labelB);
  writer.close();

  DirectoryTaxonomyReader newReader = TaxonomyReader.openIfChanged(oldReader);
  assertNotNull(newReader);

  // Populate the new reader's caches.
  assertEquals(1, newReader.getOrdinal(labelB));
  assertEquals(labelB, newReader.getPath(1));

  // The old reader must not see "b".
  assertEquals(TaxonomyReader.INVALID_ORDINAL, oldReader.getOrdinal(labelB));
  assertEquals(labelA, oldReader.getPath(1));

  // The new reader must not see "a".
  assertEquals(TaxonomyReader.INVALID_ORDINAL, newReader.getOrdinal(labelA));
  assertEquals(labelB, newReader.getPath(1));

  newReader.close();
  oldReader.close();
  dir.close();
}

Source File: TestPersistentSnapshotDeletionPolicy.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Constructs a policy in CREATE mode over a directory obtained from
 * newDirectory(); the test passes as long as construction does not throw.
 */
@Test
public void testNoSnapshotInfos() throws Exception {
  Directory dir = newDirectory();
  KeepOnlyLastCommitDeletionPolicy primary = new KeepOnlyLastCommitDeletionPolicy();
  new PersistentSnapshotDeletionPolicy(primary, dir, OpenMode.CREATE);
  dir.close();
}

Source File: SolrSnapshotMetaDataManager.java From lucene-solr with Apache License 2.0

5 votes

/**
 * A constructor.
 *
 * @param dir The directory where the snapshot meta-data is stored.
 * @param mode CREATE If previous meta-data should be erased.
 *             APPEND If previous meta-data should be read and updated.
 *             CREATE_OR_APPEND Creates a new meta-data structure if one does not exist
 *                              Updates the existing structure if one exists.
 * @throws IOException in case of errors.
 */
public SolrSnapshotMetaDataManager(SolrCore solrCore, Directory dir, OpenMode mode) throws IOException {
  this.solrCore = solrCore;
  this.dir = dir;

  if (mode == OpenMode.CREATE) {
    deleteSnapshotMetadataFiles();
  }

  loadFromSnapshotMetadataFile();

  if (mode == OpenMode.APPEND && nextWriteGen == 0) {
    throw new IllegalStateException("no snapshots stored in this directory");
  }
}

Source File: IndexFiles.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Indexes a single file as one document.
 *
 * @param writer       writer that receives the document
 * @param file         file to index; its contents are decoded as UTF-8
 * @param lastModified value indexed in the "modified" field
 * @throws IOException if the file cannot be opened or the writer fails
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
  try (InputStream stream = Files.newInputStream(file)) {
    final String pathText = file.toString();
    final Document doc = new Document();

    // "path": indexed (i.e. searchable) and stored, but not tokenized into
    // separate words, and without term frequency or positional information.
    doc.add(new StringField("path", pathText, Field.Store.YES));

    // "modified": a LongPoint, which is efficiently filterable with
    // PointRangeQuery. The value is indexed as given; if it has milli-second
    // resolution that is often too fine, and a number built from
    // year/month/day/hour/minutes/seconds down to the resolution you require
    // could be used instead. For example the long value 2011021714 would
    // mean February 17, 2011, 2-3 PM.
    doc.add(new LongPoint("modified", lastModified));

    // "contents": passed as a Reader, so the text is tokenized and indexed
    // but not stored. The bytes are decoded as UTF-8; if the file uses a
    // different encoding, searching for special characters will fail.
    final BufferedReader text =
        new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
    doc.add(new TextField("contents", text));

    final boolean newIndex = writer.getConfig().getOpenMode() == OpenMode.CREATE;
    System.out.println((newIndex ? "adding " : "updating ") + file);
    if (newIndex) {
      // New index: no old copy of this document can be there, so just add it.
      writer.addDocument(doc);
    } else {
      // Existing index: an old copy may have been indexed, so replace the
      // document matching this exact path, if present.
      writer.updateDocument(new Term("path", pathText), doc);
    }
  }
}

Source File: TestMultipleIndexFields.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Maps both the "Band" and "Composer" dimensions to the same "$music" index
 * field and verifies the counts as well as the presence of ordinals in
 * "$facets" and "$music".
 */
@Test
public void testTwoCustomsSameField() throws Exception {
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();
  
  // create and open an index writer
  RandomIndexWriter iw = new RandomIndexWriter(random(), indexDir, newIndexWriterConfig(
      new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  // create and open a taxonomy writer
  TaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

  FacetsConfig config = getConfig();
  config.setIndexFieldName("Band", "$music");
  config.setIndexFieldName("Composer", "$music");
  seedIndex(tw, iw, config);

  IndexReader ir = iw.getReader();
  tw.commit();

  // prepare index reader and taxonomy.
  TaxonomyReader tr = new DirectoryTaxonomyReader(taxoDir);

  // prepare searcher to search against
  IndexSearcher searcher = newSearcher(ir);

  FacetsCollector sfc = performSearch(tr, ir, searcher);

  // Both dimensions live in "$music", so one Facets instance serves both.
  Map<String,Facets> facetsMap = new HashMap<>();
  Facets facets2 = getTaxonomyFacetCounts(tr, config, sfc, "$music");
  facetsMap.put("Band", facets2);
  facetsMap.put("Composer", facets2);
  Facets facets = new MultiFacets(facetsMap, getTaxonomyFacetCounts(tr, config, sfc));

  // Obtain facets results and hand-test them
  assertCorrectResults(facets);

  assertOrdinalsExist("$facets", ir);
  // "Band" and "Composer" share "$music"; a single check covers both.
  assertOrdinalsExist("$music", ir);

  iw.close();
  IOUtils.close(tr, ir, tw, indexDir, taxoDir);
}

Source File: TestMultipleIndexFields.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Maps "Band" to "$bands" and "Composer" to "$composers" and verifies the
 * counts as well as the presence of ordinals in "$facets", "$bands" and
 * "$composers".
 */
@Test
public void testDifferentFieldsAndText() throws Exception {
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();

  // Writers for the main index and for the taxonomy.
  MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  RandomIndexWriter indexWriter =
      new RandomIndexWriter(random(), indexDir, newIndexWriterConfig(analyzer));
  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

  FacetsConfig config = getConfig();
  config.setIndexFieldName("Band", "$bands");
  config.setIndexFieldName("Composer", "$composers");
  seedIndex(taxoWriter, indexWriter, config);

  // Open the index reader first, then commit the taxonomy so a reader can open it.
  IndexReader indexReader = indexWriter.getReader();
  taxoWriter.commit();
  TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);

  IndexSearcher searcher = newSearcher(indexReader);
  FacetsCollector collector = performSearch(taxoReader, indexReader, searcher);

  // Each remapped dimension is counted from its own field.
  Facets bandCounts = getTaxonomyFacetCounts(taxoReader, config, collector, "$bands");
  Facets composerCounts = getTaxonomyFacetCounts(taxoReader, config, collector, "$composers");
  Map<String,Facets> perDimension = new HashMap<>();
  perDimension.put("Band", bandCounts);
  perDimension.put("Composer", composerCounts);
  Facets defaultCounts = getTaxonomyFacetCounts(taxoReader, config, collector);
  Facets combined = new MultiFacets(perDimension, defaultCounts);

  // Check the facet results by hand, then the ordinals fields.
  assertCorrectResults(combined);
  assertOrdinalsExist("$facets", indexReader);
  assertOrdinalsExist("$bands", indexReader);
  assertOrdinalsExist("$composers", indexReader);

  indexWriter.close();
  IOUtils.close(taxoReader, indexReader, taxoWriter, indexDir, taxoDir);
}

Source File: TestMultipleIndexFields.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Maps "Band" and "Composer" to the shared "$music" index field and "Author"
 * to "$literature", then verifies the counts as well as the presence of
 * ordinals in "$music" and "$literature".
 */
@Test
public void testSomeSameSomeDifferent() throws Exception {
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();
  
  // create and open an index writer
  RandomIndexWriter iw = new RandomIndexWriter(random(), indexDir, newIndexWriterConfig(
      new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  // create and open a taxonomy writer
  TaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

  FacetsConfig config = getConfig();
  config.setIndexFieldName("Band", "$music");
  config.setIndexFieldName("Composer", "$music");
  config.setIndexFieldName("Author", "$literature");
  seedIndex(tw, iw, config);

  IndexReader ir = iw.getReader();
  tw.commit();

  // prepare index reader and taxonomy.
  TaxonomyReader tr = new DirectoryTaxonomyReader(taxoDir);

  // prepare searcher to search against
  IndexSearcher searcher = newSearcher(ir);

  FacetsCollector sfc = performSearch(tr, ir, searcher);

  // "Band" and "Composer" live in "$music", so one Facets instance serves both.
  Map<String,Facets> facetsMap = new HashMap<>();
  Facets facets2 = getTaxonomyFacetCounts(tr, config, sfc, "$music");
  facetsMap.put("Band", facets2);
  facetsMap.put("Composer", facets2);
  facetsMap.put("Author", getTaxonomyFacetCounts(tr, config, sfc, "$literature"));
  Facets facets = new MultiFacets(facetsMap, getTaxonomyFacetCounts(tr, config, sfc));

  // Obtain facets results and hand-test them
  assertCorrectResults(facets);
  assertOrdinalsExist("$music", ir);
  assertOrdinalsExist("$literature", ir);

  // The index writer is closed here, so it is not passed to IOUtils.close again.
  iw.close();
  IOUtils.close(tr, ir, tw, indexDir, taxoDir);
}

Source File: TestDirectoryTaxonomyReader.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Repeatedly recreates a taxonomy with a random number of categories and
 * checks that a reader sees the expected size after each recreation.
 *
 * @param random      source of the per-round category count
 * @param closeReader if true, the reader is closed and reopened from the
 *                    directory each round; otherwise it is refreshed through
 *                    {@code TaxonomyReader.openIfChanged}
 */
private void doTestReadRecreatedTaxonomy(Random random, boolean closeReader) throws Exception {
  // Pool of categories "a/0" .. "a/9" to draw from on each round.
  final int n = 10;
  final FacetLabel[] pool = new FacetLabel[n];
  for (int idx = 0; idx < pool.length; idx++) {
    pool[idx] = new FacetLabel("a", Integer.toString(idx));
  }

  Directory dir = null;
  TaxonomyWriter writer = null;
  TaxonomyReader reader = null;
  try {
    dir = newDirectory();

    // Initial taxonomy holding just "a"; its size is the baseline.
    writer = new DirectoryTaxonomyWriter(dir);
    writer.addCategory(new FacetLabel("a"));
    writer.close();

    reader = new DirectoryTaxonomyReader(dir);
    final int baseNumCategories = reader.getSize();

    for (int i = 0; i < n; i++) {
      final int k = random.nextInt(n);

      // Recreate the taxonomy with the first k+1 categories of the pool.
      writer = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE);
      for (int j = 0; j <= k; j++) {
        writer.addCategory(pool[j]);
      }
      writer.close();

      if (!closeReader) {
        TaxonomyReader refreshed = TaxonomyReader.openIfChanged(reader);
        assertNotNull(refreshed);
        reader.close();
        reader = refreshed;
      } else {
        reader.close();
        reader = new DirectoryTaxonomyReader(dir);
      }

      assertEquals("Wrong #categories in taxonomy (i="+i+", k="+k+")", baseNumCategories + 1 + k, reader.getSize());
    }
  } finally {
    IOUtils.close(reader, writer, dir);
  }
}

Source File: TestPersistentSnapshotDeletionPolicy.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Builds a {@link PersistentSnapshotDeletionPolicy} in CREATE mode over the
 * given directory, wrapping a {@link KeepOnlyLastCommitDeletionPolicy}.
 *
 * @param dir directory handed to the persistent policy
 * @return the newly created policy
 * @throws IOException propagated from the policy's constructor
 */
private SnapshotDeletionPolicy getDeletionPolicy(Directory dir) throws IOException {
  KeepOnlyLastCommitDeletionPolicy primary = new KeepOnlyLastCommitDeletionPolicy();
  SnapshotDeletionPolicy policy =
      new PersistentSnapshotDeletionPolicy(primary, dir, OpenMode.CREATE);
  return policy;
}

Source File: BuildIndex.java From fnlp with GNU Lesser General Public License v3.0

4 votes

/**
 * Indexes a few hard-coded Chinese sentences into a Lucene index under
 * {@code ../tmp/lucene} using {@link FNLPAnalyzer}, then prints the elapsed
 * time in milliseconds. The index is opened in CREATE_OR_APPEND mode.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if the index directory cannot be opened or written
 * @throws LoadModelException declared by the signature; presumably raised
 *         when the FNLP models under {@code ../models} cannot be loaded
 */
public static void main(String[] args) throws IOException, LoadModelException {
	String indexPath = "../tmp/lucene";
	System.out.println("Indexing to directory '" + indexPath  + "'...");
	long start = System.currentTimeMillis();

	Directory dir = FSDirectory.open(new File(indexPath));
	try {
		// CNFactory must be initialized first (per the original author's note);
		// the call is kept for that purpose, its return value is not needed here.
		CNFactory.getInstance("../models", Models.SEG_TAG);
		Analyzer analyzer = new FNLPAnalyzer(Version.LUCENE_47);
		IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer);
		iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);

		IndexWriter writer = new IndexWriter(dir, iwc);
		try {
			String[] strs = new String[]{
					"终端的保修期为一年。",
					"凡在保修期内非人为损坏，均可免费保修。",
					"人为损坏的终端将视情况收取维修费用。",
					"中国"
			};
			for (String str : strs) {
				Document doc = new Document();
				doc.add(new TextField("content", str, Field.Store.YES));

				if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
					writer.addDocument(doc);
				} else {
					// NOTE(review): "content" is an analyzed TextField, so a term holding
					// the whole sentence may not match what was indexed; re-running could
					// then add duplicates instead of replacing. Confirm against the
					// analyzer's output and consider a dedicated untokenized id field.
					writer.updateDocument(new Term("content", str), doc);
				}
			}
		} finally {
			// Always release the writer, even if indexing fails.
			writer.close();
		}
	} finally {
		// The directory was never closed in the original; close it after the writer.
		dir.close();
	}

	long end = System.currentTimeMillis();
	System.out.println((end - start) + " total milliseconds");

}

Java Code Examples for org.apache.lucene.index.IndexWriterConfig.OpenMode#CREATE