Java Code Examples for org.apache.lucene.index.IndexWriterConfig#setMergePolicy()
The following examples show how to use org.apache.lucene.index.IndexWriterConfig#setMergePolicy(). They are drawn from open source projects; the source file, project, and license are noted above each example.
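Before the project examples, here is a minimal usage sketch of the call itself: setMergePolicy() is invoked on an IndexWriterConfig before that config is passed to an IndexWriter. The index path, the choice of TieredMergePolicy, and the tuning value below are illustrative assumptions, not taken from any of the examples that follow.

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.store.FSDirectory;

public class SetMergePolicyExample {
  public static void main(String[] args) throws Exception {
    IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
    TieredMergePolicy mergePolicy = new TieredMergePolicy();
    mergePolicy.setMaxMergedSegmentMB(512.0); // assumed tuning value, for illustration only
    config.setMergePolicy(mergePolicy);       // the method this page documents
    // hypothetical index path; any Directory implementation works here
    try (IndexWriter writer = new IndexWriter(
        FSDirectory.open(Paths.get("/tmp/example-index")), config)) {
      // add documents here ...
      writer.commit();
    }
  }
}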
Example 1
Source File: Index.java From dacapobench with Apache License 2.0 | 6 votes |
/**
 * Index all text files under a directory.
 */
public void main(final File INDEX_DIR, final String[] args) throws IOException {
  IndexWriterConfig IWConfig = new IndexWriterConfig();
  IWConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  IWConfig.setMergePolicy(new LogByteSizeMergePolicy());
  IndexWriter writer = new IndexWriter(FSDirectory.open(Paths.get(INDEX_DIR.getCanonicalPath())), IWConfig);
  for (int arg = 0; arg < args.length; arg++) {
    final File docDir = new File(args[arg]);
    if (!docDir.exists() || !docDir.canRead()) {
      System.out.println("Document directory '" + docDir.getAbsolutePath()
          + "' does not exist or is not readable, please check the path");
      throw new IOException("Cannot read from document directory");
    }
    indexDocs(writer, docDir);
    System.out.println("Optimizing...");
    writer.forceMerge(1);
  }
  writer.close();
}
Example 2
Source File: TestSuggestField.java From lucene-solr with Apache License 2.0 | 6 votes |
static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, final Set<String> suggestFields) {
  IndexWriterConfig iwc = newIndexWriterConfig(random(), analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  Codec filterCodec = new Lucene86Codec() {
    CompletionPostingsFormat.FSTLoadMode fstLoadMode =
        RandomPicks.randomFrom(random(), CompletionPostingsFormat.FSTLoadMode.values());
    PostingsFormat postingsFormat = new Completion84PostingsFormat(fstLoadMode);

    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      if (suggestFields.contains(field)) {
        return postingsFormat;
      }
      return super.getPostingsFormatForField(field);
    }
  };
  iwc.setCodec(filterCodec);
  return iwc;
}
Example 3
Source File: DocumentValueSourceDictionaryTest.java From lucene-solr with Apache License 2.0 | 6 votes |
@Test
public void testLongValuesSourceEmptyReader() throws IOException {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  // Make sure the index is created?
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  writer.commit();
  writer.close();
  IndexReader ir = DirectoryReader.open(dir);
  Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME,
      LongValuesSource.constant(10), PAYLOAD_FIELD_NAME);
  InputIterator inputIterator = dictionary.getEntryIterator();

  assertNull(inputIterator.next());
  assertEquals(inputIterator.weight(), 0);
  assertNull(inputIterator.payload());

  IOUtils.close(ir, analyzer, dir);
}
Example 4
Source File: DocumentValueSourceDictionaryTest.java From lucene-solr with Apache License 2.0 | 6 votes |
@Test
public void testValueSourceEmptyReader() throws IOException {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  // Make sure the index is created?
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  writer.commit();
  writer.close();
  IndexReader ir = DirectoryReader.open(dir);
  Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME,
      LongValuesSource.constant(10), PAYLOAD_FIELD_NAME);
  InputIterator inputIterator = dictionary.getEntryIterator();

  assertNull(inputIterator.next());
  assertEquals(inputIterator.weight(), 0);
  assertNull(inputIterator.payload());

  IOUtils.close(ir, analyzer, dir);
}
Example 5
Source File: TestConjunctions.java From lucene-solr with Apache License 2.0 | 6 votes |
@Override
public void setUp() throws Exception {
  super.setUp();
  analyzer = new MockAnalyzer(random());
  dir = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(analyzer);
  config.setMergePolicy(newLogMergePolicy()); // we will use docids to validate
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
  writer.addDocument(doc("lucene", "lucene is a very popular search engine library"));
  writer.addDocument(doc("solr", "solr is a very popular search server and is using lucene"));
  writer.addDocument(doc("nutch", "nutch is an internet search engine with web crawler and is using lucene and hadoop"));
  reader = writer.getReader();
  writer.close();
  searcher = newSearcher(reader);
  searcher.setSimilarity(new TFSimilarity());
}
Example 6
Source File: TestValueSources.java From lucene-solr with Apache License 2.0 | 5 votes |
@BeforeClass
public static void beforeClass() throws Exception {
  dir = newDirectory();
  analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwConfig = newIndexWriterConfig(analyzer);
  iwConfig.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConfig);
  for (String[] doc : documents) {
    Document document = new Document();
    document.add(new StringField("id", doc[0], Field.Store.NO));
    document.add(new SortedDocValuesField("id", new BytesRef(doc[0])));
    document.add(new NumericDocValuesField("double", Double.doubleToRawLongBits(Double.parseDouble(doc[1]))));
    document.add(new NumericDocValuesField("float", Float.floatToRawIntBits(Float.parseFloat(doc[2]))));
    document.add(new NumericDocValuesField("int", Integer.parseInt(doc[3])));
    document.add(new NumericDocValuesField("long", Long.parseLong(doc[4])));
    document.add(new StringField("string", doc[5], Field.Store.NO));
    document.add(new SortedDocValuesField("string", new BytesRef(doc[5])));
    document.add(new TextField("text", doc[6], Field.Store.NO));
    document.add(new SortedNumericDocValuesField("floatMv", NumericUtils.floatToSortableInt(Float.parseFloat(doc[7]))));
    document.add(new SortedNumericDocValuesField("floatMv", NumericUtils.floatToSortableInt(Float.parseFloat(doc[8]))));
    document.add(new SortedNumericDocValuesField("floatMv", NumericUtils.floatToSortableInt(Float.parseFloat(doc[9]))));
    document.add(new SortedNumericDocValuesField("doubleMv", NumericUtils.doubleToSortableLong(Double.parseDouble(doc[7]))));
    document.add(new SortedNumericDocValuesField("doubleMv", NumericUtils.doubleToSortableLong(Double.parseDouble(doc[8]))));
    document.add(new SortedNumericDocValuesField("doubleMv", NumericUtils.doubleToSortableLong(Double.parseDouble(doc[9]))));
    document.add(new SortedNumericDocValuesField("intMv", Long.parseLong(doc[10])));
    document.add(new SortedNumericDocValuesField("intMv", Long.parseLong(doc[11])));
    document.add(new SortedNumericDocValuesField("intMv", Long.parseLong(doc[12])));
    document.add(new SortedNumericDocValuesField("longMv", Long.parseLong(doc[10])));
    document.add(new SortedNumericDocValuesField("longMv", Long.parseLong(doc[11])));
    document.add(new SortedNumericDocValuesField("longMv", Long.parseLong(doc[12])));
    iw.addDocument(document);
  }
  reader = iw.getReader();
  searcher = newSearcher(reader);
  iw.close();
}
Example 7
Source File: TestLucene80DocValuesFormat.java From lucene-solr with Apache License 2.0 | 5 votes |
private IndexWriter createFastIndexWriter(Directory dir, int maxBufferedDocs) throws IOException {
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(maxBufferedDocs);
  conf.setRAMBufferSizeMB(-1);
  conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  return new IndexWriter(dir, conf);
}
Example 8
Source File: DocumentDictionaryTest.java From lucene-solr with Apache License 2.0 | 5 votes |
@Test
public void testMultiValuedField() throws IOException {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwc = newIndexWriterConfig(random(), analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  List<Suggestion> suggestions = indexMultiValuedDocuments(atLeast(1000), writer);
  writer.commit();
  writer.close();

  IndexReader ir = DirectoryReader.open(dir);
  Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME, CONTEXT_FIELD_NAME);
  InputIterator inputIterator = dictionary.getEntryIterator();
  BytesRef f;
  Iterator<Suggestion> suggestionsIter = suggestions.iterator();
  while ((f = inputIterator.next()) != null) {
    Suggestion nextSuggestion = suggestionsIter.next();
    assertTrue(f.equals(nextSuggestion.term));
    long weight = nextSuggestion.weight;
    assertEquals(inputIterator.weight(), (weight != -1) ? weight : 0);
    assertEquals(inputIterator.payload(), nextSuggestion.payload);
    assertTrue(inputIterator.contexts().equals(nextSuggestion.contexts));
  }
  assertFalse(suggestionsIter.hasNext());
  IOUtils.close(ir, analyzer, dir);
}
Example 9
Source File: OverviewTestBase.java From lucene-solr with Apache License 2.0 | 5 votes |
private Path createIndex() throws IOException {
  Path indexDir = createTempDir();
  Directory dir = newFSDirectory(indexDir);
  IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
  config.setMergePolicy(NoMergePolicy.INSTANCE); // see LUCENE-8998
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);

  Document doc1 = new Document();
  doc1.add(newStringField("f1", "1", Field.Store.NO));
  doc1.add(newTextField("f2", "a b c d e", Field.Store.NO));
  writer.addDocument(doc1);

  Document doc2 = new Document();
  doc2.add(newStringField("f1", "2", Field.Store.NO));
  doc2.add(new TextField("f2", "a c", Field.Store.NO));
  writer.addDocument(doc2);

  Document doc3 = new Document();
  doc3.add(newStringField("f1", "3", Field.Store.NO));
  doc3.add(newTextField("f2", "a f", Field.Store.NO));
  writer.addDocument(doc3);

  Map<String, String> userData = new HashMap<>();
  userData.put("data", "val");
  writer.w.setLiveCommitData(userData.entrySet());

  writer.commit();
  writer.close();
  dir.close();

  return indexDir;
}
Example 10
Source File: TestTaxonomyFacetCounts.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testSegmentsWithoutCategoriesOrResults() throws Exception {
  // tests the accumulator when there are segments with no results
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();

  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setMergePolicy(NoMergePolicy.INSTANCE); // prevent merges
  IndexWriter indexWriter = new IndexWriter(indexDir, iwc);

  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
  FacetsConfig config = new FacetsConfig();
  indexTwoDocs(taxoWriter, indexWriter, config, false); // 1st segment, no content, with categories
  indexTwoDocs(taxoWriter, indexWriter, null, true);    // 2nd segment, with content, no categories
  indexTwoDocs(taxoWriter, indexWriter, config, true);  // 3rd segment ok
  indexTwoDocs(taxoWriter, indexWriter, null, false);   // 4th segment, no content, or categories
  indexTwoDocs(taxoWriter, indexWriter, null, true);    // 5th segment, with content, no categories
  indexTwoDocs(taxoWriter, indexWriter, config, true);  // 6th segment, with content, with categories
  indexTwoDocs(taxoWriter, indexWriter, null, true);    // 7th segment, with content, no categories
  indexWriter.close();
  IOUtils.close(taxoWriter);

  DirectoryReader indexReader = DirectoryReader.open(indexDir);
  TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
  IndexSearcher indexSearcher = newSearcher(indexReader);

  // search for "f:a", only segments 1 and 3 should match results
  Query q = new TermQuery(new Term("f", "a"));
  FacetsCollector sfc = new FacetsCollector();
  indexSearcher.search(q, sfc);
  Facets facets = getTaxonomyFacetCounts(taxoReader, config, sfc);
  FacetResult result = facets.getTopChildren(10, "A");
  assertEquals("wrong number of children", 2, result.labelValues.length);
  for (LabelAndValue labelValue : result.labelValues) {
    assertEquals("wrong weight for child " + labelValue.label, 2, labelValue.value.intValue());
  }

  IOUtils.close(indexReader, taxoReader, indexDir, taxoDir);
}
Example 11
Source File: RecoverySourcePruneMergePolicyTests.java From crate with Apache License 2.0 | 5 votes |
public void testPruneNone() throws IOException {
  try (Directory dir = newDirectory()) {
    IndexWriterConfig iwc = newIndexWriterConfig();
    iwc.setMergePolicy(new RecoverySourcePruneMergePolicy("extra_source",
        () -> new MatchAllDocsQuery(), iwc.getMergePolicy()));
    try (IndexWriter writer = new IndexWriter(dir, iwc)) {
      for (int i = 0; i < 20; i++) {
        if (i > 0 && randomBoolean()) {
          writer.flush();
        }
        Document doc = new Document();
        doc.add(new StoredField("source", "hello world"));
        doc.add(new StoredField("extra_source", "hello world"));
        doc.add(new NumericDocValuesField("extra_source", 1));
        writer.addDocument(doc);
      }
      writer.forceMerge(1);
      writer.commit();
      try (DirectoryReader reader = DirectoryReader.open(writer)) {
        assertEquals(1, reader.leaves().size());
        NumericDocValues extra_source = reader.leaves().get(0).reader().getNumericDocValues("extra_source");
        assertNotNull(extra_source);
        for (int i = 0; i < reader.maxDoc(); i++) {
          Document document = reader.document(i);
          Set<String> collect = document.getFields().stream().map(IndexableField::name).collect(Collectors.toSet());
          assertTrue(collect.contains("source"));
          assertTrue(collect.contains("extra_source"));
          assertEquals(i, extra_source.nextDoc());
        }
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, extra_source.nextDoc());
      }
    }
  }
}
Example 12
Source File: TestCheckJoinIndex.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testInconsistentDeletes() throws IOException {
  final Directory dir = newDirectory();
  final IndexWriterConfig iwc = newIndexWriterConfig();
  iwc.setMergePolicy(NoMergePolicy.INSTANCE); // so that deletions don't trigger merges
  final RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);

  List<Document> block = new ArrayList<>();
  final int numChildren = TestUtil.nextInt(random(), 1, 3);
  for (int i = 0; i < numChildren; ++i) {
    Document doc = new Document();
    doc.add(new StringField("child", Integer.toString(i), Store.NO));
    block.add(doc);
  }
  Document parent = new Document();
  parent.add(new StringField("parent", "true", Store.NO));
  block.add(parent);
  w.addDocuments(block);

  if (random().nextBoolean()) {
    w.deleteDocuments(new Term("parent", "true"));
  } else {
    // delete any of the children
    w.deleteDocuments(new Term("child", Integer.toString(random().nextInt(numChildren))));
  }

  final IndexReader reader = w.getReader();
  w.close();

  BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("parent", "true")));
  try {
    expectThrows(IllegalStateException.class, () -> CheckJoinIndex.check(reader, parentsFilter));
  } finally {
    reader.close();
    dir.close();
  }
}
Example 13
Source File: TestMultiPhraseEnum.java From lucene-solr with Apache License 2.0 | 5 votes |
/** Tests union on one document */
public void testOneDocument() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();
  iwc.setMergePolicy(newLogMergePolicy());
  IndexWriter writer = new IndexWriter(dir, iwc);

  Document doc = new Document();
  doc.add(new TextField("field", "foo bar", Field.Store.NO));
  writer.addDocument(doc);

  DirectoryReader ir = DirectoryReader.open(writer);
  writer.close();

  PostingsEnum p1 = getOnlyLeafReader(ir).postings(new Term("field", "foo"), PostingsEnum.POSITIONS);
  PostingsEnum p2 = getOnlyLeafReader(ir).postings(new Term("field", "bar"), PostingsEnum.POSITIONS);
  PostingsEnum union = new MultiPhraseQuery.UnionPostingsEnum(Arrays.asList(p1, p2));

  assertEquals(-1, union.docID());

  assertEquals(0, union.nextDoc());
  assertEquals(2, union.freq());
  assertEquals(0, union.nextPosition());
  assertEquals(1, union.nextPosition());

  assertEquals(DocIdSetIterator.NO_MORE_DOCS, union.nextDoc());

  ir.close();
  dir.close();
}
Example 14
Source File: SolrSnapshotManager.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * This method deletes index files of the {@linkplain IndexCommit} for the specified generation number.
 *
 * @param core The Solr core
 * @param dir The index directory storing the snapshot.
 * @throws IOException in case of I/O errors.
 */
@SuppressWarnings({"try", "unused"})
private static void deleteSnapshotIndexFiles(SolrCore core, Directory dir, IndexDeletionPolicy delPolicy) throws IOException {
  IndexWriterConfig conf = core.getSolrConfig().indexConfig.toIndexWriterConfig(core);
  conf.setOpenMode(OpenMode.APPEND);
  conf.setMergePolicy(NoMergePolicy.INSTANCE); // Don't want to merge any commits here!
  conf.setIndexDeletionPolicy(delPolicy);
  conf.setCodec(core.getCodec());

  try (SolrIndexWriter iw = new SolrIndexWriter("SolrSnapshotCleaner", dir, conf)) {
    // Do nothing. The only purpose of opening index writer is to invoke the Lucene IndexDeletionPolicy#onInit
    // method so that we can cleanup the files associated with specified index commit.
    // Note the index writer creates a new commit during the close() operation (which is harmless).
  }
}
Example 15
Source File: TestFieldCache.java From lucene-solr with Apache License 2.0 | 4 votes |
public void testIntFieldCache() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
  cfg.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
  Document doc = new Document();
  IntPoint field = new IntPoint("f", 0);
  doc.add(field);
  final int[] values = new int[TestUtil.nextInt(random(), 1, 10)];
  Set<Integer> missing = new HashSet<>();
  for (int i = 0; i < values.length; ++i) {
    final int v;
    switch (random().nextInt(10)) {
      case 0:
        v = Integer.MIN_VALUE;
        break;
      case 1:
        v = 0;
        break;
      case 2:
        v = Integer.MAX_VALUE;
        break;
      default:
        v = TestUtil.nextInt(random(), -10, 10);
        break;
    }
    values[i] = v;
    if (v == 0 && random().nextBoolean()) {
      // missing
      iw.addDocument(new Document());
      missing.add(i);
    } else {
      field.setIntValue(v);
      iw.addDocument(doc);
    }
  }
  iw.forceMerge(1);
  final DirectoryReader reader = iw.getReader();
  final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.INT_POINT_PARSER);
  for (int i = 0; i < values.length; ++i) {
    if (missing.contains(i) == false) {
      assertEquals(i, ints.nextDoc());
      assertEquals(values[i], ints.longValue());
    }
  }
  assertEquals(NO_MORE_DOCS, ints.nextDoc());
  reader.close();
  iw.close();
  dir.close();
}
Example 16
Source File: TestControlledRealTimeReopenThread.java From lucene-solr with Apache License 2.0 | 4 votes |
public void testThreadStarvationNoDeleteNRTReader() throws IOException, InterruptedException {
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMergePolicy(NoMergePolicy.INSTANCE);
  Directory d = newDirectory();
  final CountDownLatch latch = new CountDownLatch(1);
  final CountDownLatch signal = new CountDownLatch(1);

  LatchedIndexWriter writer = new LatchedIndexWriter(d, conf, latch, signal);
  final SearcherManager manager = new SearcherManager(writer, false, false, null);
  Document doc = new Document();
  doc.add(newTextField("test", "test", Field.Store.YES));
  writer.addDocument(doc);
  manager.maybeRefresh();
  Thread t = new Thread() {
    @Override
    public void run() {
      try {
        signal.await();
        manager.maybeRefresh();
        writer.deleteDocuments(new TermQuery(new Term("foo", "barista")));
        manager.maybeRefresh(); // kick off another reopen so we inc. the internal gen
      } catch (Exception e) {
        e.printStackTrace();
      } finally {
        latch.countDown(); // let the add below finish
      }
    }
  };
  t.start();
  writer.waitAfterUpdate = true; // wait in addDocument to let some reopens go through

  final long lastGen = writer.updateDocument(new Term("foo", "bar"), doc); // once this returns the doc is already reflected in the last reopen

  // We now eagerly resolve deletes so the manager should see it after update:
  assertTrue(manager.isSearcherCurrent());

  IndexSearcher searcher = manager.acquire();
  try {
    assertEquals(2, searcher.getIndexReader().numDocs());
  } finally {
    manager.release(searcher);
  }
  final ControlledRealTimeReopenThread<IndexSearcher> thread =
      new ControlledRealTimeReopenThread<>(writer, manager, 0.01, 0.01);
  thread.start(); // start reopening
  if (VERBOSE) {
    System.out.println("waiting now for generation " + lastGen);
  }

  final AtomicBoolean finished = new AtomicBoolean(false);
  Thread waiter = new Thread() {
    @Override
    public void run() {
      try {
        thread.waitForGeneration(lastGen);
      } catch (InterruptedException ie) {
        Thread.currentThread().interrupt();
        throw new RuntimeException(ie);
      }
      finished.set(true);
    }
  };
  waiter.start();
  manager.maybeRefresh();
  waiter.join(1000);
  if (!finished.get()) {
    waiter.interrupt();
    fail("thread deadlocked on waitForGeneration");
  }
  thread.close();
  thread.join();
  writer.close();
  IOUtils.close(manager, d);
}
Example 17
Source File: TestLucene80DocValuesFormat.java From lucene-solr with Apache License 2.0 | 4 votes |
private void doTestSortedNumericBlocksOfVariousBitsPerValue(LongSupplier counts) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE));
  conf.setRAMBufferSizeMB(-1);
  conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  IndexWriter writer = new IndexWriter(dir, conf);

  final int numDocs = atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE * 3);
  final LongSupplier values = blocksOfVariousBPV();
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();

    int valueCount = (int) counts.getAsLong();
    long valueArray[] = new long[valueCount];
    for (int j = 0; j < valueCount; j++) {
      long value = values.getAsLong();
      valueArray[j] = value;
      doc.add(new SortedNumericDocValuesField("dv", value));
    }
    Arrays.sort(valueArray);
    for (int j = 0; j < valueCount; j++) {
      doc.add(new StoredField("stored", Long.toString(valueArray[j])));
    }
    writer.addDocument(doc);
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }
  writer.forceMerge(1);
  writer.close();

  // compare
  DirectoryReader ir = DirectoryReader.open(dir);
  TestUtil.checkReader(ir);
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    SortedNumericDocValues docValues = DocValues.getSortedNumeric(r, "dv");
    for (int i = 0; i < r.maxDoc(); i++) {
      if (i > docValues.docID()) {
        docValues.nextDoc();
      }
      String expected[] = r.document(i).getValues("stored");
      if (i < docValues.docID()) {
        assertEquals(0, expected.length);
      } else {
        String actual[] = new String[docValues.docValueCount()];
        for (int j = 0; j < actual.length; j++) {
          actual[j] = Long.toString(docValues.nextValue());
        }
        assertArrayEquals(expected, actual);
      }
    }
  }
  ir.close();
  dir.close();
}
Example 18
Source File: TestSearcherTaxonomyManager.java From lucene-solr with Apache License 2.0 | 4 votes |
public void testNRT() throws Exception {
  Directory dir = newDirectory();
  Directory taxoDir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  // Don't allow tiny maxBufferedDocs; it can make this test too slow:
  iwc.setMaxBufferedDocs(Math.max(500, iwc.getMaxBufferedDocs()));

  // MockRandom/AlcoholicMergePolicy are too slow:
  TieredMergePolicy tmp = new TieredMergePolicy();
  tmp.setFloorSegmentMB(.001);
  iwc.setMergePolicy(tmp);

  final IndexWriter w = new IndexWriter(dir, iwc);
  final DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir);
  final FacetsConfig config = new FacetsConfig();
  config.setMultiValued("field", true);
  final AtomicBoolean stop = new AtomicBoolean();

  // How many unique facets to index before stopping:
  final int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

  Thread indexer = new IndexerThread(w, config, tw, null, ordLimit, stop);

  final SearcherTaxonomyManager mgr = new SearcherTaxonomyManager(w, true, null, tw);

  Thread reopener = new Thread() {
    @Override
    public void run() {
      while (!stop.get()) {
        try {
          // Sleep for up to 20 msec:
          Thread.sleep(random().nextInt(20));
          if (VERBOSE) {
            System.out.println("TEST: reopen");
          }
          mgr.maybeRefresh();
          if (VERBOSE) {
            System.out.println("TEST: reopen done");
          }
        } catch (Exception ioe) {
          throw new RuntimeException(ioe);
        }
      }
    }
  };

  reopener.setName("reopener");
  reopener.start();

  indexer.setName("indexer");
  indexer.start();

  try {
    while (!stop.get()) {
      SearcherAndTaxonomy pair = mgr.acquire();
      try {
        //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
        FacetsCollector sfc = new FacetsCollector();
        pair.searcher.search(new MatchAllDocsQuery(), sfc);
        Facets facets = getTaxonomyFacetCounts(pair.taxonomyReader, config, sfc);
        FacetResult result = facets.getTopChildren(10, "field");
        if (pair.searcher.getIndexReader().numDocs() > 0) {
          //System.out.println(pair.taxonomyReader.getSize());
          assertTrue(result.childCount > 0);
          assertTrue(result.labelValues.length > 0);
        }
        //if (VERBOSE) {
        //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
        //}
      } finally {
        mgr.release(pair);
      }
    }
  } finally {
    indexer.join();
    reopener.join();
  }

  if (VERBOSE) {
    System.out.println("TEST: now stop");
  }

  w.close();
  IOUtils.close(mgr, tw, taxoDir, dir);
}
Example 19
Source File: TestBlockJoin.java From lucene-solr with Apache License 2.0 | 4 votes |
public void testEmptyChildFilter() throws Exception {
  final Directory dir = newDirectory();
  final IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
  config.setMergePolicy(NoMergePolicy.INSTANCE); // we don't want to merge - since we rely on certain segment setup
  final IndexWriter w = new IndexWriter(dir, config);

  final List<Document> docs = new ArrayList<>();

  docs.add(makeJob("java", 2007));
  docs.add(makeJob("python", 2010));
  docs.add(makeResume("Lisa", "United Kingdom"));
  w.addDocuments(docs);

  docs.clear();
  docs.add(makeJob("ruby", 2005));
  docs.add(makeJob("java", 2006));
  docs.add(makeResume("Frank", "United States"));
  w.addDocuments(docs);
  w.commit();

  IndexReader r = DirectoryReader.open(w);
  w.close();
  IndexSearcher s = newSearcher(r);
  BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "resume")));
  CheckJoinIndex.check(r, parentsFilter);

  BooleanQuery.Builder childQuery = new BooleanQuery.Builder();
  childQuery.add(new BooleanClause(new TermQuery(new Term("skill", "java")), Occur.MUST));
  childQuery.add(new BooleanClause(IntPoint.newRangeQuery("year", 2006, 2011), Occur.MUST));

  ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery.build(), parentsFilter, ScoreMode.Avg);

  BooleanQuery.Builder fullQuery = new BooleanQuery.Builder();
  fullQuery.add(new BooleanClause(childJoinQuery, Occur.MUST));
  fullQuery.add(new BooleanClause(new MatchAllDocsQuery(), Occur.MUST));

  TopDocs topDocs = s.search(fullQuery.build(), 2);
  assertEquals(2, topDocs.totalHits.value);
  assertEquals(asSet("Lisa", "Frank"),
      asSet(s.doc(topDocs.scoreDocs[0].doc).get("name"), s.doc(topDocs.scoreDocs[1].doc).get("name")));

  ParentChildrenBlockJoinQuery childrenQuery =
      new ParentChildrenBlockJoinQuery(parentsFilter, childQuery.build(), topDocs.scoreDocs[0].doc);
  TopDocs matchingChildren = s.search(childrenQuery, 1);
  assertEquals(1, matchingChildren.totalHits.value);
  assertEquals("java", s.doc(matchingChildren.scoreDocs[0].doc).get("skill"));

  childrenQuery = new ParentChildrenBlockJoinQuery(parentsFilter, childQuery.build(), topDocs.scoreDocs[1].doc);
  matchingChildren = s.search(childrenQuery, 1);
  assertEquals(1, matchingChildren.totalHits.value);
  assertEquals("java", s.doc(matchingChildren.scoreDocs[0].doc).get("skill"));

  r.close();
  dir.close();
}
Example 20
Source File: TestFieldCache.java From lucene-solr with Apache License 2.0 | 4 votes |
public void testLongFieldCache() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
  cfg.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
  Document doc = new Document();
  LongPoint field = new LongPoint("f", 0L);
  StoredField field2 = new StoredField("f", 0L);
  doc.add(field);
  doc.add(field2);
  final long[] values = new long[TestUtil.nextInt(random(), 1, 10)];
  Set<Integer> missing = new HashSet<>();
  for (int i = 0; i < values.length; ++i) {
    final long v;
    switch (random().nextInt(10)) {
      case 0:
        v = Long.MIN_VALUE;
        break;
      case 1:
        v = 0;
        break;
      case 2:
        v = Long.MAX_VALUE;
        break;
      default:
        v = TestUtil.nextLong(random(), -10, 10);
        break;
    }
    values[i] = v;
    if (v == 0 && random().nextBoolean()) {
      // missing
      iw.addDocument(new Document());
      missing.add(i);
    } else {
      field.setLongValue(v);
      field2.setLongValue(v);
      iw.addDocument(doc);
    }
  }
  iw.forceMerge(1);
  final DirectoryReader reader = iw.getReader();
  final NumericDocValues longs = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LONG_POINT_PARSER);
  for (int i = 0; i < values.length; ++i) {
    if (missing.contains(i) == false) {
      assertEquals(i, longs.nextDoc());
      assertEquals(values[i], longs.longValue());
    }
  }
  assertEquals(NO_MORE_DOCS, longs.nextDoc());
  reader.close();
  iw.close();
  dir.close();
}