org.apache.lucene.index.IndexWriterConfig#setIndexSort

Source File: TestIndexSortSortedNumericDocValuesRangeQuery.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Indexes one document whose "field" value is 42 into an index sorted on "field"
 * and checks the hit counts of range queries at and around that value.
 *
 * @param reverse whether the index sort on "field" is reversed
 */
private void testIndexSortDocValuesWithSingleValue(boolean reverse) throws IOException {
  Directory directory = newDirectory();

  IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
  config.setIndexSort(new Sort(new SortedNumericSortField("field", SortField.Type.LONG, reverse)));
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);

  indexWriter.addDocument(createDocument("field", 42));

  DirectoryReader indexReader = indexWriter.getReader();
  IndexSearcher indexSearcher = newSearcher(indexReader);

  // Each row is {expected hit count, lower bound, upper bound}.
  int[][] expectations = {
    {1, 42, 43},
    {1, 42, 42},
    {0, 41, 41},
    {0, 43, 43},
  };
  for (int[] row : expectations) {
    assertEquals(row[0], indexSearcher.count(createQuery("field", row[1], row[2])));
  }

  indexWriter.close();
  indexReader.close();
  directory.close();
}

Source File: TestIndexSortSortedNumericDocValuesRangeQuery.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Verifies that the index sort optimization stays inactive when the index is
 * sorted on a field other than the one carried by the indexed document.
 */
public void testIndexSortOnWrongField() throws Exception {
  Directory directory = newDirectory();

  IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
  // The index is sorted on "other-field" while the only document carries "field".
  config.setIndexSort(new Sort(new SortedNumericSortField("other-field", SortField.Type.LONG)));

  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);
  indexWriter.addDocument(createDocument("field", 0));

  testIndexSortOptimizationDeactivated(indexWriter);

  indexWriter.close();
  directory.close();
}

Source File: TestIndexSortSortedNumericDocValuesRangeQuery.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Verifies that the index sort optimization stays inactive when a document
 * holds more than one value for the sorted field.
 */
public void testMultiDocValues() throws Exception {
  Directory directory = newDirectory();

  IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
  config.setIndexSort(new Sort(new SortedNumericSortField("field", SortField.Type.LONG)));
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);

  // A single document carrying two values for "field".
  Document multiValued = new Document();
  for (int value : new int[] {0, 10}) {
    multiValued.add(new SortedNumericDocValuesField("field", value));
  }
  indexWriter.addDocument(multiValued);

  testIndexSortOptimizationDeactivated(indexWriter);

  indexWriter.close();
  directory.close();
}

Source File: test.java From vscode-extension with MIT License

5 votes

/**
 * Assembles the {@link IndexWriterConfig} from the engine configuration together with
 * the deletion policy, merge scheduler and soft-deletes settings held by this object.
 *
 * @return the populated writer configuration
 */
private IndexWriterConfig getIndexWriterConfig() {
    final IndexWriterConfig writerConfig = new IndexWriterConfig(engineConfig.getAnalyzer());
    // Closing the writer does not commit by default.
    writerConfig.setCommitOnClose(false);
    writerConfig.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    writerConfig.setIndexDeletionPolicy(combinedDeletionPolicy);

    // When the tests.verbose system property is true, use Lucene's default info stream;
    // otherwise route info-stream output to the logger.
    boolean verboseTests;
    try {
        verboseTests = Boolean.parseBoolean(System.getProperty("tests.verbose"));
    } catch (Exception ignored) {
        // Best effort: any failure reading the property counts as "not verbose".
        verboseTests = false;
    }
    if (verboseTests) {
        writerConfig.setInfoStream(InfoStream.getDefault());
    } else {
        writerConfig.setInfoStream(new LoggerInfoStream(logger));
    }
    writerConfig.setMergeScheduler(mergeScheduler);

    // Gives us the opportunity to upgrade old segments while performing background merges.
    MergePolicy effectiveMergePolicy = config().getMergePolicy();
    // The soft-deletes field is always configured so that an engine with soft-deletes
    // disabled can still open a Lucene index that contains soft-deletes.
    writerConfig.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD);
    if (softDeleteEnabled) {
        final MergePolicy retentionPolicy =
            new SoftDeletesRetentionMergePolicy(Lucene.SOFT_DELETES_FIELD, softDeletesPolicy::getRetentionQuery, effectiveMergePolicy);
        effectiveMergePolicy =
            new RecoverySourcePruneMergePolicy(SourceFieldMapper.RECOVERY_SOURCE_NAME, softDeletesPolicy::getRetentionQuery, retentionPolicy);
    }
    writerConfig.setMergePolicy(new ElasticsearchMergePolicy(effectiveMergePolicy));

    writerConfig.setSimilarity(engineConfig.getSimilarity());
    writerConfig.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());
    writerConfig.setCodec(engineConfig.getCodec());
    // Always use compound files on flush: reduces the number of file handles on refresh.
    writerConfig.setUseCompoundFile(true);

    // An index sort is applied only when one is configured.
    if (config().getIndexSort() != null) {
        writerConfig.setIndexSort(config().getIndexSort());
    }
    return writerConfig;
}

Source File: AnalyzingInfixSuggester.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Creates the writer configuration for the given analyzer and open mode.
 * Override this to customize index settings, e.g. which codec to use.
 */
protected IndexWriterConfig getIndexWriterConfig(Analyzer indexAnalyzer, IndexWriterConfig.OpenMode openMode) {
  // Configuring SORT as the index sort means all merged segments are sorted at
  // merge time, which allows per-segment early termination when those segments
  // are searched.
  return new IndexWriterConfig(indexAnalyzer)
      .setOpenMode(openMode)
      .setIndexSort(SORT);
}

Source File: TestIndexSortSortedNumericDocValuesRangeQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Checks range-query hit counts on "field" when the index sort on "field" has a
 * random missing value and some documents carry only "other-field".
 */
public void testIndexSortMissingValues() throws Exception {
  Directory directory = newDirectory();

  IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
  SortField fieldSort = new SortedNumericSortField("field", SortField.Type.LONG);
  fieldSort.setMissingValue(random().nextLong());
  config.setIndexSort(new Sort(fieldSort));
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);

  // Documents that carry the sorted field.
  for (int value : new int[] {-80, -5, 0, 35}) {
    indexWriter.addDocument(createDocument("field", value));
  }
  // Documents that lack the sorted field and only carry "other-field".
  for (int value : new int[] {0, 10, 20}) {
    indexWriter.addDocument(createDocument("other-field", value));
  }

  DirectoryReader indexReader = indexWriter.getReader();
  IndexSearcher indexSearcher = newSearcher(indexReader);

  // Each row is {expected hit count, lower bound, upper bound}.
  int[][] expectations = {
    {2, -70, 0},
    {2, -2, 35},
    {4, -80, 35},
  };
  for (int[] row : expectations) {
    assertEquals(row[0], indexSearcher.count(createQuery("field", row[1], row[2])));
  }
  // The widest possible range still matches only the four documents that have "field".
  assertEquals(4, indexSearcher.count(createQuery("field", Long.MIN_VALUE, Long.MAX_VALUE)));

  indexWriter.close();
  indexReader.close();
  directory.close();
}

Source File: SolrIndexConfig.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Builds a Lucene {@link IndexWriterConfig} for the given core from the settings
 * held by this object.
 *
 * @param core the core supplying the latest schema and the resource loader
 * @return the populated writer configuration
 * @throws IOException if a step of the assembly reports an I/O failure
 */
public IndexWriterConfig toIndexWriterConfig(SolrCore core) throws IOException {
  IndexSchema latestSchema = core.getLatestSchema();
  IndexWriterConfig writerConfig = new IndexWriterConfig(new DelayedSchemaAnalyzer(core));

  // A value of -1 means the setting is left untouched on the writer config.
  if (maxBufferedDocs != -1) {
    writerConfig.setMaxBufferedDocs(maxBufferedDocs);
  }
  if (ramBufferSizeMB != -1) {
    writerConfig.setRAMBufferSizeMB(ramBufferSizeMB);
  }
  if (ramPerThreadHardLimitMB != -1) {
    writerConfig.setRAMPerThreadHardLimitMB(ramPerThreadHardLimitMB);
  }

  writerConfig.setSimilarity(latestSchema.getSimilarity());

  MergePolicy policy = buildMergePolicy(core.getResourceLoader(), latestSchema);
  writerConfig.setMergePolicy(policy);

  MergeScheduler scheduler = buildMergeScheduler(core.getResourceLoader());
  writerConfig.setMergeScheduler(scheduler);

  writerConfig.setInfoStream(infoStream);

  // A sorting merge policy also dictates the index sort of the writer.
  if (policy instanceof SortingMergePolicy) {
    writerConfig.setIndexSort(((SortingMergePolicy) policy).getSort());
  }

  writerConfig.setUseCompoundFile(useCompoundFile);

  if (mergedSegmentWarmerInfo == null) {
    return writerConfig;
  }

  // TODO: add infostream -> normal logging system (there is an issue somewhere)
  // The warmer is instantiated through the resource loader and receives the
  // writer's info stream as its single constructor argument.
  @SuppressWarnings({"rawtypes"})
  IndexReaderWarmer segmentWarmer = core.getResourceLoader().newInstance(
      mergedSegmentWarmerInfo.className,
      IndexReaderWarmer.class,
      null,
      new Class[] { InfoStream.class },
      new Object[] { writerConfig.getInfoStream() });
  writerConfig.setMergedSegmentWarmer(segmentWarmer);

  return writerConfig;
}

Source File: TestIndexSortSortedNumericDocValuesRangeQuery.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Randomized check that the range query built by {@code createQuery} on "dv" matches
 * the same documents as a {@link LongPoint} range query on "idx", over an index
 * sorted on "dv" with a random direction and a random missing value.
 */
public void testSameHitsAsPointRangeQuery() throws IOException {
  final int iterations = atLeast(10);
  for (int iteration = 0; iteration < iterations; ++iteration) {
    Directory directory = newDirectory();

    IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
    SortField dvSort = new SortedNumericSortField("dv", SortField.Type.LONG, random().nextBoolean());
    dvSort.setMissingValue(random().nextLong());
    config.setIndexSort(new Sort(dvSort));

    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);

    final int docCount = atLeast(100);
    for (int docIndex = 0; docIndex < docCount; ++docIndex) {
      Document document = new Document();
      // Each document carries either no value or exactly one value, indexed
      // identically as a doc value ("dv") and as a point ("idx").
      final int valueCount = TestUtil.nextInt(random(), 0, 1);
      for (int valueIndex = 0; valueIndex < valueCount; ++valueIndex) {
        final long value = TestUtil.nextLong(random(), -100, 10000);
        document.add(new SortedNumericDocValuesField("dv", value));
        document.add(new LongPoint("idx", value));
      }
      indexWriter.addDocument(document);
    }

    // Sometimes delete a slice of the documents as well.
    if (random().nextBoolean()) {
      indexWriter.deleteDocuments(LongPoint.newRangeQuery("idx", 0L, 10L));
    }

    final IndexReader indexReader = indexWriter.getReader();
    final IndexSearcher indexSearcher = newSearcher(indexReader, false);
    indexWriter.close();

    for (int round = 0; round < 100; ++round) {
      // Each bound is either open-ended or drawn from the indexed value range.
      final long lower;
      if (random().nextBoolean()) {
        lower = Long.MIN_VALUE;
      } else {
        lower = TestUtil.nextLong(random(), -100, 10000);
      }
      final long upper;
      if (random().nextBoolean()) {
        upper = Long.MAX_VALUE;
      } else {
        upper = TestUtil.nextLong(random(), -100, 10000);
      }

      final Query pointQuery = LongPoint.newRangeQuery("idx", lower, upper);
      final Query docValuesQuery = createQuery("dv", lower, upper);
      assertSameHits(indexSearcher, pointQuery, docValuesQuery, false);
    }

    indexReader.close();
    directory.close();
  }
}

Source File: TestIndexSortSortedNumericDocValuesRangeQuery.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Checks range-query hit counts against an index sorted on "field" that holds an
 * even number of documents (six), one value of which occurs twice.
 *
 * @param reverse whether the index sort on "field" is reversed
 */
public void testIndexSortDocValuesWithEvenLength(boolean reverse) throws Exception {
  Directory directory = newDirectory();

  IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
  config.setIndexSort(new Sort(new SortedNumericSortField("field", SortField.Type.LONG, reverse)));
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);

  for (int value : new int[] {-80, -5, 0, 0, 30, 35}) {
    indexWriter.addDocument(createDocument("field", value));
  }

  DirectoryReader indexReader = indexWriter.getReader();
  IndexSearcher indexSearcher = newSearcher(indexReader);

  // Each row is {expected hit count, lower bound, upper bound}.
  int[][] expectations = {
    // Ranges consisting of one value that is present in the index.
    {1, -80, -80},
    {1, -5, -5},
    {2, 0, 0},
    {1, 30, 30},
    {1, 35, 35},

    // Ranges consisting of one value that is absent from the index.
    {0, -90, -90},
    {0, 5, 5},
    {0, 40, 40},

    // The lower end of the document value range.
    {2, -90, -4},
    {2, -80, -4},
    {1, -70, -4},
    {2, -80, -5},

    // The upper end of the document value range.
    {1, 25, 34},
    {2, 25, 35},
    {2, 25, 36},
    {2, 30, 35},

    // Multiple occurrences of the same value.
    {2, -4, 4},
    {2, -4, 0},
    {2, 0, 4},
    {3, 0, 30},

    // Ranges that span all documents.
    {6, -80, 35},
    {6, -90, 40},
  };
  for (int[] row : expectations) {
    assertEquals(row[0], indexSearcher.count(createQuery("field", row[1], row[2])));
  }

  indexWriter.close();
  indexReader.close();
  directory.close();
}

Source File: TestIndexSortSortedNumericDocValuesRangeQuery.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Checks range-query hit counts against an index sorted on "field" that holds an
 * odd number of documents (seven), one value of which occurs twice.
 *
 * @param reverse whether the index sort on "field" is reversed
 */
public void testIndexSortDocValuesWithOddLength(boolean reverse) throws Exception {
  Directory directory = newDirectory();

  IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
  config.setIndexSort(new Sort(new SortedNumericSortField("field", SortField.Type.LONG, reverse)));
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);

  for (int value : new int[] {-80, -5, 0, 0, 5, 30, 35}) {
    indexWriter.addDocument(createDocument("field", value));
  }

  DirectoryReader indexReader = indexWriter.getReader();
  IndexSearcher indexSearcher = newSearcher(indexReader);

  // Each row is {expected hit count, lower bound, upper bound}.
  int[][] expectations = {
    // Ranges consisting of one value that is present in the index.
    {1, -80, -80},
    {1, -5, -5},
    {2, 0, 0},
    {1, 5, 5},
    {1, 30, 30},
    {1, 35, 35},

    // Ranges consisting of one value that is absent from the index.
    {0, -90, -90},
    {0, 6, 6},
    {0, 40, 40},

    // The lower end of the document value range.
    {2, -90, -4},
    {2, -80, -4},
    {1, -70, -4},
    {2, -80, -5},

    // The upper end of the document value range.
    {1, 25, 34},
    {2, 25, 35},
    {2, 25, 36},
    {2, 30, 35},

    // Multiple occurrences of the same value.
    {2, -4, 4},
    {2, -4, 0},
    {2, 0, 4},
    {4, 0, 30},

    // Ranges that span all documents.
    {7, -80, 35},
    {7, -90, 40},
  };
  for (int[] row : expectations) {
    assertEquals(row[0], indexSearcher.count(createQuery("field", row[1], row[2])));
  }

  indexWriter.close();
  indexReader.close();
  directory.close();
}

Source File: TestTopFieldCollectorEarlyTermination.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Builds a randomized index sorted by {@code sort} and opens a reader on it.
 *
 * <p>Assigns {@code dir}, {@code numDocs}, {@code terms}, {@code iw} and {@code reader},
 * none of which are declared locally. The writer is left open.
 *
 * @param singleSortedSegment if true, the index is force-merged down to one segment;
 *     otherwise it is randomly either left as is or force-merged to
 *     {@code FORCE_MERGE_MAX_SEGMENT_COUNT} segments
 */
private void createRandomIndex(boolean singleSortedSegment) throws IOException {
  dir = newDirectory();
  numDocs = atLeast(150);
  // Build a pool of distinct random terms; the set guarantees uniqueness.
  final int numTerms = TestUtil.nextInt(random(), 1, numDocs / 5);
  Set<String> randomTerms = new HashSet<>();
  while (randomTerms.size() < numTerms) {
    randomTerms.add(TestUtil.randomSimpleString(random()));
  }
  terms = new ArrayList<>(randomTerms);
  // The same seed feeds both the analyzer's Random and the writer's Random below.
  final long seed = random().nextLong();
  final IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(new Random(seed)));
  if (iwc.getMergePolicy() instanceof MockRandomMergePolicy) {
    // MockRandomMP randomly wraps the leaf readers which makes merging angry
    iwc.setMergePolicy(newTieredMergePolicy());
  }
  iwc.setMergeScheduler(new SerialMergeScheduler()); // for reproducible tests
  iwc.setIndexSort(sort);
  iw = new RandomIndexWriter(new Random(seed), dir, iwc);
  iw.setDoRandomForceMerge(false); // don't do this, it may happen anyway with MockRandomMP
  for (int i = 0; i < numDocs; ++i) {
    final Document doc = randomDocument();
    iw.addDocument(doc);
    // Always commit at the midpoint; elsewhere commit with a 1-in-8 chance, but
    // never right after the last document.
    if (i == numDocs / 2 || (i != numDocs - 1 && random().nextInt(8) == 0)) {
      iw.commit();
    }
    // With a 1-in-15 chance, delete the documents matching a random term on field "s".
    if (random().nextInt(15) == 0) {
      final String term = RandomPicks.randomFrom(random(), terms);
      iw.deleteDocuments(new Term("s", term));
    }
  }
  if (singleSortedSegment) {
    iw.forceMerge(1);
  }
  else if (random().nextBoolean()) {
    iw.forceMerge(FORCE_MERGE_MAX_SEGMENT_COUNT);
  }
  reader = iw.getReader();
  // If the reader reports no live documents, add an empty one and reopen the reader.
  if (reader.numDocs() == 0) {
    iw.addDocument(new Document());
    reader.close();
    reader = iw.getReader();
  }
}

Java Code Examples for org.apache.lucene.index.IndexWriterConfig#setIndexSort()