Java Code Examples for org.apache.lucene.index.IndexWriter#forceMerge()
The following examples show how to use org.apache.lucene.index.IndexWriter#forceMerge(). Each example is drawn from an open-source project; the source file and license are noted above it. forceMerge(maxNumSegments) merges the index down to at most the given number of segments; it is an expensive, blocking operation, so it is typically called once, after indexing is complete.
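As a quick orientation before the project examples, here is a minimal, self-contained sketch of the typical call pattern: index a handful of documents across several commits, then call forceMerge(1) to collapse the index into a single segment before opening a reader. The class name, field name, and document contents are illustrative only and do not come from the projects below.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class ForceMergeSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new ByteBuffersDirectory(); // in-memory directory (Lucene 8.x)
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
    for (int i = 0; i < 100; i++) {
      Document doc = new Document();
      doc.add(new TextField("body", "document number " + i, Field.Store.NO));
      writer.addDocument(doc);
      if (i % 10 == 0) {
        writer.commit(); // periodic commits produce multiple segments
      }
    }
    // Merge all segments down to one. This is a costly, blocking call and is
    // generally only advisable for indexes that will no longer change.
    writer.forceMerge(1);
    writer.close();
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      System.out.println("segments after forceMerge: " + reader.leaves().size()); // expect 1
    }
  }
}

Because forceMerge rewrites the selected segments in full, the Lucene javadoc recommends reserving it for indexes that have finished changing, as the examples below mostly do (merging once after all documents are added).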
Example 1
Source File: BaseShapeTestCase.java From lucene-solr with Apache License 2.0
protected void indexRandomShapes(IndexWriter w, Object... shapes) throws Exception {
  Set<Integer> deleted = new HashSet<>();
  for (int id = 0; id < shapes.length; ++id) {
    Document doc = new Document();
    doc.add(newStringField("id", "" + id, Field.Store.NO));
    doc.add(new NumericDocValuesField("id", id));
    if (shapes[id] != null) {
      addShapeToDoc(FIELD_NAME, doc, shapes[id]);
    }
    w.addDocument(doc);
    if (id > 0 && random().nextInt(100) == 42) {
      int idToDelete = random().nextInt(id);
      w.deleteDocuments(new Term("id", "" + idToDelete));
      deleted.add(idToDelete);
      if (VERBOSE) {
        System.out.println(" delete id=" + idToDelete);
      }
    }
  }
  if (randomBoolean()) {
    w.forceMerge(1);
  }
}
Example 2
Source File: TestMergeSchedulerExternal.java From lucene-solr with Apache License 2.0
public void testCustomMergeScheduler() throws Exception {
  // we don't really need to execute anything, just to make sure the custom MS
  // compiles. But ensure that it can be used as well, e.g., no other hidden
  // dependencies or something. Therefore, don't use any random API!
  Directory dir = new ByteBuffersDirectory();
  IndexWriterConfig conf = new IndexWriterConfig(null);
  conf.setMergeScheduler(new ReportingMergeScheduler());
  IndexWriter writer = new IndexWriter(dir, conf);
  writer.addDocument(new Document());
  writer.commit(); // trigger flush
  writer.addDocument(new Document());
  writer.commit(); // trigger flush
  writer.forceMerge(1);
  writer.close();
  dir.close();
}
Example 3
Source File: PayloadHelper.java From lucene-solr with Apache License 2.0
/**
 * Sets up a RAM-resident Directory, and adds documents (using English.intToEnglish())
 * with two fields: field and multiField, and analyzes them using the PayloadAnalyzer.
 * @param similarity The Similarity class to use in the Searcher
 * @param numDocs The num docs to add
 * @return An IndexSearcher
 */
// TODO: randomize
public IndexSearcher setUp(Random random, Similarity similarity, int numDocs) throws IOException {
  Directory directory = new MockDirectoryWrapper(random, new ByteBuffersDirectory());
  PayloadAnalyzer analyzer = new PayloadAnalyzer();

  // TODO randomize this
  IndexWriter writer = new IndexWriter(directory,
      new IndexWriterConfig(analyzer).setSimilarity(similarity));
  // writer.infoStream = System.out;
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(new TextField(FIELD, English.intToEnglish(i), Field.Store.YES));
    doc.add(new TextField(MULTI_FIELD, English.intToEnglish(i) + " " + English.intToEnglish(i), Field.Store.YES));
    doc.add(new TextField(NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES));
    writer.addDocument(doc);
  }
  writer.forceMerge(1);
  reader = DirectoryReader.open(writer);
  writer.close();

  IndexSearcher searcher = LuceneTestCase.newSearcher(LuceneTestCase.getOnlyLeafReader(reader));
  searcher.setSimilarity(similarity);
  return searcher;
}
Example 4
Source File: TestLucene84PostingsFormat.java From lucene-solr with Apache License 2.0
/** Make sure the final sub-block(s) are not skipped. */
public void testFinalBlock() throws Exception {
  Directory d = newDirectory();
  IndexWriter w = new IndexWriter(d, new IndexWriterConfig(new MockAnalyzer(random())));
  for (int i = 0; i < 25; i++) {
    Document doc = new Document();
    doc.add(newStringField("field", Character.toString((char) (97 + i)), Field.Store.NO));
    doc.add(newStringField("field", "z" + Character.toString((char) (97 + i)), Field.Store.NO));
    w.addDocument(doc);
  }
  w.forceMerge(1);

  DirectoryReader r = DirectoryReader.open(w);
  assertEquals(1, r.leaves().size());
  FieldReader field = (FieldReader) r.leaves().get(0).reader().terms("field");
  // We should see exactly two blocks: one root block (prefix empty string) and one block for z* terms (prefix z):
  Stats stats = field.getStats();
  assertEquals(0, stats.floorBlockCount);
  assertEquals(2, stats.nonFloorBlockCount);
  r.close();
  w.close();
  d.close();
}
Example 5
Source File: TestFieldCacheSort.java From lucene-solr with Apache License 2.0
/** test that we throw exception on multi-valued field, creates corrupt reader, use SORTED_SET instead */
public void testMultiValuedField() throws IOException {
  Directory indexStore = newDirectory();
  IndexWriter writer = new IndexWriter(indexStore, newIndexWriterConfig(new MockAnalyzer(random())));
  for (int i = 0; i < 5; i++) {
    Document doc = new Document();
    doc.add(new StringField("string", "a" + i, Field.Store.NO));
    doc.add(new StringField("string", "b" + i, Field.Store.NO));
    writer.addDocument(doc);
  }
  writer.forceMerge(1); // enforce one segment to have a higher unique term count in all cases
  writer.close();
  Sort sort = new Sort(
      new SortField("string", SortField.Type.STRING),
      SortField.FIELD_DOC);
  IndexReader reader = UninvertingReader.wrap(DirectoryReader.open(indexStore),
      Collections.singletonMap("string", Type.SORTED));
  IndexSearcher searcher = new IndexSearcher(reader);
  expectThrows(IllegalStateException.class, () -> {
    searcher.search(new MatchAllDocsQuery(), 500, sort);
  });
  reader.close();
  indexStore.close();
}
Example 6
Source File: TestScorerPerf.java From lucene-solr with Apache License 2.0
public void createRandomTerms(int nDocs, int nTerms, double power, Directory dir) throws Exception {
  int[] freq = new int[nTerms];
  Term[] terms = new Term[nTerms];
  for (int i = 0; i < nTerms; i++) {
    int f = (nTerms + 1) - i; // make first terms less frequent
    freq[i] = (int) Math.ceil(Math.pow(f, power));
    terms[i] = new Term("f", Character.toString((char) ('A' + i)));
  }
  IndexWriter iw = new IndexWriter(dir,
      newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE));
  for (int i = 0; i < nDocs; i++) {
    Document d = new Document();
    for (int j = 0; j < nTerms; j++) {
      if (random().nextInt(freq[j]) == 0) {
        d.add(newStringField("f", terms[j].text(), Field.Store.NO));
        // System.out.println(d);
      }
    }
    iw.addDocument(d);
  }
  iw.forceMerge(1);
  iw.close();
}
Example 7
Source File: TestPointQueries.java From lucene-solr with Apache License 2.0
@Nightly
public void testInversePointRange() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
  final int numDims = TestUtil.nextInt(random(), 1, 3);
  final int numDocs = atLeast(10 * BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE); // we need multiple leaves to enable this optimization
  for (int i = 0; i < numDocs; ++i) {
    Document doc = new Document();
    int[] values = new int[numDims];
    Arrays.fill(values, i);
    doc.add(new IntPoint("f", values));
    w.addDocument(doc);
  }
  w.forceMerge(1);
  IndexReader r = DirectoryReader.open(w);
  w.close();

  IndexSearcher searcher = newSearcher(r);
  int[] low = new int[numDims];
  int[] high = new int[numDims];
  Arrays.fill(high, numDocs - 2);
  assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
  Arrays.fill(low, 1);
  assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
  Arrays.fill(high, numDocs - 1);
  assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
  Arrays.fill(low, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE + 1);
  assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
  Arrays.fill(high, numDocs - BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
  assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));

  r.close();
  dir.close();
}
Example 8
Source File: TestLucene80DocValuesFormat.java From lucene-solr with Apache License 2.0
private void doTestSparseNumericBlocksOfVariousBitsPerValue(double density) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE));
  conf.setRAMBufferSizeMB(-1);
  conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  Field storedField = newStringField("stored", "", Field.Store.YES);
  Field dvField = new NumericDocValuesField("dv", 0);
  doc.add(storedField);
  doc.add(dvField);

  final int numDocs = atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE * 3);
  final LongSupplier longs = blocksOfVariousBPV();
  for (int i = 0; i < numDocs; i++) {
    if (random().nextDouble() > density) {
      writer.addDocument(new Document());
      continue;
    }
    long value = longs.getAsLong();
    storedField.setStringValue(Long.toString(value));
    dvField.setLongValue(value);
    writer.addDocument(doc);
  }

  writer.forceMerge(1);
  writer.close();

  // compare
  assertDVIterate(dir);
  assertDVAdvance(dir, 1); // Tests all jump-lengths from 1 to maxDoc (quite slow ~= 1 minute for 200K docs)
  dir.close();
}
Example 9
Source File: MergeCommand.java From clue with Apache License 2.0
@Override
public void execute(Namespace args, PrintStream out) throws Exception {
  int count = args.getInt("num");

  IndexWriter writer = ctx.getIndexWriter();
  if (writer != null) {
    writer.forceMerge(count, true);
    writer.commit();
    ctx.refreshReader();
  } else {
    out.println("unable to open index writer, index is in readonly mode");
  }
}
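Note that this example uses the two-argument overload forceMerge(maxNumSegments, doWait): with doWait=true the call blocks until the requested merges complete, which suits a command-line tool that commits and refreshes its reader immediately afterwards.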
Example 10
Source File: TestLucene80DocValuesFormat.java From lucene-solr with Apache License 2.0
@Nightly
public void testSortedSetAroundBlockSize() throws IOException {
  final int frontier = 1 << Lucene80DocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
  for (int maxDoc = frontier - 1; maxDoc <= frontier + 1; ++maxDoc) {
    final Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()));
    ByteBuffersDataOutput out = new ByteBuffersDataOutput();
    Document doc = new Document();
    SortedSetDocValuesField field1 = new SortedSetDocValuesField("sset", new BytesRef());
    doc.add(field1);
    SortedSetDocValuesField field2 = new SortedSetDocValuesField("sset", new BytesRef());
    doc.add(field2);
    for (int i = 0; i < maxDoc; ++i) {
      BytesRef s1 = new BytesRef(TestUtil.randomSimpleString(random(), 2));
      BytesRef s2 = new BytesRef(TestUtil.randomSimpleString(random(), 2));
      field1.setBytesValue(s1);
      field2.setBytesValue(s2);
      w.addDocument(doc);
      Set<BytesRef> set = new TreeSet<>(Arrays.asList(s1, s2));
      out.writeVInt(set.size());
      for (BytesRef ref : set) {
        out.writeVInt(ref.length);
        out.writeBytes(ref.bytes, ref.offset, ref.length);
      }
    }

    w.forceMerge(1);
    DirectoryReader r = DirectoryReader.open(w);
    w.close();

    LeafReader sr = getOnlyLeafReader(r);
    assertEquals(maxDoc, sr.maxDoc());
    SortedSetDocValues values = sr.getSortedSetDocValues("sset");
    assertNotNull(values);
    ByteBuffersDataInput in = out.toDataInput();
    BytesRefBuilder b = new BytesRefBuilder();
    for (int i = 0; i < maxDoc; ++i) {
      assertEquals(i, values.nextDoc());
      final int numValues = in.readVInt();
      for (int j = 0; j < numValues; ++j) {
        b.setLength(in.readVInt());
        b.grow(b.length());
        in.readBytes(b.bytes(), 0, b.length());
        assertEquals(b.get(), values.lookupOrd(values.nextOrd()));
      }
      assertEquals(SortedSetDocValues.NO_MORE_ORDS, values.nextOrd());
    }
    r.close();
    dir.close();
  }
}
Example 11
Source File: TestLucene80DocValuesFormat.java From lucene-solr with Apache License 2.0
@Nightly
public void testSortedNumericAroundBlockSize() throws IOException {
  final int frontier = 1 << Lucene80DocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
  for (int maxDoc = frontier - 1; maxDoc <= frontier + 1; ++maxDoc) {
    final Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()));
    ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();

    Document doc = new Document();
    SortedNumericDocValuesField field1 = new SortedNumericDocValuesField("snum", 0L);
    doc.add(field1);
    SortedNumericDocValuesField field2 = new SortedNumericDocValuesField("snum", 0L);
    doc.add(field2);
    for (int i = 0; i < maxDoc; ++i) {
      long s1 = random().nextInt(100);
      long s2 = random().nextInt(100);
      field1.setLongValue(s1);
      field2.setLongValue(s2);
      w.addDocument(doc);
      buffer.writeVLong(Math.min(s1, s2));
      buffer.writeVLong(Math.max(s1, s2));
    }

    w.forceMerge(1);
    DirectoryReader r = DirectoryReader.open(w);
    w.close();

    LeafReader sr = getOnlyLeafReader(r);
    assertEquals(maxDoc, sr.maxDoc());
    SortedNumericDocValues values = sr.getSortedNumericDocValues("snum");
    assertNotNull(values);
    ByteBuffersDataInput dataInput = buffer.toDataInput();
    for (int i = 0; i < maxDoc; ++i) {
      assertEquals(i, values.nextDoc());
      assertEquals(2, values.docValueCount());
      assertEquals(dataInput.readVLong(), values.nextValue());
      assertEquals(dataInput.readVLong(), values.nextValue());
    }
    r.close();
    dir.close();
  }
}
Example 12
Source File: TestDirectoryTaxonomyReader.java From lucene-solr with Apache License 2.0
@Test
public void testOpenIfChangedMergedSegment() throws Exception {
  // test openIfChanged() when all index segments were merged - used to be
  // a bug in ParentArray, caught by testOpenIfChangedManySegments - only
  // this test is not random
  Directory dir = newDirectory();

  // hold onto IW to forceMerge
  // note how we don't close it, since DTW will close it.
  final IndexWriter iw = new IndexWriter(dir,
      new IndexWriterConfig(new MockAnalyzer(random()))
          .setMergePolicy(new LogByteSizeMergePolicy()));
  DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {
    @Override
    protected IndexWriter openIndexWriter(Directory directory, IndexWriterConfig config) throws IOException {
      return iw;
    }
  };

  TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
  assertEquals(1, reader.getSize());
  assertEquals(1, reader.getParallelTaxonomyArrays().parents().length);

  // add category and call forceMerge -- this should flush IW and merge segments down to 1
  // in ParentArray.initFromReader, this used to fail assuming there are no parents.
  writer.addCategory(new FacetLabel("1"));
  iw.forceMerge(1);

  // now calling openIfChanged should trip on the bug
  TaxonomyReader newtr = TaxonomyReader.openIfChanged(reader);
  assertNotNull(newtr);
  reader.close();
  reader = newtr;
  assertEquals(2, reader.getSize());
  assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);

  reader.close();
  writer.close();
  dir.close();
}
Example 13
Source File: TestLucene80DocValuesFormat.java From lucene-solr with Apache License 2.0
private void doTestSortedNumericBlocksOfVariousBitsPerValue(LongSupplier counts) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE));
  conf.setRAMBufferSizeMB(-1);
  conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  IndexWriter writer = new IndexWriter(dir, conf);

  final int numDocs = atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE * 3);
  final LongSupplier values = blocksOfVariousBPV();
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();

    int valueCount = (int) counts.getAsLong();
    long[] valueArray = new long[valueCount];
    for (int j = 0; j < valueCount; j++) {
      long value = values.getAsLong();
      valueArray[j] = value;
      doc.add(new SortedNumericDocValuesField("dv", value));
    }
    Arrays.sort(valueArray);
    for (int j = 0; j < valueCount; j++) {
      doc.add(new StoredField("stored", Long.toString(valueArray[j])));
    }
    writer.addDocument(doc);
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }
  writer.forceMerge(1);
  writer.close();

  // compare
  DirectoryReader ir = DirectoryReader.open(dir);
  TestUtil.checkReader(ir);
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    SortedNumericDocValues docValues = DocValues.getSortedNumeric(r, "dv");
    for (int i = 0; i < r.maxDoc(); i++) {
      if (i > docValues.docID()) {
        docValues.nextDoc();
      }
      String[] expected = r.document(i).getValues("stored");
      if (i < docValues.docID()) {
        assertEquals(0, expected.length);
      } else {
        String[] actual = new String[docValues.docValueCount()];
        for (int j = 0; j < actual.length; j++) {
          actual[j] = Long.toString(docValues.nextValue());
        }
        assertArrayEquals(expected, actual);
      }
    }
  }
  ir.close();
  dir.close();
}
Example 14
Source File: TestIndexOrDocValuesQuery.java From lucene-solr with Apache License 2.0
public void testUseIndexForSelectiveMultiValueQueries() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()
      // relies on costs and PointValues.estimateCost so we need the default codec
      .setCodec(TestUtil.getDefaultCodec()));
  for (int i = 0; i < 2000; ++i) {
    Document doc = new Document();
    if (i < 1000) {
      doc.add(new StringField("f1", "bar", Store.NO));
      for (int j = 0; j < 500; j++) {
        doc.add(new LongPoint("f2", 42L));
        doc.add(new SortedNumericDocValuesField("f2", 42L));
      }
    } else if (i == 1001) {
      doc.add(new StringField("f1", "foo", Store.NO));
      doc.add(new LongPoint("f2", 2L));
      doc.add(new SortedNumericDocValuesField("f2", 42L));
    } else {
      doc.add(new StringField("f1", "bar", Store.NO));
      for (int j = 0; j < 100; j++) {
        doc.add(new LongPoint("f2", 2L));
        doc.add(new SortedNumericDocValuesField("f2", 2L));
      }
    }
    w.addDocument(doc);
  }
  w.forceMerge(1);
  IndexReader reader = DirectoryReader.open(w);
  IndexSearcher searcher = newSearcher(reader);
  searcher.setQueryCache(null);

  // The term query is less selective, so the IndexOrDocValuesQuery should use points
  final Query q1 = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("f1", "bar")), Occur.MUST)
      .add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 2),
          SortedNumericDocValuesField.newSlowRangeQuery("f2", 2L, 2L)), Occur.MUST)
      .build();
  final Weight w1 = searcher.createWeight(searcher.rewrite(q1), ScoreMode.COMPLETE, 1);
  final Scorer s1 = w1.scorer(searcher.getIndexReader().leaves().get(0));
  assertNull(s1.twoPhaseIterator()); // means we use points

  // The term query is less selective, so the IndexOrDocValuesQuery should use points
  final Query q2 = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("f1", "bar")), Occur.MUST)
      .add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 42),
          SortedNumericDocValuesField.newSlowRangeQuery("f2", 42, 42L)), Occur.MUST)
      .build();
  final Weight w2 = searcher.createWeight(searcher.rewrite(q2), ScoreMode.COMPLETE, 1);
  final Scorer s2 = w2.scorer(searcher.getIndexReader().leaves().get(0));
  assertNull(s2.twoPhaseIterator()); // means we use points

  // The term query is more selective, so the IndexOrDocValuesQuery should use doc values
  final Query q3 = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("f1", "foo")), Occur.MUST)
      .add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 42),
          SortedNumericDocValuesField.newSlowRangeQuery("f2", 42, 42L)), Occur.MUST)
      .build();
  final Weight w3 = searcher.createWeight(searcher.rewrite(q3), ScoreMode.COMPLETE, 1);
  final Scorer s3 = w3.scorer(searcher.getIndexReader().leaves().get(0));
  assertNotNull(s3.twoPhaseIterator()); // means we use doc values

  reader.close();
  w.close();
  dir.close();
}
Example 15
Source File: TestReqOptSumScorer.java From lucene-solr with Apache License 2.0
public void testMaxBlock() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()));

  FieldType ft = new FieldType();
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  ft.setTokenized(true);
  ft.freeze();

  for (int i = 0; i < 1024; i++) {
    // create documents with an increasing number of As and one B
    Document doc = new Document();
    doc.add(new Field("foo", new TermFreqTokenStream("a", i + 1), ft));
    if (random().nextFloat() < 0.5f) {
      doc.add(new Field("foo", new TermFreqTokenStream("b", 1), ft));
    }
    w.addDocument(doc);
  }
  w.forceMerge(1);
  w.close();

  IndexReader reader = DirectoryReader.open(dir);
  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new TestSimilarity.SimpleSimilarity()); // freq == score
  final Query reqQ = new TermQuery(new Term("foo", "a"));
  final Query optQ = new TermQuery(new Term("foo", "b"));
  final Query boolQ = new BooleanQuery.Builder()
      .add(reqQ, Occur.MUST)
      .add(optQ, Occur.SHOULD)
      .build();
  Scorer actual = reqOptScorer(searcher, reqQ, optQ, true);
  Scorer expected = searcher
      .createWeight(boolQ, ScoreMode.COMPLETE, 1)
      .scorer(searcher.getIndexReader().leaves().get(0));
  actual.setMinCompetitiveScore(Math.nextUp(1));
  // Checks that all blocks are fully visited
  for (int i = 0; i < 1024; i++) {
    assertEquals(i, actual.iterator().nextDoc());
    assertEquals(i, expected.iterator().nextDoc());
    assertEquals(actual.score(), expected.score(), 0);
  }
  reader.close();
  dir.close();
}
Example 16
Source File: TestExternalCodecs.java From lucene-solr with Apache License 2.0
public void testPerFieldCodec() throws Exception {

  final int NUM_DOCS = atLeast(173);
  if (VERBOSE) {
    System.out.println("TEST: NUM_DOCS=" + NUM_DOCS);
  }

  BaseDirectoryWrapper dir = newDirectory();
  dir.setCheckIndexOnClose(false); // we use a custom codec provider
  IndexWriter w = new IndexWriter(
      dir,
      newIndexWriterConfig(new MockAnalyzer(random())).
          setCodec(new CustomPerFieldCodec()).
          setMergePolicy(newLogMergePolicy(3)));
  Document doc = new Document();
  // uses default codec:
  doc.add(newTextField("field1", "this field uses the standard codec as the test", Field.Store.NO));
  // uses memory codec:
  Field field2 = newTextField("field2", "this field uses the memory codec as the test", Field.Store.NO);
  doc.add(field2);

  Field idField = newStringField("id", "", Field.Store.NO);
  doc.add(idField);

  for (int i = 0; i < NUM_DOCS; i++) {
    idField.setStringValue("" + i);
    w.addDocument(doc);
    if ((i + 1) % 10 == 0) {
      w.commit();
    }
  }
  if (VERBOSE) {
    System.out.println("TEST: now delete id=77");
  }
  w.deleteDocuments(new Term("id", "77"));

  IndexReader r = DirectoryReader.open(w);
  assertEquals(NUM_DOCS - 1, r.numDocs());
  IndexSearcher s = newSearcher(r);
  assertEquals(NUM_DOCS - 1, s.count(new TermQuery(new Term("field1", "standard"))));
  assertEquals(NUM_DOCS - 1, s.count(new TermQuery(new Term("field2", "memory"))));
  r.close();

  if (VERBOSE) {
    System.out.println("\nTEST: now delete 2nd doc");
  }
  w.deleteDocuments(new Term("id", "44"));

  if (VERBOSE) {
    System.out.println("\nTEST: now force merge");
  }
  w.forceMerge(1);

  if (VERBOSE) {
    System.out.println("\nTEST: now open reader");
  }
  r = DirectoryReader.open(w);
  assertEquals(NUM_DOCS - 2, r.maxDoc());
  assertEquals(NUM_DOCS - 2, r.numDocs());
  s = newSearcher(r);
  assertEquals(NUM_DOCS - 2, s.count(new TermQuery(new Term("field1", "standard"))));
  assertEquals(NUM_DOCS - 2, s.count(new TermQuery(new Term("field2", "memory"))));
  assertEquals(1, s.count(new TermQuery(new Term("id", "76"))));
  assertEquals(0, s.count(new TermQuery(new Term("id", "77"))));
  assertEquals(0, s.count(new TermQuery(new Term("id", "44"))));

  if (VERBOSE) {
    System.out.println("\nTEST: now close NRT reader");
  }
  r.close();

  w.close();
  dir.close();
}
Example 17
Source File: IndexBasedSpellCheckerTest.java From lucene-solr with Apache License 2.0
@Test
@SuppressWarnings({"unchecked"})
public void testAlternateLocation() throws Exception {
  String[] ALT_DOCS = new String[]{
      "jumpin jack flash",
      "Sargent Peppers Lonely Hearts Club Band",
      "Born to Run",
      "Thunder Road",
      "Londons Burning",
      "A Horse with No Name",
      "Sweet Caroline"
  };

  IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
  @SuppressWarnings({"rawtypes"})
  NamedList spellchecker = new NamedList();
  spellchecker.add("classname", IndexBasedSpellChecker.class.getName());

  File tmpDir = createTempDir().toFile();
  File indexDir = new File(tmpDir, "spellingIdx");
  // create a standalone index
  File altIndexDir = new File(tmpDir, "alternateIdx" + new Date().getTime());
  Directory dir = newFSDirectory(altIndexDir.toPath());
  IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(new WhitespaceAnalyzer()));
  for (int i = 0; i < ALT_DOCS.length; i++) {
    Document doc = new Document();
    doc.add(new TextField("title", ALT_DOCS[i], Field.Store.YES));
    iw.addDocument(doc);
  }
  iw.forceMerge(1);
  iw.close();
  dir.close();
  indexDir.mkdirs();
  spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.LOCATION, altIndexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
  spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
  SolrCore core = h.getCore();
  String dictName = checker.init(spellchecker, core);
  assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
      dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) == true);
  h.getCore().withSearcher(searcher -> {
    checker.build(core, searcher);

    IndexReader reader = searcher.getIndexReader();
    Collection<Token> tokens = queryConverter.convert("flesh");
    SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1,
        SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);
    SpellingResult result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    // should be lowercased, b/c we are using a lowercasing analyzer
    Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("flesh is null and it shouldn't be", suggestions != null);
    assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
    Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
    assertTrue(entry.getKey() + " is not equal to " + "flash", entry.getKey().equals("flash") == true);
    assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1);

    // test something not in the spell checker
    spellOpts.tokens = queryConverter.convert("super");
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("suggestions size should be 0", suggestions.size() == 0);

    spellOpts.tokens = queryConverter.convert("Caroline");
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("suggestions is not null and it should be", suggestions == null);
    return null;
  });
}
Example 18
Source File: TestFieldCache.java From lucene-solr with Apache License 2.0
@BeforeClass
public static void beforeClass() throws Exception {
  NUM_DOCS = atLeast(500);
  NUM_ORDS = atLeast(2);
  directory = newDirectory();
  IndexWriter writer = new IndexWriter(directory,
      new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(new LogDocMergePolicy()));
  long theLong = Long.MAX_VALUE;
  double theDouble = Double.MAX_VALUE;
  int theInt = Integer.MAX_VALUE;
  float theFloat = Float.MAX_VALUE;
  unicodeStrings = new String[NUM_DOCS];
  multiValued = new BytesRef[NUM_DOCS][NUM_ORDS];
  if (VERBOSE) {
    System.out.println("TEST: setUp");
  }
  for (int i = 0; i < NUM_DOCS; i++) {
    Document doc = new Document();
    doc.add(new LongPoint("theLong", theLong--));
    doc.add(new DoublePoint("theDouble", theDouble--));
    doc.add(new IntPoint("theInt", theInt--));
    doc.add(new FloatPoint("theFloat", theFloat--));
    if (i % 2 == 0) {
      doc.add(new IntPoint("sparse", i));
    }

    if (i % 2 == 0) {
      doc.add(new IntPoint("numInt", i));
    }

    // sometimes skip the field:
    if (random().nextInt(40) != 17) {
      unicodeStrings[i] = generateString(i);
      doc.add(newStringField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES));
    }

    // sometimes skip the field:
    if (random().nextInt(10) != 8) {
      for (int j = 0; j < NUM_ORDS; j++) {
        String newValue = generateString(i);
        multiValued[i][j] = new BytesRef(newValue);
        doc.add(newStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES));
      }
      Arrays.sort(multiValued[i]);
    }
    writer.addDocument(doc);
  }
  writer.forceMerge(1); // this test relies on one segment and docid order
  IndexReader r = DirectoryReader.open(writer);
  assertEquals(1, r.leaves().size());
  reader = r.leaves().get(0).reader();
  TestUtil.checkReader(reader);
  writer.close();
}
Example 19
Source File: TestReqOptSumScorer.java From lucene-solr with Apache License 2.0
public void testMaxScoreSegment() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()));
  for (String[] values : Arrays.asList(
      new String[]{ "A" },      // 0
      new String[]{ "A" },      // 1
      new String[]{ },          // 2
      new String[]{ "A", "B" }, // 3
      new String[]{ "A" },      // 4
      new String[]{ "B" },      // 5
      new String[]{ "A", "B" }, // 6
      new String[]{ "B" }       // 7
  )) {
    Document doc = new Document();
    for (String value : values) {
      doc.add(new StringField("foo", value, Store.NO));
    }
    w.addDocument(doc);
  }
  w.forceMerge(1);
  w.close();

  IndexReader reader = DirectoryReader.open(dir);
  IndexSearcher searcher = newSearcher(reader);
  final Query reqQ = new ConstantScoreQuery(new TermQuery(new Term("foo", "A")));
  final Query optQ = new ConstantScoreQuery(new TermQuery(new Term("foo", "B")));

  Scorer scorer = reqOptScorer(searcher, reqQ, optQ, false);
  assertEquals(0, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(1, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(3, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(4, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(6, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

  scorer = reqOptScorer(searcher, reqQ, optQ, false);
  scorer.setMinCompetitiveScore(Math.nextDown(1f));
  assertEquals(0, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(1, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(3, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(4, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(6, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

  scorer = reqOptScorer(searcher, reqQ, optQ, false);
  scorer.setMinCompetitiveScore(Math.nextUp(1f));
  assertEquals(3, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(6, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

  scorer = reqOptScorer(searcher, reqQ, optQ, true);
  scorer.setMinCompetitiveScore(Math.nextUp(2f));
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

  reader.close();
  dir.close();
}
Example 20
Source File: TestIndexOrDocValuesQuery.java From lucene-solr with Apache License 2.0
public void testUseIndexForSelectiveQueries() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()
      // relies on costs and PointValues.estimateCost so we need the default codec
      .setCodec(TestUtil.getDefaultCodec()));
  for (int i = 0; i < 2000; ++i) {
    Document doc = new Document();
    if (i == 42) {
      doc.add(new StringField("f1", "bar", Store.NO));
      doc.add(new LongPoint("f2", 42L));
      doc.add(new NumericDocValuesField("f2", 42L));
    } else if (i == 100) {
      doc.add(new StringField("f1", "foo", Store.NO));
      doc.add(new LongPoint("f2", 2L));
      doc.add(new NumericDocValuesField("f2", 2L));
    } else {
      doc.add(new StringField("f1", "bar", Store.NO));
      doc.add(new LongPoint("f2", 2L));
      doc.add(new NumericDocValuesField("f2", 2L));
    }
    w.addDocument(doc);
  }
  w.forceMerge(1);
  IndexReader reader = DirectoryReader.open(w);
  IndexSearcher searcher = newSearcher(reader);
  searcher.setQueryCache(null);

  // The term query is more selective, so the IndexOrDocValuesQuery should use doc values
  final Query q1 = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("f1", "foo")), Occur.MUST)
      .add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 2),
          NumericDocValuesField.newSlowRangeQuery("f2", 2L, 2L)), Occur.MUST)
      .build();
  final Weight w1 = searcher.createWeight(searcher.rewrite(q1), ScoreMode.COMPLETE, 1);
  final Scorer s1 = w1.scorer(searcher.getIndexReader().leaves().get(0));
  assertNotNull(s1.twoPhaseIterator()); // means we use doc values

  // The term query is less selective, so the IndexOrDocValuesQuery should use points
  final Query q2 = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("f1", "bar")), Occur.MUST)
      .add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 42),
          NumericDocValuesField.newSlowRangeQuery("f2", 42L, 42L)), Occur.MUST)
      .build();
  final Weight w2 = searcher.createWeight(searcher.rewrite(q2), ScoreMode.COMPLETE, 1);
  final Scorer s2 = w2.scorer(searcher.getIndexReader().leaves().get(0));
  assertNull(s2.twoPhaseIterator()); // means we use points

  reader.close();
  w.close();
  dir.close();
}