Java Code Examples for org.apache.lucene.index.IndexWriter#forceMerge()
The following examples show how to use org.apache.lucene.index.IndexWriter#forceMerge(). Each example is drawn from an open-source project; the source file and license are noted above it. forceMerge(maxNumSegments) merges the index down to at most the given number of segments; it is an expensive, blocking operation, so it is typically called once, after indexing is complete.
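As a quick orientation before the project examples, here is a minimal, self-contained sketch of the typical call pattern: index a handful of documents across several commits, then call forceMerge(1) to collapse the index into a single segment before opening a reader. The class name, field name, and document contents are illustrative only and do not come from the projects below.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class ForceMergeSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new ByteBuffersDirectory(); // in-memory directory (Lucene 8.x)
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
    for (int i = 0; i < 100; i++) {
      Document doc = new Document();
      doc.add(new TextField("body", "document number " + i, Field.Store.NO));
      writer.addDocument(doc);
      if (i % 10 == 0) {
        writer.commit(); // periodic commits produce multiple segments
      }
    }
    // Merge all segments down to one. This is a costly, blocking call and is
    // generally only advisable for indexes that will no longer change.
    writer.forceMerge(1);
    writer.close();
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      System.out.println("segments after forceMerge: " + reader.leaves().size()); // expect 1
    }
  }
}

Because forceMerge rewrites the selected segments in full, the Lucene javadoc recommends reserving it for indexes that have finished changing, as the examples below mostly do (merging once after all documents are added).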
Example 1
Source File: BaseShapeTestCase.java From lucene-solr with Apache License 2.0
protected void indexRandomShapes(IndexWriter w, Object... shapes) throws Exception {
  Set<Integer> deleted = new HashSet<>();
  for (int id = 0; id < shapes.length; ++id) {
    Document doc = new Document();
    doc.add(newStringField("id", "" + id, Field.Store.NO));
    doc.add(new NumericDocValuesField("id", id));
    if (shapes[id] != null) {
      addShapeToDoc(FIELD_NAME, doc, shapes[id]);
    }
    w.addDocument(doc);
    if (id > 0 && random().nextInt(100) == 42) {
      int idToDelete = random().nextInt(id);
      w.deleteDocuments(new Term("id", "" + idToDelete));
      deleted.add(idToDelete);
      if (VERBOSE) {
        System.out.println(" delete id=" + idToDelete);
      }
    }
  }
  if (randomBoolean()) {
    w.forceMerge(1);
  }
}
Example 2
Source File: TestMergeSchedulerExternal.java From lucene-solr with Apache License 2.0
public void testCustomMergeScheduler() throws Exception {
  // we don't really need to execute anything, just to make sure the custom MS
  // compiles. But ensure that it can be used as well, e.g., no other hidden
  // dependencies or something. Therefore, don't use any random API!
  Directory dir = new ByteBuffersDirectory();
  IndexWriterConfig conf = new IndexWriterConfig(null);
  conf.setMergeScheduler(new ReportingMergeScheduler());
  IndexWriter writer = new IndexWriter(dir, conf);
  writer.addDocument(new Document());
  writer.commit(); // trigger flush
  writer.addDocument(new Document());
  writer.commit(); // trigger flush
  writer.forceMerge(1);
  writer.close();
  dir.close();
}
Example 3
Source File: PayloadHelper.java From lucene-solr with Apache License 2.0
/**
 * Sets up a RAM-resident Directory, and adds documents (using English.intToEnglish())
 * with two fields: field and multiField, and analyzes them using the PayloadAnalyzer.
 * @param similarity The Similarity class to use in the Searcher
 * @param numDocs The num docs to add
 * @return An IndexSearcher
 */
// TODO: randomize
public IndexSearcher setUp(Random random, Similarity similarity, int numDocs) throws IOException {
  Directory directory = new MockDirectoryWrapper(random, new ByteBuffersDirectory());
  PayloadAnalyzer analyzer = new PayloadAnalyzer();

  // TODO randomize this
  IndexWriter writer = new IndexWriter(directory,
      new IndexWriterConfig(analyzer).setSimilarity(similarity));
  // writer.infoStream = System.out;
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(new TextField(FIELD, English.intToEnglish(i), Field.Store.YES));
    doc.add(new TextField(MULTI_FIELD, English.intToEnglish(i) + " " + English.intToEnglish(i), Field.Store.YES));
    doc.add(new TextField(NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES));
    writer.addDocument(doc);
  }
  writer.forceMerge(1);
  reader = DirectoryReader.open(writer);
  writer.close();

  IndexSearcher searcher = LuceneTestCase.newSearcher(LuceneTestCase.getOnlyLeafReader(reader));
  searcher.setSimilarity(similarity);
  return searcher;
}
Example 4
Source File: TestLucene84PostingsFormat.java From lucene-solr with Apache License 2.0
/** Make sure the final sub-block(s) are not skipped. */
public void testFinalBlock() throws Exception {
  Directory d = newDirectory();
  IndexWriter w = new IndexWriter(d, new IndexWriterConfig(new MockAnalyzer(random())));
  for (int i = 0; i < 25; i++) {
    Document doc = new Document();
    doc.add(newStringField("field", Character.toString((char) (97 + i)), Field.Store.NO));
    doc.add(newStringField("field", "z" + Character.toString((char) (97 + i)), Field.Store.NO));
    w.addDocument(doc);
  }
  w.forceMerge(1);

  DirectoryReader r = DirectoryReader.open(w);
  assertEquals(1, r.leaves().size());
  FieldReader field = (FieldReader) r.leaves().get(0).reader().terms("field");
  // We should see exactly two blocks: one root block (prefix empty string) and one block for z* terms (prefix z):
  Stats stats = field.getStats();
  assertEquals(0, stats.floorBlockCount);
  assertEquals(2, stats.nonFloorBlockCount);
  r.close();
  w.close();
  d.close();
}
Example 5
Source File: TestFieldCacheSort.java From lucene-solr with Apache License 2.0
/** test that we throw exception on multi-valued field, creates corrupt reader, use SORTED_SET instead */
public void testMultiValuedField() throws IOException {
  Directory indexStore = newDirectory();
  IndexWriter writer = new IndexWriter(indexStore, newIndexWriterConfig(new MockAnalyzer(random())));
  for (int i = 0; i < 5; i++) {
    Document doc = new Document();
    doc.add(new StringField("string", "a" + i, Field.Store.NO));
    doc.add(new StringField("string", "b" + i, Field.Store.NO));
    writer.addDocument(doc);
  }
  writer.forceMerge(1); // enforce one segment to have a higher unique term count in all cases
  writer.close();
  Sort sort = new Sort(
      new SortField("string", SortField.Type.STRING),
      SortField.FIELD_DOC);
  IndexReader reader = UninvertingReader.wrap(DirectoryReader.open(indexStore),
      Collections.singletonMap("string", Type.SORTED));
  IndexSearcher searcher = new IndexSearcher(reader);
  expectThrows(IllegalStateException.class, () -> {
    searcher.search(new MatchAllDocsQuery(), 500, sort);
  });
  reader.close();
  indexStore.close();
}
Example 6
Source File: TestScorerPerf.java From lucene-solr with Apache License 2.0
public void createRandomTerms(int nDocs, int nTerms, double power, Directory dir) throws Exception {
  int[] freq = new int[nTerms];
  Term[] terms = new Term[nTerms];
  for (int i = 0; i < nTerms; i++) {
    int f = (nTerms + 1) - i; // make first terms less frequent
    freq[i] = (int) Math.ceil(Math.pow(f, power));
    terms[i] = new Term("f", Character.toString((char) ('A' + i)));
  }
  IndexWriter iw = new IndexWriter(dir,
      newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE));
  for (int i = 0; i < nDocs; i++) {
    Document d = new Document();
    for (int j = 0; j < nTerms; j++) {
      if (random().nextInt(freq[j]) == 0) {
        d.add(newStringField("f", terms[j].text(), Field.Store.NO));
        // System.out.println(d);
      }
    }
    iw.addDocument(d);
  }
  iw.forceMerge(1);
  iw.close();
}
Example 7
Source File: TestPointQueries.java From lucene-solr with Apache License 2.0
@Nightly
public void testInversePointRange() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
  final int numDims = TestUtil.nextInt(random(), 1, 3);
  final int numDocs = atLeast(10 * BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE); // we need multiple leaves to enable this optimization
  for (int i = 0; i < numDocs; ++i) {
    Document doc = new Document();
    int[] values = new int[numDims];
    Arrays.fill(values, i);
    doc.add(new IntPoint("f", values));
    w.addDocument(doc);
  }
  w.forceMerge(1);
  IndexReader r = DirectoryReader.open(w);
  w.close();

  IndexSearcher searcher = newSearcher(r);
  int[] low = new int[numDims];
  int[] high = new int[numDims];
  Arrays.fill(high, numDocs - 2);
  assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
  Arrays.fill(low, 1);
  assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
  Arrays.fill(high, numDocs - 1);
  assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
  Arrays.fill(low, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE + 1);
  assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
  Arrays.fill(high, numDocs - BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
  assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));

  r.close();
  dir.close();
}
Example 8
Source File: TestLucene80DocValuesFormat.java From lucene-solr with Apache License 2.0
private void doTestSparseNumericBlocksOfVariousBitsPerValue(double density) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE));
  conf.setRAMBufferSizeMB(-1);
  conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  Field storedField = newStringField("stored", "", Field.Store.YES);
  Field dvField = new NumericDocValuesField("dv", 0);
  doc.add(storedField);
  doc.add(dvField);

  final int numDocs = atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE * 3);
  final LongSupplier longs = blocksOfVariousBPV();
  for (int i = 0; i < numDocs; i++) {
    if (random().nextDouble() > density) {
      writer.addDocument(new Document());
      continue;
    }
    long value = longs.getAsLong();
    storedField.setStringValue(Long.toString(value));
    dvField.setLongValue(value);
    writer.addDocument(doc);
  }

  writer.forceMerge(1);
  writer.close();

  // compare
  assertDVIterate(dir);
  assertDVAdvance(dir, 1); // Tests all jump-lengths from 1 to maxDoc (quite slow ~= 1 minute for 200K docs)
  dir.close();
}
Example 9
Source File: MergeCommand.java From clue with Apache License 2.0
@Override
public void execute(Namespace args, PrintStream out) throws Exception {
  int count = args.getInt("num");

  IndexWriter writer = ctx.getIndexWriter();
  if (writer != null) {
    writer.forceMerge(count, true);
    writer.commit();
    ctx.refreshReader();
  } else {
    out.println("unable to open index writer, index is in readonly mode");
  }
}
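Note that this example uses the two-argument overload forceMerge(maxNumSegments, doWait): with doWait=true the call blocks until the requested merges complete, which suits a command-line tool that commits and refreshes its reader immediately afterwards.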
Example 10
Source File: TestLucene80DocValuesFormat.java From lucene-solr with Apache License 2.0
@Nightly
public void testSortedSetAroundBlockSize() throws IOException {
  final int frontier = 1 << Lucene80DocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
  for (int maxDoc = frontier - 1; maxDoc <= frontier + 1; ++maxDoc) {
    final Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()));
    ByteBuffersDataOutput out = new ByteBuffersDataOutput();
    Document doc = new Document();
    SortedSetDocValuesField field1 = new SortedSetDocValuesField("sset", new BytesRef());
    doc.add(field1);
    SortedSetDocValuesField field2 = new SortedSetDocValuesField("sset", new BytesRef());
    doc.add(field2);
    for (int i = 0; i < maxDoc; ++i) {
      BytesRef s1 = new BytesRef(TestUtil.randomSimpleString(random(), 2));
      BytesRef s2 = new BytesRef(TestUtil.randomSimpleString(random(), 2));
      field1.setBytesValue(s1);
      field2.setBytesValue(s2);
      w.addDocument(doc);
      Set<BytesRef> set = new TreeSet<>(Arrays.asList(s1, s2));
      out.writeVInt(set.size());
      for (BytesRef ref : set) {
        out.writeVInt(ref.length);
        out.writeBytes(ref.bytes, ref.offset, ref.length);
      }
    }

    w.forceMerge(1);
    DirectoryReader r = DirectoryReader.open(w);
    w.close();

    LeafReader sr = getOnlyLeafReader(r);
    assertEquals(maxDoc, sr.maxDoc());
    SortedSetDocValues values = sr.getSortedSetDocValues("sset");
    assertNotNull(values);
    ByteBuffersDataInput in = out.toDataInput();
    BytesRefBuilder b = new BytesRefBuilder();
    for (int i = 0; i < maxDoc; ++i) {
      assertEquals(i, values.nextDoc());
      final int numValues = in.readVInt();
      for (int j = 0; j < numValues; ++j) {
        b.setLength(in.readVInt());
        b.grow(b.length());
        in.readBytes(b.bytes(), 0, b.length());
        assertEquals(b.get(), values.lookupOrd(values.nextOrd()));
      }
      assertEquals(SortedSetDocValues.NO_MORE_ORDS, values.nextOrd());
    }
    r.close();
    dir.close();
  }
}
Example 11
Source File: TestLucene80DocValuesFormat.java From lucene-solr with Apache License 2.0
@Nightly
public void testSortedNumericAroundBlockSize() throws IOException {
  final int frontier = 1 << Lucene80DocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
  for (int maxDoc = frontier - 1; maxDoc <= frontier + 1; ++maxDoc) {
    final Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()));
    ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();

    Document doc = new Document();
    SortedNumericDocValuesField field1 = new SortedNumericDocValuesField("snum", 0L);
    doc.add(field1);
    SortedNumericDocValuesField field2 = new SortedNumericDocValuesField("snum", 0L);
    doc.add(field2);
    for (int i = 0; i < maxDoc; ++i) {
      long s1 = random().nextInt(100);
      long s2 = random().nextInt(100);
      field1.setLongValue(s1);
      field2.setLongValue(s2);
      w.addDocument(doc);
      buffer.writeVLong(Math.min(s1, s2));
      buffer.writeVLong(Math.max(s1, s2));
    }

    w.forceMerge(1);
    DirectoryReader r = DirectoryReader.open(w);
    w.close();

    LeafReader sr = getOnlyLeafReader(r);
    assertEquals(maxDoc, sr.maxDoc());
    SortedNumericDocValues values = sr.getSortedNumericDocValues("snum");
    assertNotNull(values);
    ByteBuffersDataInput dataInput = buffer.toDataInput();
    for (int i = 0; i < maxDoc; ++i) {
      assertEquals(i, values.nextDoc());
      assertEquals(2, values.docValueCount());
      assertEquals(dataInput.readVLong(), values.nextValue());
      assertEquals(dataInput.readVLong(), values.nextValue());
    }
    r.close();
    dir.close();
  }
}
Example 12
Source File: TestDirectoryTaxonomyReader.java From lucene-solr with Apache License 2.0
@Test
public void testOpenIfChangedMergedSegment() throws Exception {
  // test openIfChanged() when all index segments were merged - used to be
  // a bug in ParentArray, caught by testOpenIfChangedManySegments - only
  // this test is not random
  Directory dir = newDirectory();

  // hold onto IW to forceMerge
  // note how we don't close it, since DTW will close it.
  final IndexWriter iw = new IndexWriter(dir,
      new IndexWriterConfig(new MockAnalyzer(random()))
          .setMergePolicy(new LogByteSizeMergePolicy()));
  DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {
    @Override
    protected IndexWriter openIndexWriter(Directory directory, IndexWriterConfig config) throws IOException {
      return iw;
    }
  };

  TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
  assertEquals(1, reader.getSize());
  assertEquals(1, reader.getParallelTaxonomyArrays().parents().length);

  // add category and call forceMerge -- this should flush IW and merge segments down to 1
  // in ParentArray.initFromReader, this used to fail assuming there are no parents.
  writer.addCategory(new FacetLabel("1"));
  iw.forceMerge(1);

  // now calling openIfChanged should trip on the bug
  TaxonomyReader newtr = TaxonomyReader.openIfChanged(reader);
  assertNotNull(newtr);
  reader.close();
  reader = newtr;
  assertEquals(2, reader.getSize());
  assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);

  reader.close();
  writer.close();
  dir.close();
}
Example 13
Source File: TestLucene80DocValuesFormat.java From lucene-solr with Apache License 2.0
private void doTestSortedNumericBlocksOfVariousBitsPerValue(LongSupplier counts) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE));
  conf.setRAMBufferSizeMB(-1);
  conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  IndexWriter writer = new IndexWriter(dir, conf);

  final int numDocs = atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE * 3);
  final LongSupplier values = blocksOfVariousBPV();
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();

    int valueCount = (int) counts.getAsLong();
    long[] valueArray = new long[valueCount];
    for (int j = 0; j < valueCount; j++) {
      long value = values.getAsLong();
      valueArray[j] = value;
      doc.add(new SortedNumericDocValuesField("dv", value));
    }
    Arrays.sort(valueArray);
    for (int j = 0; j < valueCount; j++) {
      doc.add(new StoredField("stored", Long.toString(valueArray[j])));
    }
    writer.addDocument(doc);
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }
  writer.forceMerge(1);
  writer.close();

  // compare
  DirectoryReader ir = DirectoryReader.open(dir);
  TestUtil.checkReader(ir);
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    SortedNumericDocValues docValues = DocValues.getSortedNumeric(r, "dv");
    for (int i = 0; i < r.maxDoc(); i++) {
      if (i > docValues.docID()) {
        docValues.nextDoc();
      }
      String[] expected = r.document(i).getValues("stored");
      if (i < docValues.docID()) {
        assertEquals(0, expected.length);
      } else {
        String[] actual = new String[docValues.docValueCount()];
        for (int j = 0; j < actual.length; j++) {
          actual[j] = Long.toString(docValues.nextValue());
        }
        assertArrayEquals(expected, actual);
      }
    }
  }
  ir.close();
  dir.close();
}
Example 14
Source File: TestIndexOrDocValuesQuery.java From lucene-solr with Apache License 2.0
public void testUseIndexForSelectiveMultiValueQueries() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()
      // relies on costs and PointValues.estimateCost so we need the default codec
      .setCodec(TestUtil.getDefaultCodec()));
  for (int i = 0; i < 2000; ++i) {
    Document doc = new Document();
    if (i < 1000) {
      doc.add(new StringField("f1", "bar", Store.NO));
      for (int j = 0; j < 500; j++) {
        doc.add(new LongPoint("f2", 42L));
        doc.add(new SortedNumericDocValuesField("f2", 42L));
      }
    } else if (i == 1001) {
      doc.add(new StringField("f1", "foo", Store.NO));
      doc.add(new LongPoint("f2", 2L));
      doc.add(new SortedNumericDocValuesField("f2", 42L));
    } else {
      doc.add(new StringField("f1", "bar", Store.NO));
      for (int j = 0; j < 100; j++) {
        doc.add(new LongPoint("f2", 2L));
        doc.add(new SortedNumericDocValuesField("f2", 2L));
      }
    }
    w.addDocument(doc);
  }
  w.forceMerge(1);
  IndexReader reader = DirectoryReader.open(w);
  IndexSearcher searcher = newSearcher(reader);
  searcher.setQueryCache(null);

  // The term query is less selective, so the IndexOrDocValuesQuery should use points
  final Query q1 = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("f1", "bar")), Occur.MUST)
      .add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 2),
          SortedNumericDocValuesField.newSlowRangeQuery("f2", 2L, 2L)), Occur.MUST)
      .build();
  final Weight w1 = searcher.createWeight(searcher.rewrite(q1), ScoreMode.COMPLETE, 1);
  final Scorer s1 = w1.scorer(searcher.getIndexReader().leaves().get(0));
  assertNull(s1.twoPhaseIterator()); // means we use points

  // The term query is less selective, so the IndexOrDocValuesQuery should use points
  final Query q2 = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("f1", "bar")), Occur.MUST)
      .add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 42),
          SortedNumericDocValuesField.newSlowRangeQuery("f2", 42, 42L)), Occur.MUST)
      .build();
  final Weight w2 = searcher.createWeight(searcher.rewrite(q2), ScoreMode.COMPLETE, 1);
  final Scorer s2 = w2.scorer(searcher.getIndexReader().leaves().get(0));
  assertNull(s2.twoPhaseIterator()); // means we use points

  // The term query is more selective, so the IndexOrDocValuesQuery should use doc values
  final Query q3 = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("f1", "foo")), Occur.MUST)
      .add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 42),
          SortedNumericDocValuesField.newSlowRangeQuery("f2", 42, 42L)), Occur.MUST)
      .build();
  final Weight w3 = searcher.createWeight(searcher.rewrite(q3), ScoreMode.COMPLETE, 1);
  final Scorer s3 = w3.scorer(searcher.getIndexReader().leaves().get(0));
  assertNotNull(s3.twoPhaseIterator()); // means we use doc values

  reader.close();
  w.close();
  dir.close();
}
Example 15
Source File: TestReqOptSumScorer.java From lucene-solr with Apache License 2.0
public void testMaxBlock() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()));

  FieldType ft = new FieldType();
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  ft.setTokenized(true);
  ft.freeze();

  for (int i = 0; i < 1024; i++) {
    // create documents with an increasing number of As and one B
    Document doc = new Document();
    doc.add(new Field("foo", new TermFreqTokenStream("a", i + 1), ft));
    if (random().nextFloat() < 0.5f) {
      doc.add(new Field("foo", new TermFreqTokenStream("b", 1), ft));
    }
    w.addDocument(doc);
  }
  w.forceMerge(1);
  w.close();

  IndexReader reader = DirectoryReader.open(dir);
  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new TestSimilarity.SimpleSimilarity()); // freq == score
  final Query reqQ = new TermQuery(new Term("foo", "a"));
  final Query optQ = new TermQuery(new Term("foo", "b"));
  final Query boolQ = new BooleanQuery.Builder()
      .add(reqQ, Occur.MUST)
      .add(optQ, Occur.SHOULD)
      .build();
  Scorer actual = reqOptScorer(searcher, reqQ, optQ, true);
  Scorer expected = searcher
      .createWeight(boolQ, ScoreMode.COMPLETE, 1)
      .scorer(searcher.getIndexReader().leaves().get(0));
  actual.setMinCompetitiveScore(Math.nextUp(1));
  // Checks that all blocks are fully visited
  for (int i = 0; i < 1024; i++) {
    assertEquals(i, actual.iterator().nextDoc());
    assertEquals(i, expected.iterator().nextDoc());
    assertEquals(actual.score(), expected.score(), 0);
  }
  reader.close();
  dir.close();
}
Example 16
Source File: TestExternalCodecs.java From lucene-solr with Apache License 2.0
public void testPerFieldCodec() throws Exception {

  final int NUM_DOCS = atLeast(173);
  if (VERBOSE) {
    System.out.println("TEST: NUM_DOCS=" + NUM_DOCS);
  }

  BaseDirectoryWrapper dir = newDirectory();
  dir.setCheckIndexOnClose(false); // we use a custom codec provider
  IndexWriter w = new IndexWriter(
      dir,
      newIndexWriterConfig(new MockAnalyzer(random())).
          setCodec(new CustomPerFieldCodec()).
          setMergePolicy(newLogMergePolicy(3)));
  Document doc = new Document();
  // uses default codec:
  doc.add(newTextField("field1", "this field uses the standard codec as the test", Field.Store.NO));
  // uses memory codec:
  Field field2 = newTextField("field2", "this field uses the memory codec as the test", Field.Store.NO);
  doc.add(field2);

  Field idField = newStringField("id", "", Field.Store.NO);
  doc.add(idField);

  for (int i = 0; i < NUM_DOCS; i++) {
    idField.setStringValue("" + i);
    w.addDocument(doc);
    if ((i + 1) % 10 == 0) {
      w.commit();
    }
  }
  if (VERBOSE) {
    System.out.println("TEST: now delete id=77");
  }
  w.deleteDocuments(new Term("id", "77"));

  IndexReader r = DirectoryReader.open(w);
  assertEquals(NUM_DOCS - 1, r.numDocs());
  IndexSearcher s = newSearcher(r);
  assertEquals(NUM_DOCS - 1, s.count(new TermQuery(new Term("field1", "standard"))));
  assertEquals(NUM_DOCS - 1, s.count(new TermQuery(new Term("field2", "memory"))));
  r.close();

  if (VERBOSE) {
    System.out.println("\nTEST: now delete 2nd doc");
  }
  w.deleteDocuments(new Term("id", "44"));

  if (VERBOSE) {
    System.out.println("\nTEST: now force merge");
  }
  w.forceMerge(1);

  if (VERBOSE) {
    System.out.println("\nTEST: now open reader");
  }
  r = DirectoryReader.open(w);
  assertEquals(NUM_DOCS - 2, r.maxDoc());
  assertEquals(NUM_DOCS - 2, r.numDocs());
  s = newSearcher(r);
  assertEquals(NUM_DOCS - 2, s.count(new TermQuery(new Term("field1", "standard"))));
  assertEquals(NUM_DOCS - 2, s.count(new TermQuery(new Term("field2", "memory"))));
  assertEquals(1, s.count(new TermQuery(new Term("id", "76"))));
  assertEquals(0, s.count(new TermQuery(new Term("id", "77"))));
  assertEquals(0, s.count(new TermQuery(new Term("id", "44"))));

  if (VERBOSE) {
    System.out.println("\nTEST: now close NRT reader");
  }
  r.close();

  w.close();
  dir.close();
}
Example 17
Source File: IndexBasedSpellCheckerTest.java From lucene-solr with Apache License 2.0
@Test
@SuppressWarnings({"unchecked"})
public void testAlternateLocation() throws Exception {
  String[] ALT_DOCS = new String[]{
      "jumpin jack flash",
      "Sargent Peppers Lonely Hearts Club Band",
      "Born to Run",
      "Thunder Road",
      "Londons Burning",
      "A Horse with No Name",
      "Sweet Caroline"
  };

  IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
  @SuppressWarnings({"rawtypes"})
  NamedList spellchecker = new NamedList();
  spellchecker.add("classname", IndexBasedSpellChecker.class.getName());

  File tmpDir = createTempDir().toFile();
  File indexDir = new File(tmpDir, "spellingIdx");
  // create a standalone index
  File altIndexDir = new File(tmpDir, "alternateIdx" + new Date().getTime());
  Directory dir = newFSDirectory(altIndexDir.toPath());
  IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(new WhitespaceAnalyzer()));
  for (int i = 0; i < ALT_DOCS.length; i++) {
    Document doc = new Document();
    doc.add(new TextField("title", ALT_DOCS[i], Field.Store.YES));
    iw.addDocument(doc);
  }
  iw.forceMerge(1);
  iw.close();
  dir.close();
  indexDir.mkdirs();
  spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.LOCATION, altIndexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
  spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
  SolrCore core = h.getCore();
  String dictName = checker.init(spellchecker, core);
  assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
      dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) == true);
  h.getCore().withSearcher(searcher -> {
    checker.build(core, searcher);

    IndexReader reader = searcher.getIndexReader();
    Collection<Token> tokens = queryConverter.convert("flesh");
    SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1,
        SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);
    SpellingResult result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    // should be lowercased, b/c we are using a lowercasing analyzer
    Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("flesh is null and it shouldn't be", suggestions != null);
    assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
    Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
    assertTrue(entry.getKey() + " is not equal to " + "flash", entry.getKey().equals("flash") == true);
    assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1);

    // test something not in the spell checker
    spellOpts.tokens = queryConverter.convert("super");
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("suggestions size should be 0", suggestions.size() == 0);

    spellOpts.tokens = queryConverter.convert("Caroline");
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("suggestions is not null and it should be", suggestions == null);
    return null;
  });
}
Example 18
Source File: TestFieldCache.java From lucene-solr with Apache License 2.0
@BeforeClass
public static void beforeClass() throws Exception {
  NUM_DOCS = atLeast(500);
  NUM_ORDS = atLeast(2);
  directory = newDirectory();
  IndexWriter writer = new IndexWriter(directory,
      new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(new LogDocMergePolicy()));
  long theLong = Long.MAX_VALUE;
  double theDouble = Double.MAX_VALUE;
  int theInt = Integer.MAX_VALUE;
  float theFloat = Float.MAX_VALUE;
  unicodeStrings = new String[NUM_DOCS];
  multiValued = new BytesRef[NUM_DOCS][NUM_ORDS];
  if (VERBOSE) {
    System.out.println("TEST: setUp");
  }
  for (int i = 0; i < NUM_DOCS; i++) {
    Document doc = new Document();
    doc.add(new LongPoint("theLong", theLong--));
    doc.add(new DoublePoint("theDouble", theDouble--));
    doc.add(new IntPoint("theInt", theInt--));
    doc.add(new FloatPoint("theFloat", theFloat--));
    if (i % 2 == 0) {
      doc.add(new IntPoint("sparse", i));
    }

    if (i % 2 == 0) {
      doc.add(new IntPoint("numInt", i));
    }

    // sometimes skip the field:
    if (random().nextInt(40) != 17) {
      unicodeStrings[i] = generateString(i);
      doc.add(newStringField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES));
    }

    // sometimes skip the field:
    if (random().nextInt(10) != 8) {
      for (int j = 0; j < NUM_ORDS; j++) {
        String newValue = generateString(i);
        multiValued[i][j] = new BytesRef(newValue);
        doc.add(newStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES));
      }
      Arrays.sort(multiValued[i]);
    }
    writer.addDocument(doc);
  }
  writer.forceMerge(1); // this test relies on one segment and docid order
  IndexReader r = DirectoryReader.open(writer);
  assertEquals(1, r.leaves().size());
  reader = r.leaves().get(0).reader();
  TestUtil.checkReader(reader);
  writer.close();
}
Example 19
Source File: TestReqOptSumScorer.java From lucene-solr with Apache License 2.0
public void testMaxScoreSegment() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()));
  for (String[] values : Arrays.asList(
      new String[]{ "A" },      // 0
      new String[]{ "A" },      // 1
      new String[]{ },          // 2
      new String[]{ "A", "B" }, // 3
      new String[]{ "A" },      // 4
      new String[]{ "B" },      // 5
      new String[]{ "A", "B" }, // 6
      new String[]{ "B" }       // 7
  )) {
    Document doc = new Document();
    for (String value : values) {
      doc.add(new StringField("foo", value, Store.NO));
    }
    w.addDocument(doc);
  }
  w.forceMerge(1);
  w.close();

  IndexReader reader = DirectoryReader.open(dir);
  IndexSearcher searcher = newSearcher(reader);
  final Query reqQ = new ConstantScoreQuery(new TermQuery(new Term("foo", "A")));
  final Query optQ = new ConstantScoreQuery(new TermQuery(new Term("foo", "B")));

  Scorer scorer = reqOptScorer(searcher, reqQ, optQ, false);
  assertEquals(0, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(1, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(3, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(4, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(6, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

  scorer = reqOptScorer(searcher, reqQ, optQ, false);
  scorer.setMinCompetitiveScore(Math.nextDown(1f));
  assertEquals(0, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(1, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(3, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(4, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(6, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

  scorer = reqOptScorer(searcher, reqQ, optQ, false);
  scorer.setMinCompetitiveScore(Math.nextUp(1f));
  assertEquals(3, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(6, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

  scorer = reqOptScorer(searcher, reqQ, optQ, true);
  scorer.setMinCompetitiveScore(Math.nextUp(2f));
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

  reader.close();
  dir.close();
}
Example 20
Source File: TestIndexOrDocValuesQuery.java From lucene-solr with Apache License 2.0
public void testUseIndexForSelectiveQueries() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()
      // relies on costs and PointValues.estimateCost so we need the default codec
      .setCodec(TestUtil.getDefaultCodec()));
  for (int i = 0; i < 2000; ++i) {
    Document doc = new Document();
    if (i == 42) {
      doc.add(new StringField("f1", "bar", Store.NO));
      doc.add(new LongPoint("f2", 42L));
      doc.add(new NumericDocValuesField("f2", 42L));
    } else if (i == 100) {
      doc.add(new StringField("f1", "foo", Store.NO));
      doc.add(new LongPoint("f2", 2L));
      doc.add(new NumericDocValuesField("f2", 2L));
    } else {
      doc.add(new StringField("f1", "bar", Store.NO));
      doc.add(new LongPoint("f2", 2L));
      doc.add(new NumericDocValuesField("f2", 2L));
    }
    w.addDocument(doc);
  }
  w.forceMerge(1);
  IndexReader reader = DirectoryReader.open(w);
  IndexSearcher searcher = newSearcher(reader);
  searcher.setQueryCache(null);

  // The term query is more selective, so the IndexOrDocValuesQuery should use doc values
  final Query q1 = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("f1", "foo")), Occur.MUST)
      .add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 2),
          NumericDocValuesField.newSlowRangeQuery("f2", 2L, 2L)), Occur.MUST)
      .build();
  final Weight w1 = searcher.createWeight(searcher.rewrite(q1), ScoreMode.COMPLETE, 1);
  final Scorer s1 = w1.scorer(searcher.getIndexReader().leaves().get(0));
  assertNotNull(s1.twoPhaseIterator()); // means we use doc values

  // The term query is less selective, so the IndexOrDocValuesQuery should use points
  final Query q2 = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("f1", "bar")), Occur.MUST)
      .add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 42),
          NumericDocValuesField.newSlowRangeQuery("f2", 42L, 42L)), Occur.MUST)
      .build();
  final Weight w2 = searcher.createWeight(searcher.rewrite(q2), ScoreMode.COMPLETE, 1);
  final Scorer s2 = w2.scorer(searcher.getIndexReader().leaves().get(0));
  assertNull(s2.twoPhaseIterator()); // means we use points

  reader.close();
  w.close();
  dir.close();
}