Java Code Examples for org.apache.lucene.util.TestUtil#docs()
The following examples show how to use org.apache.lucene.util.TestUtil#docs(). Each example is taken from an open-source project; the source file and license are noted above each listing.
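TestUtil lives in Lucene's test framework (lucene-test-framework). Judging from the call sites in the examples below, docs() has two overloads: one that looks a term up through an IndexReader, and one that reads postings for the term an already-positioned TermsEnum is on. Both take a Random so the framework can pick among equivalent postings APIs at random, broadening test coverage. A sketch of the assumed shape (parameter names are illustrative, not confirmed against the source):

// Assumed signatures, inferred from the call sites in the examples below.
// Look up `term` in `field` on the reader and return its postings:
public static PostingsEnum docs(Random random, IndexReader r, String field,
                                BytesRef term, PostingsEnum reuse, int flags) throws IOException;
// Return postings for the term the TermsEnum is currently positioned on,
// optionally reusing a prior PostingsEnum:
public static PostingsEnum docs(Random random, TermsEnum termsEnum,
                                PostingsEnum reuse, int flags) throws IOException;

In both cases `flags` is a PostingsEnum constant such as PostingsEnum.NONE or PostingsEnum.FREQS, and passing null for `reuse` requests a fresh enum.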
Example 1
Source File: TestMemoryIndexAgainstDirectory.java (from lucene-solr, Apache License 2.0)
public void testDocsEnumStart() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  MemoryIndex memory = new MemoryIndex(random().nextBoolean(), false, random().nextInt(50) * 1024 * 1024);
  memory.addField("foo", "bar", analyzer);
  LeafReader reader = (LeafReader) memory.createSearcher().getIndexReader();
  TestUtil.checkReader(reader);
  PostingsEnum disi = TestUtil.docs(random(), reader, "foo", new BytesRef("bar"), null, PostingsEnum.NONE);
  int docid = disi.docID();
  assertEquals(-1, docid);
  assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

  // now reuse and check again
  TermsEnum te = reader.terms("foo").iterator();
  assertTrue(te.seekExact(new BytesRef("bar")));
  disi = te.postings(disi, PostingsEnum.NONE);
  docid = disi.docID();
  assertEquals(-1, docid);
  assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  reader.close();
}
Example 2
Source File: TestTermVectorsReader.java (from lucene-solr, Apache License 2.0)
public void testDocsEnum() throws IOException {
  TermVectorsReader reader = Codec.getDefault().termVectorsFormat().vectorsReader(dir, seg.info, fieldInfos, newIOContext(random()));
  for (int j = 0; j < 5; j++) {
    Terms vector = reader.get(j).terms(testFields[0]);
    assertNotNull(vector);
    assertEquals(testTerms.length, vector.size());
    TermsEnum termsEnum = vector.iterator();
    PostingsEnum postingsEnum = null;
    for (int i = 0; i < testTerms.length; i++) {
      final BytesRef text = termsEnum.next();
      assertNotNull(text);
      String term = text.utf8ToString();
      //System.out.println("Term: " + term);
      assertEquals(testTerms[i], term);

      postingsEnum = TestUtil.docs(random(), termsEnum, postingsEnum, PostingsEnum.NONE);
      assertNotNull(postingsEnum);
      int doc = postingsEnum.docID();
      assertEquals(-1, doc);
      assertTrue(postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
      assertEquals(DocIdSetIterator.NO_MORE_DOCS, postingsEnum.nextDoc());
    }
    assertNull(termsEnum.next());
  }
  reader.close();
}
Example 3
Source File: TestMultiFields.java (from lucene-solr, Apache License 2.0)
public void testSeparateEnums() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document d = new Document();
  d.add(newStringField("f", "j", Field.Store.NO));
  w.addDocument(d);
  w.commit();
  w.addDocument(d);
  IndexReader r = w.getReader();
  w.close();
  PostingsEnum d1 = TestUtil.docs(random(), r, "f", new BytesRef("j"), null, PostingsEnum.NONE);
  PostingsEnum d2 = TestUtil.docs(random(), r, "f", new BytesRef("j"), null, PostingsEnum.NONE);
  assertEquals(0, d1.nextDoc());
  assertEquals(0, d2.nextDoc());
  r.close();
  dir.close();
}
Example 4
Source File: TestDocsAndPositions.java (from lucene-solr, Apache License 2.0)
public void testDocsEnumStart() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  doc.add(newStringField("foo", "bar", Field.Store.NO));
  writer.addDocument(doc);
  DirectoryReader reader = writer.getReader();
  LeafReader r = getOnlyLeafReader(reader);
  PostingsEnum disi = TestUtil.docs(random(), r, "foo", new BytesRef("bar"), null, PostingsEnum.NONE);
  int docid = disi.docID();
  assertEquals(-1, docid);
  assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

  // now reuse and check again
  TermsEnum te = r.terms("foo").iterator();
  assertTrue(te.seekExact(new BytesRef("bar")));
  disi = TestUtil.docs(random(), te, disi, PostingsEnum.NONE);
  docid = disi.docID();
  assertEquals(-1, docid);
  assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  writer.close();
  r.close();
  dir.close();
}
Example 5
Source File: TestDocCount.java (from lucene-solr, Apache License 2.0)
private void verifyCount(IndexReader ir) throws Exception {
  final Collection<String> fields = FieldInfos.getIndexedFields(ir);
  for (String field : fields) {
    Terms terms = MultiTerms.getTerms(ir, field);
    if (terms == null) {
      continue;
    }
    int docCount = terms.getDocCount();
    FixedBitSet visited = new FixedBitSet(ir.maxDoc());
    TermsEnum te = terms.iterator();
    while (te.next() != null) {
      PostingsEnum de = TestUtil.docs(random(), te, null, PostingsEnum.NONE);
      while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        visited.set(de.docID());
      }
    }
    assertEquals(visited.cardinality(), docCount);
  }
}
Example 6
Source File: TestSegmentTermDocs.java (from lucene-solr, Apache License 2.0)
public void testTermDocs() throws IOException {
  // After adding the document, we should be able to read it back in
  SegmentReader reader = new SegmentReader(info, Version.LATEST.major, newIOContext(random()));
  assertTrue(reader != null);

  TermsEnum terms = reader.terms(DocHelper.TEXT_FIELD_2_KEY).iterator();
  terms.seekCeil(new BytesRef("field"));
  PostingsEnum termDocs = TestUtil.docs(random(), terms, null, PostingsEnum.FREQS);
  if (termDocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    int docId = termDocs.docID();
    assertTrue(docId == 0);
    int freq = termDocs.freq();
    assertTrue(freq == 3);
  }
  reader.close();
}
Example 7
Source File: TestDirectoryReader.java (from lucene-solr, Apache License 2.0)
void assertTermDocsCount(String msg, IndexReader reader, Term term, int expected) throws IOException {
  PostingsEnum tdocs = TestUtil.docs(random(), reader, term.field(), new BytesRef(term.text()), null, 0);
  int count = 0;
  if (tdocs != null) {
    while (tdocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      count++;
    }
  }
  assertEquals(msg + ", count mismatch", expected, count);
}
Example 8
Source File: TestOmitPositions.java (from lucene-solr, Apache License 2.0)
public void testBasic() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  Field f = newField("foo", "this is a test test", ft);
  doc.add(f);
  for (int i = 0; i < 100; i++) {
    w.addDocument(doc);
  }

  IndexReader reader = w.getReader();
  w.close();

  assertNotNull(MultiTerms.getTermPostingsEnum(reader, "foo", new BytesRef("test")));

  PostingsEnum de = TestUtil.docs(random(), reader, "foo", new BytesRef("test"), null, PostingsEnum.FREQS);
  while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    assertEquals(2, de.freq());
  }

  reader.close();
  dir.close();
}
Example 9
Source File: TestSegmentReader.java (from lucene-solr, Apache License 2.0)
public void testTerms() throws IOException {
  final Collection<String> fields = FieldInfos.getIndexedFields(reader);
  for (String field : fields) {
    Terms terms = MultiTerms.getTerms(reader, field);
    assertNotNull(terms);
    TermsEnum termsEnum = terms.iterator();
    while (termsEnum.next() != null) {
      BytesRef term = termsEnum.term();
      assertTrue(term != null);
      String fieldValue = (String) DocHelper.nameValues.get(field);
      assertTrue(fieldValue.indexOf(term.utf8ToString()) != -1);
    }
  }

  PostingsEnum termDocs = TestUtil.docs(random(), reader, DocHelper.TEXT_FIELD_1_KEY, new BytesRef("field"), null, 0);
  assertTrue(termDocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

  termDocs = TestUtil.docs(random(), reader, DocHelper.NO_NORMS_KEY, new BytesRef(DocHelper.NO_NORMS_TEXT), null, 0);
  assertTrue(termDocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

  PostingsEnum positions = MultiTerms.getTermPostingsEnum(reader, DocHelper.TEXT_FIELD_1_KEY, new BytesRef("field"));
  // NOTE: prior rev of this test was failing to first
  // call next here:
  assertTrue(positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertTrue(positions.docID() == 0);
  assertTrue(positions.nextPosition() >= 0);
}
Example 10
Source File: TestPerSegmentDeletes.java (from lucene-solr, Apache License 2.0)
public int[] toDocsArray(Term term, Bits bits, IndexReader reader) throws IOException {
  TermsEnum ctermsEnum = MultiTerms.getTerms(reader, term.field).iterator();
  if (ctermsEnum.seekExact(new BytesRef(term.text()))) {
    PostingsEnum postingsEnum = TestUtil.docs(random(), ctermsEnum, null, PostingsEnum.NONE);
    return toArray(postingsEnum);
  }
  return null;
}
Example 11
Source File: TestAddIndexes.java (from lucene-solr, Apache License 2.0)
private void verifyTermDocs(Directory dir, Term term, int numDocs) throws IOException {
  IndexReader reader = DirectoryReader.open(dir);
  PostingsEnum postingsEnum = TestUtil.docs(random(), reader, term.field, term.bytes, null, PostingsEnum.NONE);
  int count = 0;
  while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    count++;
  }
  assertEquals(numDocs, count);
  reader.close();
}
Example 12
Source File: TestIndexWriter.java (from lucene-solr, Apache License 2.0)
public void testHighFreqTerm() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
      .setRAMBufferSizeMB(0.01));
  // Massive doc that has 128 K a's
  StringBuilder b = new StringBuilder(1024 * 1024);
  for (int i = 0; i < 4096; i++) {
    b.append(" a a a a a a a a");
    b.append(" a a a a a a a a");
    b.append(" a a a a a a a a");
    b.append(" a a a a a a a a");
  }
  Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(true);
  doc.add(newField("field", b.toString(), customType));
  writer.addDocument(doc);
  writer.close();

  IndexReader reader = DirectoryReader.open(dir);
  assertEquals(1, reader.maxDoc());
  assertEquals(1, reader.numDocs());
  Term t = new Term("field", "a");
  assertEquals(1, reader.docFreq(t));
  PostingsEnum td = TestUtil.docs(random(), reader, "field", new BytesRef("a"), null, PostingsEnum.FREQS);
  td.nextDoc();
  assertEquals(128 * 1024, td.freq());
  reader.close();
  dir.close();
}
Example 13
Source File: TestParallelTermEnum.java (from lucene-solr, Apache License 2.0)
private void checkTerms(Terms terms, String... termsList) throws IOException {
  assertNotNull(terms);
  final TermsEnum te = terms.iterator();

  for (String t : termsList) {
    BytesRef b = te.next();
    assertNotNull(b);
    assertEquals(t, b.utf8ToString());
    PostingsEnum td = TestUtil.docs(random(), te, null, PostingsEnum.NONE);
    assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    assertEquals(0, td.docID());
    assertEquals(td.nextDoc(), DocIdSetIterator.NO_MORE_DOCS);
  }

  assertNull(te.next());
}
Example 14
Source File: TestSegmentMerger.java (from lucene-solr, Apache License 2.0)
public void testMerge() throws IOException {
  final Codec codec = Codec.getDefault();
  final SegmentInfo si = new SegmentInfo(mergedDir, Version.LATEST, null, mergedSegment, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);

  SegmentMerger merger = new SegmentMerger(Arrays.<CodecReader>asList(reader1, reader2),
      si, InfoStream.getDefault(), mergedDir,
      new FieldInfos.FieldNumbers(null),
      newIOContext(random(), new IOContext(new MergeInfo(-1, -1, false, -1))));
  MergeState mergeState = merger.merge();
  int docsMerged = mergeState.segmentInfo.maxDoc();
  assertTrue(docsMerged == 2);
  // Should be able to open a new SegmentReader against the new directory
  SegmentReader mergedReader = new SegmentReader(new SegmentCommitInfo(
      mergeState.segmentInfo, 0, 0, -1L, -1L, -1L, StringHelper.randomId()),
      Version.LATEST.major, newIOContext(random()));
  assertTrue(mergedReader != null);
  assertTrue(mergedReader.numDocs() == 2);
  Document newDoc1 = mergedReader.document(0);
  assertTrue(newDoc1 != null);
  // There are 2 unstored fields on the document
  assertTrue(DocHelper.numFields(newDoc1) == DocHelper.numFields(doc1) - DocHelper.unstored.size());
  Document newDoc2 = mergedReader.document(1);
  assertTrue(newDoc2 != null);
  assertTrue(DocHelper.numFields(newDoc2) == DocHelper.numFields(doc2) - DocHelper.unstored.size());

  PostingsEnum termDocs = TestUtil.docs(random(), mergedReader,
      DocHelper.TEXT_FIELD_2_KEY,
      new BytesRef("field"),
      null,
      0);
  assertTrue(termDocs != null);
  assertTrue(termDocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

  int tvCount = 0;
  for (FieldInfo fieldInfo : mergedReader.getFieldInfos()) {
    if (fieldInfo.hasVectors()) {
      tvCount++;
    }
  }

  //System.out.println("stored size: " + stored.size());
  assertEquals("We do not have 3 fields that were indexed with term vector", 3, tvCount);

  Terms vector = mergedReader.getTermVectors(0).terms(DocHelper.TEXT_FIELD_2_KEY);
  assertNotNull(vector);
  assertEquals(3, vector.size());
  TermsEnum termsEnum = vector.iterator();
  int i = 0;
  while (termsEnum.next() != null) {
    String term = termsEnum.term().utf8ToString();
    int freq = (int) termsEnum.totalTermFreq();
    //System.out.println("Term: " + term + " Freq: " + freq);
    assertTrue(DocHelper.FIELD_2_TEXT.indexOf(term) != -1);
    assertTrue(DocHelper.FIELD_2_FREQS[i] == freq);
    i++;
  }

  TestSegmentReader.checkNorms(mergedReader);
  mergedReader.close();
}
Example 15
Source File: TestCodecs.java (from lucene-solr, Apache License 2.0)
public void testFixedPostings() throws Throwable {
  final int NUM_TERMS = 100;
  final TermData[] terms = new TermData[NUM_TERMS];
  for (int i = 0; i < NUM_TERMS; i++) {
    final int[] docs = new int[] {i};
    final String text = Integer.toString(i, Character.MAX_RADIX);
    terms[i] = new TermData(text, docs, null);
  }

  final FieldInfos.Builder builder = new FieldInfos.Builder(new FieldInfos.FieldNumbers(null));

  final FieldData field = new FieldData("field", builder, terms, true, false);
  final FieldData[] fields = new FieldData[] {field};
  final FieldInfos fieldInfos = builder.finish();
  final Directory dir = newDirectory();
  Codec codec = Codec.getDefault();
  final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, SEGMENT, 10000, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);

  this.write(si, fieldInfos, dir, fields);
  final FieldsProducer reader = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random())));

  final Iterator<String> fieldsEnum = reader.iterator();
  String fieldName = fieldsEnum.next();
  assertNotNull(fieldName);
  final Terms terms2 = reader.terms(fieldName);
  assertNotNull(terms2);

  final TermsEnum termsEnum = terms2.iterator();

  PostingsEnum postingsEnum = null;
  for (int i = 0; i < NUM_TERMS; i++) {
    final BytesRef term = termsEnum.next();
    assertNotNull(term);
    assertEquals(terms[i].text2, term.utf8ToString());

    // do this twice to stress test the codec's reuse, ie,
    // make sure it properly fully resets (rewinds) its
    // internal state:
    for (int iter = 0; iter < 2; iter++) {
      postingsEnum = TestUtil.docs(random(), termsEnum, postingsEnum, PostingsEnum.NONE);
      assertEquals(terms[i].docs[0], postingsEnum.nextDoc());
      assertEquals(DocIdSetIterator.NO_MORE_DOCS, postingsEnum.nextDoc());
    }
  }
  assertNull(termsEnum.next());

  for (int i = 0; i < NUM_TERMS; i++) {
    assertEquals(termsEnum.seekCeil(new BytesRef(terms[i].text2)), TermsEnum.SeekStatus.FOUND);
  }

  assertFalse(fieldsEnum.hasNext());
  reader.close();
  dir.close();
}
Example 16
Source File: TestDirectoryReader.java (from lucene-solr, Apache License 2.0)
public void testMultiTermDocs() throws IOException {
  Directory ramDir1 = newDirectory();
  addDoc(random(), ramDir1, "test foo", true);
  Directory ramDir2 = newDirectory();
  addDoc(random(), ramDir2, "test blah", true);
  Directory ramDir3 = newDirectory();
  addDoc(random(), ramDir3, "test wow", true);

  IndexReader[] readers1 = new IndexReader[]{DirectoryReader.open(ramDir1), DirectoryReader.open(ramDir3)};
  IndexReader[] readers2 = new IndexReader[]{DirectoryReader.open(ramDir1), DirectoryReader.open(ramDir2), DirectoryReader.open(ramDir3)};
  MultiReader mr2 = new MultiReader(readers1);
  MultiReader mr3 = new MultiReader(readers2);

  // test mixing up TermDocs and TermEnums from different readers.
  TermsEnum te2 = MultiTerms.getTerms(mr2, "body").iterator();
  te2.seekCeil(new BytesRef("wow"));
  PostingsEnum td = TestUtil.docs(random(), mr2, "body", te2.term(), null, 0);

  TermsEnum te3 = MultiTerms.getTerms(mr3, "body").iterator();
  te3.seekCeil(new BytesRef("wow"));
  td = TestUtil.docs(random(), te3, td, 0);

  int ret = 0;

  // This should blow up if we forget to check that the TermEnum is from the same
  // reader as the TermDocs.
  while (td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    ret += td.docID();
  }

  // really a dummy assert to ensure that we got some docs and to ensure that
  // nothing is eliminated by hotspot
  assertTrue(ret > 0);
  readers1[0].close();
  readers1[1].close();
  readers2[0].close();
  readers2[1].close();
  readers2[2].close();
  ramDir1.close();
  ramDir2.close();
  ramDir3.close();
}
Example 17
Source File: TestIndexWriterWithThreads.java (from lucene-solr, Apache License 2.0)
public void testCloseWithThreads() throws Exception {
  int NUM_THREADS = 3;
  int numIterations = TEST_NIGHTLY ? 7 : 3;
  for (int iter = 0; iter < numIterations; iter++) {
    if (VERBOSE) {
      System.out.println("\nTEST: iter=" + iter);
    }
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(new MockAnalyzer(random()))
            .setMaxBufferedDocs(10)
            .setMergeScheduler(new ConcurrentMergeScheduler())
            .setMergePolicy(newLogMergePolicy(4))
            .setCommitOnClose(false)
    );
    ((ConcurrentMergeScheduler) writer.getConfig().getMergeScheduler()).setSuppressExceptions();

    CyclicBarrier syncStart = new CyclicBarrier(NUM_THREADS + 1);
    IndexerThread[] threads = new IndexerThread[NUM_THREADS];
    for (int i = 0; i < NUM_THREADS; i++) {
      threads[i] = new IndexerThread(writer, false, syncStart);
      threads[i].start();
    }
    syncStart.await();

    boolean done = false;
    while (!done) {
      Thread.sleep(100);
      for (int i = 0; i < NUM_THREADS; i++) {
        // only stop when at least one thread has added a doc
        if (threads[i].addCount > 0) {
          done = true;
          break;
        } else if (!threads[i].isAlive()) {
          fail("thread failed before indexing a single document");
        }
      }
    }

    if (VERBOSE) {
      System.out.println("\nTEST: now close");
    }
    try {
      writer.commit();
    } finally {
      writer.close();
    }

    // Make sure threads that are adding docs are not hung:
    for (int i = 0; i < NUM_THREADS; i++) {
      // Without fix for LUCENE-1130: one of the
      // threads will hang
      threads[i].join();

      // [DW] this is unreachable once join() returns a thread cannot be alive.
      if (threads[i].isAlive()) {
        fail("thread seems to be hung");
      }
    }

    // Quick test to make sure index is not corrupt:
    IndexReader reader = DirectoryReader.open(dir);
    PostingsEnum tdocs = TestUtil.docs(random(), reader, "field", new BytesRef("aaa"), null, 0);
    int count = 0;
    while (tdocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      count++;
    }
    assertTrue(count > 0);
    reader.close();

    dir.close();
  }
}
Example 18
Source File: TestPerfTasksLogic.java (from lucene-solr, Apache License 2.0)
/** Test ReadTokensTask */
public void testReadTokens() throws Exception {

  // We will call ReadTokens on this many docs
  final int NUM_DOCS = 20;

  // Read tokens from first NUM_DOCS docs from Reuters and
  // then build index from the same docs
  String algLines1[] = {
    "# ----- properties ",
    "analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer",
    "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
    "docs.file=" + getReuters20LinesFile(),
    "# ----- alg ",
    "{ReadTokens}: " + NUM_DOCS,
    "ResetSystemErase",
    "CreateIndex",
    "{AddDoc}: " + NUM_DOCS,
    "CloseIndex",
  };

  // Run algo
  Benchmark benchmark = execBenchmark(algLines1);

  List<TaskStats> stats = benchmark.getRunData().getPoints().taskStats();

  // Count how many tokens all ReadTokens saw
  int totalTokenCount1 = 0;
  for (final TaskStats stat : stats) {
    if (stat.getTask().getName().equals("ReadTokens")) {
      totalTokenCount1 += stat.getCount();
    }
  }

  // Separately count how many tokens are actually in the index:
  IndexReader reader = DirectoryReader.open(benchmark.getRunData().getDirectory());
  assertEquals(NUM_DOCS, reader.numDocs());

  int totalTokenCount2 = 0;

  Collection<String> fields = FieldInfos.getIndexedFields(reader);

  for (String fieldName : fields) {
    if (fieldName.equals(DocMaker.ID_FIELD) || fieldName.equals(DocMaker.DATE_MSEC_FIELD) || fieldName.equals(DocMaker.TIME_SEC_FIELD)) {
      continue;
    }
    Terms terms = MultiTerms.getTerms(reader, fieldName);
    if (terms == null) {
      continue;
    }
    TermsEnum termsEnum = terms.iterator();
    PostingsEnum docs = null;
    while (termsEnum.next() != null) {
      docs = TestUtil.docs(random(), termsEnum, docs, PostingsEnum.FREQS);
      while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        totalTokenCount2 += docs.freq();
      }
    }
  }
  reader.close();

  // Make sure they are the same
  assertEquals(totalTokenCount1, totalTokenCount2);
}
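For orientation, here is a minimal, self-contained sketch of the call pattern the examples above share, written as a LuceneTestCase test method. It is not taken from any of the projects above; the field and term names are invented for illustration.

public void testDocsSketch() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  doc.add(newStringField("body", "hello", Field.Store.NO));
  w.addDocument(doc);
  IndexReader reader = w.getReader();
  w.close();

  // TestUtil.docs() hands back a PostingsEnum positioned before the first doc;
  // docID() is -1 until nextDoc() is called.
  PostingsEnum pe = TestUtil.docs(random(), reader, "body", new BytesRef("hello"), null, PostingsEnum.NONE);
  assertEquals(-1, pe.docID());
  assertEquals(0, pe.nextDoc());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, pe.nextDoc());

  reader.close();
  dir.close();
}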