Java Code Examples for org.apache.lucene.document.FieldType#setStoreTermVectors()
The following examples show how to use org.apache.lucene.document.FieldType#setStoreTermVectors(). Each example notes the project and source file it was taken from.
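All of the examples share one pattern: configure a FieldType, call setStoreTermVectors(true) (optionally together with setStoreTermVectorPositions/Offsets/Payloads) before any Field is built from it, index a document, and, if needed, read the vector back through the reader. As a quick orientation, here is a minimal self-contained sketch of that pattern. It assumes a Lucene 8.x classpath; the field name "body", the class name, and the in-memory ByteBuffersDirectory are illustrative choices, not taken from any example below.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;

public class StoreTermVectorsSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new ByteBuffersDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));

    // Term-vector flags must be set on the FieldType before the Field is created.
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setStoreTermVectors(true);          // store a per-document term vector
    ft.setStoreTermVectorPositions(true);  // optionally also token positions...
    ft.setStoreTermVectorOffsets(true);    // ...and character offsets
    ft.freeze();                           // lock the type against further changes

    Document doc = new Document();
    doc.add(new Field("body", "hello term vectors hello", ft));
    writer.addDocument(doc);
    writer.close();

    // Read the vector back: each distinct term of the "body" field of document 0.
    IndexReader reader = DirectoryReader.open(dir);
    Terms vector = reader.getTermVectors(0).terms("body");
    TermsEnum termsEnum = vector.iterator();
    BytesRef term;
    while ((term = termsEnum.next()) != null) {
      System.out.println(term.utf8ToString() + " freq=" + termsEnum.totalTermFreq());
    }
    reader.close();
    dir.close();
  }
}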
Example 1
Source File: TestFixBrokenOffsetsFilter.java From lucene-solr with Apache License 2.0
public void testBogusTermVectors() throws IOException {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
  Document doc = new Document();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorOffsets(true);
  Field field = new Field("foo", "", ft);
  field.setTokenStream(new FixBrokenOffsetsFilter(new CannedTokenStream(
      new Token("bar", 5, 10), new Token("bar", 1, 4))));
  doc.add(field);
  iw.addDocument(doc);
  iw.close();
  dir.close();
}
Example 2
Source File: TestCustomTermFreq.java From lucene-solr with Apache License 2.0
public void testInvalidTermVectorPositions() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
  fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  fieldType.setStoreTermVectors(true);
  fieldType.setStoreTermVectorPositions(true);
  Field field = new Field("field",
      new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
                          new int[] {42, 128, 17, 100}),
      fieldType);
  doc.add(field);
  Exception e = expectThrows(IllegalArgumentException.class, () -> {
    w.addDocument(doc);
  });
  assertEquals("field \"field\": cannot index term vector positions while using custom TermFrequencyAttribute",
      e.getMessage());
  IOUtils.close(w, dir);
}
Example 3
Source File: TestIndexWriterMerging.java From lucene-solr with Apache License 2.0
public void testSetMaxMergeDocs() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()))
      .setMergeScheduler(new MyMergeScheduler())
      .setMaxBufferedDocs(2)
      .setMergePolicy(newLogMergePolicy());
  LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
  lmp.setMaxMergeDocs(20);
  lmp.setMergeFactor(2);
  IndexWriter iw = new IndexWriter(dir, conf);
  Document document = new Document();
  FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
  customType.setStoreTermVectors(true);
  document.add(newField("tvtest", "a b c", customType));
  for (int i = 0; i < 177; i++) {
    iw.addDocument(document);
  }
  iw.close();
  dir.close();
}
Example 4
Source File: TestDirectoryReader.java From lucene-solr with Apache License 2.0
static void addDocumentWithTermVectorFields(IndexWriter writer) throws IOException {
  Document doc = new Document();
  FieldType customType5 = new FieldType(TextField.TYPE_STORED);
  customType5.setStoreTermVectors(true);
  FieldType customType6 = new FieldType(TextField.TYPE_STORED);
  customType6.setStoreTermVectors(true);
  customType6.setStoreTermVectorOffsets(true);
  FieldType customType7 = new FieldType(TextField.TYPE_STORED);
  customType7.setStoreTermVectors(true);
  customType7.setStoreTermVectorPositions(true);
  FieldType customType8 = new FieldType(TextField.TYPE_STORED);
  customType8.setStoreTermVectors(true);
  customType8.setStoreTermVectorOffsets(true);
  customType8.setStoreTermVectorPositions(true);
  doc.add(newTextField("tvnot", "tvnot", Field.Store.YES));
  doc.add(newField("termvector", "termvector", customType5));
  doc.add(newField("tvoffset", "tvoffset", customType6));
  doc.add(newField("tvposition", "tvposition", customType7));
  doc.add(newField("tvpositionoffset", "tvpositionoffset", customType8));
  writer.addDocument(doc);
}
Example 5
Source File: TestPostingsOffsets.java From lucene-solr with Apache License 2.0
public void testLegalbutVeryLargeOffsets() throws Exception {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
  Document doc = new Document();
  Token t1 = new Token("foo", 0, Integer.MAX_VALUE - 500);
  if (random().nextBoolean()) {
    t1.setPayload(new BytesRef("test"));
  }
  Token t2 = new Token("foo", Integer.MAX_VALUE - 500, Integer.MAX_VALUE);
  TokenStream tokenStream = new CannedTokenStream(new Token[] { t1, t2 });
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  // store some term vectors for the checkindex cross-check
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorPositions(true);
  ft.setStoreTermVectorOffsets(true);
  Field field = new Field("foo", tokenStream, ft);
  doc.add(field);
  iw.addDocument(doc);
  iw.close();
  dir.close();
}
Example 6
Source File: TestTermVectorsWriter.java From lucene-solr with Apache License 2.0
public void testNoAbortOnBadTVSettings() throws Exception {
  Directory dir = newDirectory();
  // Don't use RandomIndexWriter because we want to be sure both docs go to 1 seg:
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter iw = new IndexWriter(dir, iwc);

  Document doc = new Document();
  iw.addDocument(doc);
  FieldType ft = new FieldType(StoredField.TYPE);
  ft.setStoreTermVectors(true);
  ft.freeze();
  doc.add(new Field("field", "value", ft));

  expectThrows(IllegalArgumentException.class, () -> {
    iw.addDocument(doc);
  });

  IndexReader r = DirectoryReader.open(iw);
  // Make sure the exc didn't lose our first document:
  assertEquals(1, r.numDocs());
  iw.close();
  r.close();
  dir.close();
}
Example 7
Source File: TestPostingsOffsets.java From lucene-solr with Apache License 2.0
public void testAddFieldTwice() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  FieldType customType3 = new FieldType(TextField.TYPE_STORED);
  customType3.setStoreTermVectors(true);
  customType3.setStoreTermVectorPositions(true);
  customType3.setStoreTermVectorOffsets(true);
  customType3.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  doc.add(new Field("content3", "here is more content with aaa aaa aaa", customType3));
  doc.add(new Field("content3", "here is more content with aaa aaa aaa", customType3));
  iw.addDocument(doc);
  iw.close();
  dir.close();
  // checkindex
}
Example 8
Source File: TestTermVectorsWriter.java From lucene-solr with Apache License 2.0
public void testEndOffsetPositionStandardEmptyField() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(true);
  Field f = newField("field", "", customType);
  Field f2 = newField("field", "crunch man", customType);
  doc.add(f);
  doc.add(f2);
  w.addDocument(doc);
  w.close();

  IndexReader r = DirectoryReader.open(dir);
  TermsEnum termsEnum = r.getTermVectors(0).terms("field").iterator();
  assertNotNull(termsEnum.next());
  PostingsEnum dpEnum = termsEnum.postings(null, PostingsEnum.ALL);
  assertEquals(1, (int) termsEnum.totalTermFreq());
  assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  dpEnum.nextPosition();
  assertEquals(1, dpEnum.startOffset());
  assertEquals(7, dpEnum.endOffset());

  assertNotNull(termsEnum.next());
  dpEnum = termsEnum.postings(dpEnum, PostingsEnum.ALL);
  assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  dpEnum.nextPosition();
  assertEquals(8, dpEnum.startOffset());
  assertEquals(11, dpEnum.endOffset());

  r.close();
  dir.close();
}
Example 9
Source File: TermVectorsAdapterTest.java From lucene-solr with Apache License 2.0
@Override
protected void createIndex() throws IOException {
  indexDir = createTempDir("testIndex");
  Directory dir = newFSDirectory(indexDir);
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, new StandardAnalyzer());

  FieldType textType = new FieldType();
  textType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  textType.setTokenized(true);
  textType.setStoreTermVectors(true);

  FieldType textType_pos = new FieldType();
  textType_pos.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  textType_pos.setTokenized(true);
  textType_pos.setStoreTermVectors(true);
  textType_pos.setStoreTermVectorPositions(true);

  FieldType textType_pos_offset = new FieldType();
  textType_pos_offset.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  textType_pos_offset.setTokenized(true);
  textType_pos_offset.setStoreTermVectors(true);
  textType_pos_offset.setStoreTermVectorPositions(true);
  textType_pos_offset.setStoreTermVectorOffsets(true);

  String text = "It is a truth universally acknowledged, that a single man in possession of a good fortune, must be in want of a wife.";
  Document doc = new Document();
  doc.add(newField("text1", text, textType));
  doc.add(newField("text2", text, textType_pos));
  doc.add(newField("text3", text, textType_pos_offset));
  writer.addDocument(doc);

  writer.commit();
  writer.close();
  dir.close();
}
Example 10
Source File: LuceneIndexer.java From MtgDesktopCompanion with GNU General Public License v3.0
private Document toDocuments(MagicCard mc) {
  Document doc = new Document();

  FieldType fieldType = new FieldType();
  fieldType.setStored(true);
  fieldType.setStoreTermVectors(true);
  fieldType.setTokenized(true);
  fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

  doc.add(new Field("name", mc.getName(), fieldType));
  if (mc.getCost() != null) {
    doc.add(new Field("cost", mc.getCost(), fieldType));
  } else {
    doc.add(new Field("cost", "", fieldType));
  }
  if (mc.getText() != null) {
    doc.add(new Field("text", mc.getText(), fieldType));
  } else {
    doc.add(new Field("text", "", fieldType));
  }
  doc.add(new Field("type", mc.getFullType(), fieldType));
  doc.add(new Field("set", mc.getCurrentSet().getId(), fieldType));
  doc.add(new StoredField("cmc", mc.getCmc()));
  doc.add(new StringField("data", serializer.toJson(mc), Field.Store.YES));

  for (MTGColor color : mc.getColors()) {
    doc.add(new Field("color", color.getCode(), fieldType));
  }
  return doc;
}
Example 11
Source File: FastVectorHighlighterTest.java From lucene-solr with Apache License 2.0
public void testWithSynonym() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));

  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();

  Document doc = new Document();
  doc.add(new Field("field", "the quick brown fox", type));
  writer.addDocument(doc);

  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  IndexReader reader = DirectoryReader.open(writer);
  int docId = 0;

  // query1: simple synonym query
  SynonymQuery synQuery = new SynonymQuery.Builder("field")
      .addTerm(new Term("field", "quick"))
      .addTerm(new Term("field", "fast"))
      .build();
  FieldQuery fieldQuery = highlighter.getFieldQuery(synQuery, reader);
  String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  assertEquals("the <b>quick</b> brown fox", bestFragments[0]);

  // query2: boolean query with synonym query
  BooleanQuery.Builder bq = new BooleanQuery.Builder()
      .add(new BooleanClause(synQuery, Occur.MUST))
      .add(new BooleanClause(new TermQuery(new Term("field", "fox")), Occur.MUST));
  fieldQuery = highlighter.getFieldQuery(bq.build(), reader);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  assertEquals("the <b>quick</b> brown <b>fox</b>", bestFragments[0]);

  reader.close();
  writer.close();
  dir.close();
}
Example 12
Source File: TestTermVectors.java From lucene-solr with Apache License 2.0
@BeforeClass
public static void beforeClass() throws Exception {
  directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true))
          .setMergePolicy(newLogMergePolicy()));
  //writer.setNoCFSRatio(1.0);
  //writer.infoStream = System.out;

  for (int i = 0; i < 1000; i++) {
    Document doc = new Document();
    FieldType ft = new FieldType(TextField.TYPE_STORED);
    int mod3 = i % 3;
    int mod2 = i % 2;
    if (mod2 == 0 && mod3 == 0) {
      ft.setStoreTermVectors(true);
      ft.setStoreTermVectorOffsets(true);
      ft.setStoreTermVectorPositions(true);
    } else if (mod2 == 0) {
      ft.setStoreTermVectors(true);
      ft.setStoreTermVectorPositions(true);
    } else if (mod3 == 0) {
      ft.setStoreTermVectors(true);
      ft.setStoreTermVectorOffsets(true);
    } else {
      ft.setStoreTermVectors(true);
    }
    doc.add(new Field("field", English.intToEnglish(i), ft));
    // test no term vectors too
    doc.add(new TextField("noTV", English.intToEnglish(i), Field.Store.YES));
    writer.addDocument(doc);
  }
  reader = writer.getReader();
  writer.close();
}
Example 13
Source File: TestBackwardsCompatibility.java From lucene-solr with Apache License 2.0
private void addDoc(IndexWriter writer, int id) throws IOException {
  Document doc = new Document();
  doc.add(new TextField("content", "aaa", Field.Store.NO));
  doc.add(new StringField("id", Integer.toString(id), Field.Store.YES));
  FieldType customType2 = new FieldType(TextField.TYPE_STORED);
  customType2.setStoreTermVectors(true);
  customType2.setStoreTermVectorPositions(true);
  customType2.setStoreTermVectorOffsets(true);
  doc.add(new Field("autf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", customType2));
  doc.add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", customType2));
  doc.add(new Field("content2", "here is more content with aaa aaa aaa", customType2));
  doc.add(new Field("fie\u2C77ld", "field with non-ascii name", customType2));

  // add docvalues fields
  doc.add(new NumericDocValuesField("dvByte", (byte) id));
  byte bytes[] = new byte[] {
      (byte) (id >>> 24), (byte) (id >>> 16), (byte) (id >>> 8), (byte) id
  };
  BytesRef ref = new BytesRef(bytes);
  doc.add(new BinaryDocValuesField("dvBytesDerefFixed", ref));
  doc.add(new BinaryDocValuesField("dvBytesDerefVar", ref));
  doc.add(new SortedDocValuesField("dvBytesSortedFixed", ref));
  doc.add(new SortedDocValuesField("dvBytesSortedVar", ref));
  doc.add(new BinaryDocValuesField("dvBytesStraightFixed", ref));
  doc.add(new BinaryDocValuesField("dvBytesStraightVar", ref));
  doc.add(new DoubleDocValuesField("dvDouble", (double) id));
  doc.add(new FloatDocValuesField("dvFloat", (float) id));
  doc.add(new NumericDocValuesField("dvInt", id));
  doc.add(new NumericDocValuesField("dvLong", id));
  doc.add(new NumericDocValuesField("dvPacked", id));
  doc.add(new NumericDocValuesField("dvShort", (short) id));
  doc.add(new SortedSetDocValuesField("dvSortedSet", ref));
  doc.add(new SortedNumericDocValuesField("dvSortedNumeric", id));
  doc.add(new IntPoint("intPoint1d", id));
  doc.add(new IntPoint("intPoint2d", id, 2 * id));
  doc.add(new FloatPoint("floatPoint1d", (float) id));
  doc.add(new FloatPoint("floatPoint2d", (float) id, (float) 2 * id));
  doc.add(new LongPoint("longPoint1d", id));
  doc.add(new LongPoint("longPoint2d", id, 2 * id));
  doc.add(new DoublePoint("doublePoint1d", (double) id));
  doc.add(new DoublePoint("doublePoint2d", (double) id, (double) 2 * id));
  doc.add(new BinaryPoint("binaryPoint1d", bytes));
  doc.add(new BinaryPoint("binaryPoint2d", bytes, bytes));

  // a field with both offsets and term vectors for a cross-check
  FieldType customType3 = new FieldType(TextField.TYPE_STORED);
  customType3.setStoreTermVectors(true);
  customType3.setStoreTermVectorPositions(true);
  customType3.setStoreTermVectorOffsets(true);
  customType3.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  doc.add(new Field("content5", "here is more content with aaa aaa aaa", customType3));

  // a field that omits only positions
  FieldType customType4 = new FieldType(TextField.TYPE_STORED);
  customType4.setStoreTermVectors(true);
  customType4.setStoreTermVectorPositions(false);
  customType4.setStoreTermVectorOffsets(true);
  customType4.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  doc.add(new Field("content6", "here is more content with aaa aaa aaa", customType4));

  // TODO:
  //   index different norms types via similarity (we use a random one currently?!)
  //   remove any analyzer randomness, explicitly add payloads for certain fields.
  writer.addDocument(doc);
}
Example 14
Source File: TestMemoryIndexAgainstDirectory.java From lucene-solr with Apache License 2.0
public void testDuelMemoryIndexCoreDirectoryWithArrayField() throws Exception {
  final String field_name = "text";
  MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
  if (random().nextBoolean()) {
    mockAnalyzer.setOffsetGap(random().nextInt(100));
  }

  // index into a random directory
  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPayloads(false);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();

  Document doc = new Document();
  doc.add(new Field(field_name, "la la", type));
  doc.add(new Field(field_name, "foo bar foo bar foo", type));

  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), mockAnalyzer));
  writer.updateDocument(new Term("id", "1"), doc);
  writer.commit();
  writer.close();
  DirectoryReader reader = DirectoryReader.open(dir);

  // index document in Memory index
  MemoryIndex memIndex = new MemoryIndex(true);
  memIndex.addField(field_name, "la la", mockAnalyzer);
  memIndex.addField(field_name, "foo bar foo bar foo", mockAnalyzer);

  // compare term vectors
  Terms ramTv = reader.getTermVector(0, field_name);
  IndexReader memIndexReader = memIndex.createSearcher().getIndexReader();
  TestUtil.checkReader(memIndexReader);
  Terms memTv = memIndexReader.getTermVector(0, field_name);

  compareTermVectors(ramTv, memTv, field_name);
  memIndexReader.close();
  reader.close();
  dir.close();
}
Example 15
Source File: TestTermVectorsWriter.java From lucene-solr with Apache License 2.0
public void testDoubleOffsetCounting() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType customType = new FieldType(StringField.TYPE_NOT_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(true);
  Field f = newField("field", "abcd", customType);
  doc.add(f);
  doc.add(f);
  Field f2 = newField("field", "", customType);
  doc.add(f2);
  doc.add(f);
  w.addDocument(doc);
  w.close();

  IndexReader r = DirectoryReader.open(dir);
  Terms vector = r.getTermVectors(0).terms("field");
  assertNotNull(vector);
  TermsEnum termsEnum = vector.iterator();
  assertNotNull(termsEnum.next());
  assertEquals("", termsEnum.term().utf8ToString());

  // Token "" occurred once
  assertEquals(1, termsEnum.totalTermFreq());
  PostingsEnum dpEnum = termsEnum.postings(null, PostingsEnum.ALL);
  assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  dpEnum.nextPosition();
  assertEquals(8, dpEnum.startOffset());
  assertEquals(8, dpEnum.endOffset());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc());

  // Token "abcd" occurred three times
  assertEquals(new BytesRef("abcd"), termsEnum.next());
  dpEnum = termsEnum.postings(dpEnum, PostingsEnum.ALL);
  assertEquals(3, termsEnum.totalTermFreq());

  assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  dpEnum.nextPosition();
  assertEquals(0, dpEnum.startOffset());
  assertEquals(4, dpEnum.endOffset());

  dpEnum.nextPosition();
  assertEquals(4, dpEnum.startOffset());
  assertEquals(8, dpEnum.endOffset());

  dpEnum.nextPosition();
  assertEquals(8, dpEnum.startOffset());
  assertEquals(12, dpEnum.endOffset());

  assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc());
  assertNull(termsEnum.next());
  r.close();
  dir.close();
}
Example 16
Source File: TestIndexWriterWithThreads.java From lucene-solr with Apache License 2.0
@Override
public void run() {
  try {
    syncStart.await();
  } catch (BrokenBarrierException | InterruptedException e) {
    error = e;
    throw new RuntimeException(e);
  }

  final Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(true);
  doc.add(newField("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", customType));
  doc.add(new NumericDocValuesField("dv", 5));

  int idUpto = 0;
  int fullCount = 0;

  do {
    try {
      writer.updateDocument(new Term("id", "" + (idUpto++)), doc);
      addCount++;
    } catch (IOException ioe) {
      if (VERBOSE) {
        System.out.println("TEST: expected exc:");
        ioe.printStackTrace(System.out);
      }
      //System.out.println(Thread.currentThread().getName() + ": hit exc");
      //ioe.printStackTrace(System.out);
      if (ioe.getMessage().startsWith("fake disk full at") ||
          ioe.getMessage().equals("now failing on purpose")) {
        diskFull = true;
        try {
          Thread.sleep(1);
        } catch (InterruptedException ie) {
          throw new ThreadInterruptedException(ie);
        }
        if (fullCount++ >= 5) break;
      } else {
        if (noErrors) {
          System.out.println(Thread.currentThread().getName() + ": ERROR: unexpected IOException:");
          ioe.printStackTrace(System.out);
          error = ioe;
        }
        break;
      }
    } catch (AlreadyClosedException ace) {
      // OK: abort closes the writer
      break;
    } catch (Throwable t) {
      if (noErrors) {
        System.out.println(Thread.currentThread().getName() + ": ERROR: unexpected Throwable:");
        t.printStackTrace(System.out);
        error = t;
      }
      break;
    }
  } while (true);
}
Example 17
Source File: TestIndexWriterWithThreads.java From lucene-solr with Apache License 2.0
public void _testSingleThreadFailure(MockDirectoryWrapper.Failure failure) throws IOException {
  MockDirectoryWrapper dir = newMockDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()))
      .setMaxBufferedDocs(2)
      .setMergeScheduler(new ConcurrentMergeScheduler())
      .setCommitOnClose(false);
  if (iwc.getMergeScheduler() instanceof ConcurrentMergeScheduler) {
    iwc.setMergeScheduler(new SuppressingConcurrentMergeScheduler() {
      @Override
      protected boolean isOK(Throwable th) {
        return th instanceof AlreadyClosedException
            || (th instanceof IllegalStateException
                && th.getMessage().contains("this writer hit an unrecoverable error"));
      }
    });
  }

  IndexWriter writer = new IndexWriter(dir, iwc);
  final Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(true);
  doc.add(newField("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", customType));
  for (int i = 0; i < 6; i++) {
    writer.addDocument(doc);
  }

  dir.failOn(failure);
  failure.setDoFail();
  expectThrows(IOException.class, () -> {
    writer.addDocument(doc);
    writer.addDocument(doc);
    writer.commit();
  });

  failure.clearDoFail();
  expectThrows(AlreadyClosedException.class, () -> {
    writer.addDocument(doc);
    writer.commit();
    writer.close();
  });

  assertTrue(writer.isDeleterClosed());
  dir.close();
}
Example 18
Source File: FastVectorHighlighterTest.java From lucene-solr with Apache License 2.0
public void testBooleanPhraseWithSynonym() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType type = new FieldType(TextField.TYPE_NOT_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();
  Token syn = new Token("httpwwwfacebookcom", 6, 29);
  syn.setPositionIncrement(0);
  CannedTokenStream ts = new CannedTokenStream(
      new Token("test", 0, 4),
      new Token("http", 6, 10),
      syn,
      new Token("www", 13, 16),
      new Token("facebook", 17, 25),
      new Token("com", 26, 29));
  Field field = new Field("field", ts, type);
  doc.add(field);
  doc.add(new StoredField("field", "Test: http://www.facebook.com"));
  writer.addDocument(doc);

  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  IndexReader reader = DirectoryReader.open(writer);
  int docId = 0;

  // query1: match
  PhraseQuery pq = new PhraseQuery("field", "test", "http", "www", "facebook", "com");
  FieldQuery fieldQuery = highlighter.getFieldQuery(pq, reader);
  String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);

  // query2: match
  PhraseQuery pq2 = new PhraseQuery("field", "test", "httpwwwfacebookcom", "www", "facebook", "com");
  fieldQuery = highlighter.getFieldQuery(pq2, reader);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);

  // query3: OR query1 and query2 together
  BooleanQuery.Builder bq = new BooleanQuery.Builder();
  bq.add(pq, BooleanClause.Occur.SHOULD);
  bq.add(pq2, BooleanClause.Occur.SHOULD);
  fieldQuery = highlighter.getFieldQuery(bq.build(), reader);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);

  reader.close();
  writer.close();
  dir.close();
}
Example 19
Source File: TokenSourcesTest.java From lucene-solr with Apache License 2.0
@Repeat(iterations = 10)
//@Seed("947083AB20AB2D4F")
public void testRandomizedRoundTrip() throws Exception {
  final int distinct = TestUtil.nextInt(random(), 1, 10);

  String[] terms = new String[distinct];
  BytesRef[] termBytes = new BytesRef[distinct];
  for (int i = 0; i < distinct; ++i) {
    terms[i] = TestUtil.randomRealisticUnicodeString(random());
    termBytes[i] = new BytesRef(terms[i]);
  }

  final BaseTermVectorsFormatTestCase.RandomTokenStream rTokenStream =
      new BaseTermVectorsFormatTestCase.RandomTokenStream(TestUtil.nextInt(random(), 1, 10), terms, termBytes);

  // check to see if the token streams might have non-deterministic testable result
  final boolean storeTermVectorPositions = random().nextBoolean();
  final int[] startOffsets = rTokenStream.getStartOffsets();
  final int[] positionsIncrements = rTokenStream.getPositionsIncrements();
  for (int i = 1; i < positionsIncrements.length; i++) {
    if (storeTermVectorPositions && positionsIncrements[i] != 0) {
      continue;
    }
    // TODO should RandomTokenStream ensure endOffsets for tokens at same position and same startOffset
    //   are greater than previous token's endOffset? That would increase the testable possibilities.
    if (startOffsets[i] == startOffsets[i - 1]) {
      if (VERBOSE) System.out.println("Skipping test because can't easily validate random token-stream is correct.");
      rTokenStream.close();
      return;
    }
  }

  // sanity check itself
  assertTokenStreamContents(rTokenStream,
      rTokenStream.getTerms(), rTokenStream.getStartOffsets(),
      rTokenStream.getEndOffsets(), rTokenStream.getPositionsIncrements());

  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  FieldType myFieldType = new FieldType(TextField.TYPE_NOT_STORED);
  myFieldType.setStoreTermVectors(true);
  myFieldType.setStoreTermVectorOffsets(true);
  myFieldType.setStoreTermVectorPositions(storeTermVectorPositions);
  // payloads require positions; it will throw an error otherwise
  myFieldType.setStoreTermVectorPayloads(storeTermVectorPositions && random().nextBoolean());

  Document doc = new Document();
  doc.add(new Field("field", rTokenStream, myFieldType));
  writer.addDocument(doc);

  IndexReader reader = writer.getReader();
  writer.close();
  assertEquals(1, reader.numDocs());

  TokenStream vectorTokenStream =
      TokenSources.getTermVectorTokenStreamOrNull("field", reader.getTermVectors(0), -1);

  // sometimes check payloads
  PayloadAttribute payloadAttribute = null;
  if (myFieldType.storeTermVectorPayloads() && usually()) {
    payloadAttribute = vectorTokenStream.addAttribute(PayloadAttribute.class);
  }
  assertTokenStreamContents(vectorTokenStream,
      rTokenStream.getTerms(), rTokenStream.getStartOffsets(),
      rTokenStream.getEndOffsets(),
      myFieldType.storeTermVectorPositions() ? rTokenStream.getPositionsIncrements() : null);

  // test payloads
  if (payloadAttribute != null) {
    vectorTokenStream.reset();
    for (int i = 0; vectorTokenStream.incrementToken(); i++) {
      assertEquals(rTokenStream.getPayloads()[i], payloadAttribute.getPayload());
    }
  }

  reader.close();
  dir.close();
  rTokenStream.close();
}
Example 20
Source File: TestPostingsOffsets.java From lucene-solr with Apache License 2.0
public void testBasic() throws Exception {
  Directory dir = newDirectory();
  // iwc is an IndexWriterConfig field initialized elsewhere in the test class (not shown here)
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
  Document doc = new Document();

  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  if (random().nextBoolean()) {
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorPositions(random().nextBoolean());
    ft.setStoreTermVectorOffsets(random().nextBoolean());
  }
  Token[] tokens = new Token[] {
      makeToken("a", 1, 0, 6),
      makeToken("b", 1, 8, 9),
      makeToken("a", 1, 9, 17),
      makeToken("c", 1, 19, 50),
  };
  doc.add(new Field("content", new CannedTokenStream(tokens), ft));

  w.addDocument(doc);
  IndexReader r = w.getReader();
  w.close();

  PostingsEnum dp = MultiTerms.getTermPostingsEnum(r, "content", new BytesRef("a"));
  assertNotNull(dp);
  assertEquals(0, dp.nextDoc());
  assertEquals(2, dp.freq());
  assertEquals(0, dp.nextPosition());
  assertEquals(0, dp.startOffset());
  assertEquals(6, dp.endOffset());
  assertEquals(2, dp.nextPosition());
  assertEquals(9, dp.startOffset());
  assertEquals(17, dp.endOffset());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, dp.nextDoc());

  dp = MultiTerms.getTermPostingsEnum(r, "content", new BytesRef("b"));
  assertNotNull(dp);
  assertEquals(0, dp.nextDoc());
  assertEquals(1, dp.freq());
  assertEquals(1, dp.nextPosition());
  assertEquals(8, dp.startOffset());
  assertEquals(9, dp.endOffset());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, dp.nextDoc());

  dp = MultiTerms.getTermPostingsEnum(r, "content", new BytesRef("c"));
  assertNotNull(dp);
  assertEquals(0, dp.nextDoc());
  assertEquals(1, dp.freq());
  assertEquals(3, dp.nextPosition());
  assertEquals(19, dp.startOffset());
  assertEquals(50, dp.endOffset());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, dp.nextDoc());

  r.close();
  dir.close();
}