Java Code Examples for org.apache.lucene.analysis.Token#setPayload()
The following examples show how to use
org.apache.lucene.analysis.Token#setPayload().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestPostingsOffsets.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testLegalbutVeryLargeOffsets() throws Exception { Directory dir = newDirectory(); IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null)); Document doc = new Document(); Token t1 = new Token("foo", 0, Integer.MAX_VALUE-500); if (random().nextBoolean()) { t1.setPayload(new BytesRef("test")); } Token t2 = new Token("foo", Integer.MAX_VALUE-500, Integer.MAX_VALUE); TokenStream tokenStream = new CannedTokenStream( new Token[] { t1, t2 } ); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); // store some term vectors for the checkindex cross-check ft.setStoreTermVectors(true); ft.setStoreTermVectorPositions(true); ft.setStoreTermVectorOffsets(true); Field field = new Field("foo", tokenStream, ft); doc.add(field); iw.addDocument(doc); iw.close(); dir.close(); }
Example 2
Source File: TokenSourcesTest.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Creates a token for {@code text} and attaches the same text as its payload.
 * The token starts at the current {@code curOffset} and ends
 * {@code text.length()} characters later; {@code curOffset} is then advanced
 * by one, so offsets increment "naturally" from call to call.
 */
private Token getToken(String text) {
  final int start = curOffset;
  final int end = start + text.length();
  final Token token = new Token(text, start, end);
  token.setPayload(new BytesRef(text));
  curOffset++;
  return token;
}
Example 3
Source File: TestIndexWriter.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testBrokenPayload() throws Exception { Directory d = newDirectory(); IndexWriter w = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random()))); Document doc = new Document(); Token token = new Token("bar", 0, 3); BytesRef evil = new BytesRef(new byte[1024]); evil.offset = 1000; // offset + length is now out of bounds. token.setPayload(evil); doc.add(new TextField("foo", new CannedTokenStream(token))); expectThrows(IndexOutOfBoundsException.class, () -> w.addDocument(doc)); w.close(); d.close(); }
Example 4
Source File: TestMaxPosition.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testTooBigPosition() throws Exception { Directory dir = newDirectory(); IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null)); Document doc = new Document(); // This is at position 1: Token t1 = new Token("foo", 0, 3); t1.setPositionIncrement(2); if (random().nextBoolean()) { t1.setPayload(new BytesRef(new byte[] { 0x1 } )); } Token t2 = new Token("foo", 4, 7); // This should overflow max: t2.setPositionIncrement(IndexWriter.MAX_POSITION); if (random().nextBoolean()) { t2.setPayload(new BytesRef(new byte[] { 0x1 } )); } doc.add(new TextField("foo", new CannedTokenStream(new Token[] {t1, t2}))); expectThrows(IllegalArgumentException.class, () -> { iw.addDocument(doc); }); // Document should not be visible: IndexReader r = DirectoryReader.open(iw); assertEquals(0, r.numDocs()); r.close(); iw.close(); dir.close(); }
Example 5
Source File: TestMaxPosition.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testMaxPosition() throws Exception { Directory dir = newDirectory(); IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null)); Document doc = new Document(); // This is at position 0: Token t1 = new Token("foo", 0, 3); if (random().nextBoolean()) { t1.setPayload(new BytesRef(new byte[] { 0x1 } )); } Token t2 = new Token("foo", 4, 7); t2.setPositionIncrement(IndexWriter.MAX_POSITION); if (random().nextBoolean()) { t2.setPayload(new BytesRef(new byte[] { 0x1 } )); } doc.add(new TextField("foo", new CannedTokenStream(new Token[] {t1, t2}))); iw.addDocument(doc); // Document should be visible: IndexReader r = DirectoryReader.open(iw); assertEquals(1, r.numDocs()); PostingsEnum postings = MultiTerms.getTermPostingsEnum(r, "foo", new BytesRef("foo")); // "foo" appears in docID=0 assertEquals(0, postings.nextDoc()); // "foo" appears 2 times in the doc assertEquals(2, postings.freq()); // first at pos=0 assertEquals(0, postings.nextPosition()); // next at pos=MAX assertEquals(IndexWriter.MAX_POSITION, postings.nextPosition()); r.close(); iw.close(); dir.close(); }